1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This tablegen backend is responsible for emitting arm_neon.h, which includes
11 // a declaration and definition of each function specified by the ARM NEON
12 // compiler interface. See ARM document DUI0348B.
13 //
14 // Each NEON instruction is implemented in terms of 1 or more functions which
15 // are suffixed with the element type of the input vectors. Functions may be
16 // implemented in terms of generic vector operations such as +, *, -, etc. or
17 // by calling a __builtin_-prefixed function which will be handled by clang's
18 // CodeGen library.
19 //
20 // Additional validation code can be generated by this file when runHeader() is
21 // called, rather than the normal run() entry point. A complete set of tests
22 // for Neon intrinsics can be generated by calling the runTests() entry point.
23 //
24 //===----------------------------------------------------------------------===//
25
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/SmallString.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/ADT/StringMap.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/TableGen/Error.h"
33 #include "llvm/TableGen/Record.h"
34 #include "llvm/TableGen/TableGenBackend.h"
35 #include <string>
36 using namespace llvm;
37
/// OpKind - Identifies the operation attached to an intrinsic record.  Each
/// enumerator is registered under an "OP_*" string key in NeonEmitter's OpMap.
/// Enumerators are listed several per line, grouped by the spelling of their
/// names; declaration order (and therefore numeric value) is significant only
/// in that it must not be changed silently.
enum OpKind {
  OpNone, OpUnavailable,

  // Add* / Sub* names.
  OpAdd, OpAddl, OpAddlHi, OpAddw, OpAddwHi,
  OpSub, OpSubl, OpSublHi, OpSubw, OpSubwHi,

  // Mul* / Mla* / Mls* names.
  OpMul, OpMla, OpMlal, OpMullHi, OpMullHiN, OpMlalHi, OpMlalHiN,
  OpMls, OpMlsl, OpMlslHi, OpMlslHiN,

  // "*N" names.
  OpMulN, OpMlaN, OpMlsN, OpFMlaN, OpFMlsN, OpMlalN, OpMlslN,

  // "*Lane" names.
  OpMulLane, OpMulXLane, OpMullLane, OpMullHiLane,
  OpMlaLane, OpMlsLane, OpMlalLane, OpMlalHiLane, OpMlslLane, OpMlslHiLane,
  OpQDMullLane, OpQDMullHiLane, OpQDMlalLane, OpQDMlalHiLane,
  OpQDMlslLane, OpQDMlslHiLane, OpQDMulhLane, OpQRDMulhLane,
  OpFMSLane, OpFMSLaneQ,

  // Trn / Zip / Uzp names.
  OpTrn1, OpZip1, OpUzp1, OpTrn2, OpZip2, OpUzp2,

  // Comparison names.
  OpEq, OpGe, OpLe, OpGt, OpLt,

  // Negate and bitwise names.
  OpNeg, OpNot, OpAnd, OpOr, OpXor, OpAndNot, OpOrNot,

  // Cast / concat / dup / half-select / rev names.
  OpCast, OpConcat, OpDup, OpDupLane, OpHi, OpLo, OpSelect,
  OpRev16, OpRev32, OpRev64,

  // "*Hi" narrowing / conversion names.
  OpXtnHi, OpSqxtunHi, OpQxtnHi, OpFcvtnHi, OpFcvtlHi, OpFcvtxnHi,

  OpReinterpret,

  OpAddhnHi, OpRAddhnHi, OpSubhnHi, OpRSubhnHi,

  // Abd* / Aba* names.
  OpAbdl, OpAbdlHi, OpAba, OpAbal, OpAbalHi,

  // QDM*Hi names.
  OpQDMullHi, OpQDMullHiN, OpQDMlalHi, OpQDMlalHiN, OpQDMlslHi, OpQDMlslHiN,

  OpDiv, OpLongHi, OpNarrowHi, OpMovlHi,

  // Copy-lane names.
  OpCopyLane, OpCopyQLane, OpCopyLaneQ,

  // "OpScalar*" names.
  OpScalarMulLane, OpScalarMulLaneQ,
  OpScalarMulXLane, OpScalarMulXLaneQ,
  OpScalarVMulXLane, OpScalarVMulXLaneQ,
  OpScalarQDMullLane, OpScalarQDMullLaneQ,
  OpScalarQDMulHiLane, OpScalarQDMulHiLaneQ,
  OpScalarQRDMulHiLane, OpScalarQRDMulHiLaneQ,
  OpScalarGetLane, OpScalarSetLane
};
161
/// ClassKind - Classifies an instruction record; NeonEmitter's ClassMap maps
/// the TableGen instruction classes onto these values.
enum ClassKind {
  ClassNone,

  /// Generic integer instruction, e.g., "i8" suffix.
  ClassI,

  /// Signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix.
  ClassS,

  /// Width-specific instruction, e.g., "8" suffix.
  ClassW,

  /// Bitcast arguments with enum argument to specify type.
  ClassB,

  /// Logical instructions which are op instructions, but for which no suffix
  /// must be emitted in the generated tests.
  ClassL,

  /// Instructions which are not tested since they are not TRUE instructions.
  ClassNoTest
};
174
/// NeonTypeFlags - Packed flag word identifying the type of an overloaded Neon
/// builtin.  The encoding must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  // Layout of the flag word: the element type occupies the bits covered by
  // EltTypeMask, followed by one bit each for "unsigned" and "quad".
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };

  uint32_t Flags;

public:
  /// Element kinds; the enumerator value is stored directly in the flag word.
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Poly64,
    Float16,
    Float32,
    Float64
  };

  /// Wrap an already-encoded flag word.
  NeonTypeFlags(unsigned F) : Flags(F) {}

  /// Encode an element type together with its unsigned and quad modifiers.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad)
      : Flags(static_cast<uint32_t>(ET) |
              (IsUnsigned ? static_cast<uint32_t>(UnsignedFlag) : 0u) |
              (IsQuad ? static_cast<uint32_t>(QuadFlag) : 0u)) {}

  /// Return the encoded flag word.
  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
212
213 namespace {
214 class NeonEmitter {
215 RecordKeeper &Records;
216 StringMap<OpKind> OpMap;
217 DenseMap<Record*, ClassKind> ClassMap;
218
219 public:
NeonEmitter(RecordKeeper & R)220 NeonEmitter(RecordKeeper &R) : Records(R) {
221 OpMap["OP_NONE"] = OpNone;
222 OpMap["OP_UNAVAILABLE"] = OpUnavailable;
223 OpMap["OP_ADD"] = OpAdd;
224 OpMap["OP_ADDL"] = OpAddl;
225 OpMap["OP_ADDLHi"] = OpAddlHi;
226 OpMap["OP_ADDW"] = OpAddw;
227 OpMap["OP_ADDWHi"] = OpAddwHi;
228 OpMap["OP_SUB"] = OpSub;
229 OpMap["OP_SUBL"] = OpSubl;
230 OpMap["OP_SUBLHi"] = OpSublHi;
231 OpMap["OP_SUBW"] = OpSubw;
232 OpMap["OP_SUBWHi"] = OpSubwHi;
233 OpMap["OP_MUL"] = OpMul;
234 OpMap["OP_MLA"] = OpMla;
235 OpMap["OP_MLAL"] = OpMlal;
236 OpMap["OP_MULLHi"] = OpMullHi;
237 OpMap["OP_MULLHi_N"] = OpMullHiN;
238 OpMap["OP_MLALHi"] = OpMlalHi;
239 OpMap["OP_MLALHi_N"] = OpMlalHiN;
240 OpMap["OP_MLS"] = OpMls;
241 OpMap["OP_MLSL"] = OpMlsl;
242 OpMap["OP_MLSLHi"] = OpMlslHi;
243 OpMap["OP_MLSLHi_N"] = OpMlslHiN;
244 OpMap["OP_MUL_N"] = OpMulN;
245 OpMap["OP_MLA_N"] = OpMlaN;
246 OpMap["OP_MLS_N"] = OpMlsN;
247 OpMap["OP_FMLA_N"] = OpFMlaN;
248 OpMap["OP_FMLS_N"] = OpFMlsN;
249 OpMap["OP_MLAL_N"] = OpMlalN;
250 OpMap["OP_MLSL_N"] = OpMlslN;
251 OpMap["OP_MUL_LN"]= OpMulLane;
252 OpMap["OP_MULX_LN"]= OpMulXLane;
253 OpMap["OP_MULL_LN"] = OpMullLane;
254 OpMap["OP_MULLHi_LN"] = OpMullHiLane;
255 OpMap["OP_MLA_LN"]= OpMlaLane;
256 OpMap["OP_MLS_LN"]= OpMlsLane;
257 OpMap["OP_MLAL_LN"] = OpMlalLane;
258 OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
259 OpMap["OP_MLSL_LN"] = OpMlslLane;
260 OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
261 OpMap["OP_QDMULL_LN"] = OpQDMullLane;
262 OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
263 OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
264 OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
265 OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
266 OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
267 OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
268 OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
269 OpMap["OP_FMS_LN"] = OpFMSLane;
270 OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
271 OpMap["OP_TRN1"] = OpTrn1;
272 OpMap["OP_ZIP1"] = OpZip1;
273 OpMap["OP_UZP1"] = OpUzp1;
274 OpMap["OP_TRN2"] = OpTrn2;
275 OpMap["OP_ZIP2"] = OpZip2;
276 OpMap["OP_UZP2"] = OpUzp2;
277 OpMap["OP_EQ"] = OpEq;
278 OpMap["OP_GE"] = OpGe;
279 OpMap["OP_LE"] = OpLe;
280 OpMap["OP_GT"] = OpGt;
281 OpMap["OP_LT"] = OpLt;
282 OpMap["OP_NEG"] = OpNeg;
283 OpMap["OP_NOT"] = OpNot;
284 OpMap["OP_AND"] = OpAnd;
285 OpMap["OP_OR"] = OpOr;
286 OpMap["OP_XOR"] = OpXor;
287 OpMap["OP_ANDN"] = OpAndNot;
288 OpMap["OP_ORN"] = OpOrNot;
289 OpMap["OP_CAST"] = OpCast;
290 OpMap["OP_CONC"] = OpConcat;
291 OpMap["OP_HI"] = OpHi;
292 OpMap["OP_LO"] = OpLo;
293 OpMap["OP_DUP"] = OpDup;
294 OpMap["OP_DUP_LN"] = OpDupLane;
295 OpMap["OP_SEL"] = OpSelect;
296 OpMap["OP_REV16"] = OpRev16;
297 OpMap["OP_REV32"] = OpRev32;
298 OpMap["OP_REV64"] = OpRev64;
299 OpMap["OP_XTN"] = OpXtnHi;
300 OpMap["OP_SQXTUN"] = OpSqxtunHi;
301 OpMap["OP_QXTN"] = OpQxtnHi;
302 OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi;
303 OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi;
304 OpMap["OP_VCVTX_HI"] = OpFcvtxnHi;
305 OpMap["OP_REINT"] = OpReinterpret;
306 OpMap["OP_ADDHNHi"] = OpAddhnHi;
307 OpMap["OP_RADDHNHi"] = OpRAddhnHi;
308 OpMap["OP_SUBHNHi"] = OpSubhnHi;
309 OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
310 OpMap["OP_ABDL"] = OpAbdl;
311 OpMap["OP_ABDLHi"] = OpAbdlHi;
312 OpMap["OP_ABA"] = OpAba;
313 OpMap["OP_ABAL"] = OpAbal;
314 OpMap["OP_ABALHi"] = OpAbalHi;
315 OpMap["OP_QDMULLHi"] = OpQDMullHi;
316 OpMap["OP_QDMULLHi_N"] = OpQDMullHiN;
317 OpMap["OP_QDMLALHi"] = OpQDMlalHi;
318 OpMap["OP_QDMLALHi_N"] = OpQDMlalHiN;
319 OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
320 OpMap["OP_QDMLSLHi_N"] = OpQDMlslHiN;
321 OpMap["OP_DIV"] = OpDiv;
322 OpMap["OP_LONG_HI"] = OpLongHi;
323 OpMap["OP_NARROW_HI"] = OpNarrowHi;
324 OpMap["OP_MOVL_HI"] = OpMovlHi;
325 OpMap["OP_COPY_LN"] = OpCopyLane;
326 OpMap["OP_COPYQ_LN"] = OpCopyQLane;
327 OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
328 OpMap["OP_SCALAR_MUL_LN"]= OpScalarMulLane;
329 OpMap["OP_SCALAR_MUL_LNQ"]= OpScalarMulLaneQ;
330 OpMap["OP_SCALAR_MULX_LN"]= OpScalarMulXLane;
331 OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ;
332 OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane;
333 OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ;
334 OpMap["OP_SCALAR_QDMULL_LN"] = OpScalarQDMullLane;
335 OpMap["OP_SCALAR_QDMULL_LNQ"] = OpScalarQDMullLaneQ;
336 OpMap["OP_SCALAR_QDMULH_LN"] = OpScalarQDMulHiLane;
337 OpMap["OP_SCALAR_QDMULH_LNQ"] = OpScalarQDMulHiLaneQ;
338 OpMap["OP_SCALAR_QRDMULH_LN"] = OpScalarQRDMulHiLane;
339 OpMap["OP_SCALAR_QRDMULH_LNQ"] = OpScalarQRDMulHiLaneQ;
340 OpMap["OP_SCALAR_GET_LN"] = OpScalarGetLane;
341 OpMap["OP_SCALAR_SET_LN"] = OpScalarSetLane;
342
343 Record *SI = R.getClass("SInst");
344 Record *II = R.getClass("IInst");
345 Record *WI = R.getClass("WInst");
346 Record *SOpI = R.getClass("SOpInst");
347 Record *IOpI = R.getClass("IOpInst");
348 Record *WOpI = R.getClass("WOpInst");
349 Record *LOpI = R.getClass("LOpInst");
350 Record *NoTestOpI = R.getClass("NoTestOpInst");
351
352 ClassMap[SI] = ClassS;
353 ClassMap[II] = ClassI;
354 ClassMap[WI] = ClassW;
355 ClassMap[SOpI] = ClassS;
356 ClassMap[IOpI] = ClassI;
357 ClassMap[WOpI] = ClassW;
358 ClassMap[LOpI] = ClassL;
359 ClassMap[NoTestOpI] = ClassNoTest;
360 }
361
362 // run - Emit arm_neon.h.inc
363 void run(raw_ostream &o);
364
365 // runHeader - Emit all the __builtin prototypes used in arm_neon.h
366 void runHeader(raw_ostream &o);
367
368 // runTests - Emit tests for all the Neon intrinsics.
369 void runTests(raw_ostream &o);
370
371 private:
372 void emitIntrinsic(raw_ostream &OS, Record *R,
373 StringMap<ClassKind> &EmittedMap);
374 void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
375 bool isA64GenBuiltinDef);
376 void genOverloadTypeCheckCode(raw_ostream &OS,
377 StringMap<ClassKind> &A64IntrinsicMap,
378 bool isA64TypeCheck);
379 void genIntrinsicRangeCheckCode(raw_ostream &OS,
380 StringMap<ClassKind> &A64IntrinsicMap,
381 bool isA64RangeCheck);
382 void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
383 bool isA64TestGen);
384 };
385 } // end anonymous namespace
386
387 /// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
388 /// which each StringRef representing a single type declared in the string.
389 /// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
390 /// 2xfloat and 4xfloat respectively.
ParseTypes(Record * r,std::string & s,SmallVectorImpl<StringRef> & TV)391 static void ParseTypes(Record *r, std::string &s,
392 SmallVectorImpl<StringRef> &TV) {
393 const char *data = s.data();
394 int len = 0;
395
396 for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
397 if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
398 || data[len] == 'H' || data[len] == 'S')
399 continue;
400
401 switch (data[len]) {
402 case 'c':
403 case 's':
404 case 'i':
405 case 'l':
406 case 'h':
407 case 'f':
408 case 'd':
409 break;
410 default:
411 PrintFatalError(r->getLoc(),
412 "Unexpected letter: " + std::string(data + len, 1));
413 }
414 TV.push_back(StringRef(data, len + 1));
415 data += len + 1;
416 len = -1;
417 }
418 }
419
420 /// Widen - Convert a type code into the next wider type. char -> short,
421 /// short -> int, etc.
Widen(const char t)422 static char Widen(const char t) {
423 switch (t) {
424 case 'c':
425 return 's';
426 case 's':
427 return 'i';
428 case 'i':
429 return 'l';
430 case 'h':
431 return 'f';
432 case 'f':
433 return 'd';
434 default:
435 PrintFatalError("unhandled type in widen!");
436 }
437 }
438
439 /// Narrow - Convert a type code into the next smaller type. short -> char,
440 /// float -> half float, etc.
Narrow(const char t)441 static char Narrow(const char t) {
442 switch (t) {
443 case 's':
444 return 'c';
445 case 'i':
446 return 's';
447 case 'l':
448 return 'i';
449 case 'f':
450 return 'h';
451 case 'd':
452 return 'f';
453 default:
454 PrintFatalError("unhandled type in narrow!");
455 }
456 }
457
GetNarrowTypestr(StringRef ty)458 static std::string GetNarrowTypestr(StringRef ty)
459 {
460 std::string s;
461 for (size_t i = 0, end = ty.size(); i < end; i++) {
462 switch (ty[i]) {
463 case 's':
464 s += 'c';
465 break;
466 case 'i':
467 s += 's';
468 break;
469 case 'l':
470 s += 'i';
471 break;
472 default:
473 s += ty[i];
474 break;
475 }
476 }
477
478 return s;
479 }
480
481 /// For a particular StringRef, return the base type code, and whether it has
482 /// the quad-vector, polynomial, or unsigned modifiers set.
ClassifyType(StringRef ty,bool & quad,bool & poly,bool & usgn)483 static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
484 unsigned off = 0;
485 // ignore scalar.
486 if (ty[off] == 'S') {
487 ++off;
488 }
489 // remember quad.
490 if (ty[off] == 'Q' || ty[off] == 'H') {
491 quad = true;
492 ++off;
493 }
494
495 // remember poly.
496 if (ty[off] == 'P') {
497 poly = true;
498 ++off;
499 }
500
501 // remember unsigned.
502 if (ty[off] == 'U') {
503 usgn = true;
504 ++off;
505 }
506
507 // base type to get the type string for.
508 return ty[off];
509 }
510
511 /// ModType - Transform a type code and its modifiers based on a mod code. The
512 /// mod code definitions may be found at the top of arm_neon.td.
ModType(const char mod,char type,bool & quad,bool & poly,bool & usgn,bool & scal,bool & cnst,bool & pntr)513 static char ModType(const char mod, char type, bool &quad, bool &poly,
514 bool &usgn, bool &scal, bool &cnst, bool &pntr) {
515 switch (mod) {
516 case 't':
517 if (poly) {
518 poly = false;
519 usgn = true;
520 }
521 break;
522 case 'b':
523 scal = true;
524 case 'u':
525 usgn = true;
526 poly = false;
527 if (type == 'f')
528 type = 'i';
529 if (type == 'd')
530 type = 'l';
531 break;
532 case '$':
533 scal = true;
534 case 'x':
535 usgn = false;
536 poly = false;
537 if (type == 'f')
538 type = 'i';
539 if (type == 'd')
540 type = 'l';
541 break;
542 case 'o':
543 scal = true;
544 type = 'd';
545 usgn = false;
546 break;
547 case 'y':
548 scal = true;
549 case 'f':
550 if (type == 'h')
551 quad = true;
552 type = 'f';
553 usgn = false;
554 break;
555 case 'F':
556 type = 'd';
557 usgn = false;
558 break;
559 case 'g':
560 quad = false;
561 break;
562 case 'B':
563 case 'C':
564 case 'D':
565 case 'j':
566 quad = true;
567 break;
568 case 'w':
569 type = Widen(type);
570 quad = true;
571 break;
572 case 'n':
573 type = Widen(type);
574 break;
575 case 'i':
576 type = 'i';
577 scal = true;
578 break;
579 case 'l':
580 type = 'l';
581 scal = true;
582 usgn = true;
583 break;
584 case 'z':
585 type = Narrow(type);
586 scal = true;
587 break;
588 case 'r':
589 type = Widen(type);
590 scal = true;
591 break;
592 case 's':
593 case 'a':
594 scal = true;
595 break;
596 case 'k':
597 quad = true;
598 break;
599 case 'c':
600 cnst = true;
601 case 'p':
602 pntr = true;
603 scal = true;
604 break;
605 case 'h':
606 type = Narrow(type);
607 if (type == 'h')
608 quad = false;
609 break;
610 case 'q':
611 type = Narrow(type);
612 quad = true;
613 break;
614 case 'e':
615 type = Narrow(type);
616 usgn = true;
617 break;
618 case 'm':
619 type = Narrow(type);
620 quad = false;
621 break;
622 default:
623 break;
624 }
625 return type;
626 }
627
/// IsMultiVecProto - Return true if the prototype character \p p is one of
/// '2', '3', '4', 'B', 'C' or 'D'.
static bool IsMultiVecProto(const char p) {
  switch (p) {
  case '2':
  case '3':
  case '4':
  case 'B':
  case 'C':
  case 'D':
    return true;
  default:
    return false;
  }
}
631
632 /// TypeString - for a modifier and type, generate the name of the typedef for
633 /// that type. QUc -> uint8x8_t.
TypeString(const char mod,StringRef typestr)634 static std::string TypeString(const char mod, StringRef typestr) {
635 bool quad = false;
636 bool poly = false;
637 bool usgn = false;
638 bool scal = false;
639 bool cnst = false;
640 bool pntr = false;
641
642 if (mod == 'v')
643 return "void";
644 if (mod == 'i')
645 return "int";
646
647 // base type to get the type string for.
648 char type = ClassifyType(typestr, quad, poly, usgn);
649
650 // Based on the modifying character, change the type and width if necessary.
651 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
652
653 SmallString<128> s;
654
655 if (usgn)
656 s.push_back('u');
657
658 switch (type) {
659 case 'c':
660 s += poly ? "poly8" : "int8";
661 if (scal)
662 break;
663 s += quad ? "x16" : "x8";
664 break;
665 case 's':
666 s += poly ? "poly16" : "int16";
667 if (scal)
668 break;
669 s += quad ? "x8" : "x4";
670 break;
671 case 'i':
672 s += "int32";
673 if (scal)
674 break;
675 s += quad ? "x4" : "x2";
676 break;
677 case 'l':
678 s += (poly && !usgn)? "poly64" : "int64";
679 if (scal)
680 break;
681 s += quad ? "x2" : "x1";
682 break;
683 case 'h':
684 s += "float16";
685 if (scal)
686 break;
687 s += quad ? "x8" : "x4";
688 break;
689 case 'f':
690 s += "float32";
691 if (scal)
692 break;
693 s += quad ? "x4" : "x2";
694 break;
695 case 'd':
696 s += "float64";
697 if (scal)
698 break;
699 s += quad ? "x2" : "x1";
700 break;
701
702 default:
703 PrintFatalError("unhandled type!");
704 }
705
706 if (mod == '2' || mod == 'B')
707 s += "x2";
708 if (mod == '3' || mod == 'C')
709 s += "x3";
710 if (mod == '4' || mod == 'D')
711 s += "x4";
712
713 // Append _t, finishing the type string typedef type.
714 s += "_t";
715
716 if (cnst)
717 s += " const";
718
719 if (pntr)
720 s += " *";
721
722 return s.str();
723 }
724
725 /// BuiltinTypeString - for a modifier and type, generate the clang
726 /// BuiltinsARM.def prototype code for the function. See the top of clang's
727 /// Builtins.def for a description of the type strings.
BuiltinTypeString(const char mod,StringRef typestr,ClassKind ck,bool ret)728 static std::string BuiltinTypeString(const char mod, StringRef typestr,
729 ClassKind ck, bool ret) {
730 bool quad = false;
731 bool poly = false;
732 bool usgn = false;
733 bool scal = false;
734 bool cnst = false;
735 bool pntr = false;
736
737 if (mod == 'v')
738 return "v"; // void
739 if (mod == 'i')
740 return "i"; // int
741
742 // base type to get the type string for.
743 char type = ClassifyType(typestr, quad, poly, usgn);
744
745 // Based on the modifying character, change the type and width if necessary.
746 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
747
748 // All pointers are void* pointers. Change type to 'v' now.
749 if (pntr) {
750 usgn = false;
751 poly = false;
752 type = 'v';
753 }
754 // Treat half-float ('h') types as unsigned short ('s') types.
755 if (type == 'h') {
756 type = 's';
757 usgn = true;
758 }
759 usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
760 scal && type != 'f' && type != 'd');
761
762 if (scal) {
763 SmallString<128> s;
764
765 if (usgn)
766 s.push_back('U');
767 else if (type == 'c')
768 s.push_back('S'); // make chars explicitly signed
769
770 if (type == 'l') // 64-bit long
771 s += "LLi";
772 else
773 s.push_back(type);
774
775 if (cnst)
776 s.push_back('C');
777 if (pntr)
778 s.push_back('*');
779 return s.str();
780 }
781
782 // Since the return value must be one type, return a vector type of the
783 // appropriate width which we will bitcast. An exception is made for
784 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
785 // fashion, storing them to a pointer arg.
786 if (ret) {
787 if (IsMultiVecProto(mod))
788 return "vv*"; // void result with void* first argument
789 if (mod == 'f' || (ck != ClassB && type == 'f'))
790 return quad ? "V4f" : "V2f";
791 if (mod == 'F' || (ck != ClassB && type == 'd'))
792 return quad ? "V2d" : "V1d";
793 if (ck != ClassB && type == 's')
794 return quad ? "V8s" : "V4s";
795 if (ck != ClassB && type == 'i')
796 return quad ? "V4i" : "V2i";
797 if (ck != ClassB && type == 'l')
798 return quad ? "V2LLi" : "V1LLi";
799
800 return quad ? "V16Sc" : "V8Sc";
801 }
802
803 // Non-return array types are passed as individual vectors.
804 if (mod == '2' || mod == 'B')
805 return quad ? "V16ScV16Sc" : "V8ScV8Sc";
806 if (mod == '3' || mod == 'C')
807 return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
808 if (mod == '4' || mod == 'D')
809 return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
810
811 if (mod == 'f' || (ck != ClassB && type == 'f'))
812 return quad ? "V4f" : "V2f";
813 if (mod == 'F' || (ck != ClassB && type == 'd'))
814 return quad ? "V2d" : "V1d";
815 if (ck != ClassB && type == 's')
816 return quad ? "V8s" : "V4s";
817 if (ck != ClassB && type == 'i')
818 return quad ? "V4i" : "V2i";
819 if (ck != ClassB && type == 'l')
820 return quad ? "V2LLi" : "V1LLi";
821
822 return quad ? "V16Sc" : "V8Sc";
823 }
824
825 /// InstructionTypeCode - Computes the ARM argument character code and
826 /// quad status for a specific type string and ClassKind.
InstructionTypeCode(const StringRef & typeStr,const ClassKind ck,bool & quad,std::string & typeCode)827 static void InstructionTypeCode(const StringRef &typeStr,
828 const ClassKind ck,
829 bool &quad,
830 std::string &typeCode) {
831 bool poly = false;
832 bool usgn = false;
833 char type = ClassifyType(typeStr, quad, poly, usgn);
834
835 switch (type) {
836 case 'c':
837 switch (ck) {
838 case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
839 case ClassI: typeCode = "i8"; break;
840 case ClassW: typeCode = "8"; break;
841 default: break;
842 }
843 break;
844 case 's':
845 switch (ck) {
846 case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
847 case ClassI: typeCode = "i16"; break;
848 case ClassW: typeCode = "16"; break;
849 default: break;
850 }
851 break;
852 case 'i':
853 switch (ck) {
854 case ClassS: typeCode = usgn ? "u32" : "s32"; break;
855 case ClassI: typeCode = "i32"; break;
856 case ClassW: typeCode = "32"; break;
857 default: break;
858 }
859 break;
860 case 'l':
861 switch (ck) {
862 case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break;
863 case ClassI: typeCode = "i64"; break;
864 case ClassW: typeCode = "64"; break;
865 default: break;
866 }
867 break;
868 case 'h':
869 switch (ck) {
870 case ClassS:
871 case ClassI: typeCode = "f16"; break;
872 case ClassW: typeCode = "16"; break;
873 default: break;
874 }
875 break;
876 case 'f':
877 switch (ck) {
878 case ClassS:
879 case ClassI: typeCode = "f32"; break;
880 case ClassW: typeCode = "32"; break;
881 default: break;
882 }
883 break;
884 case 'd':
885 switch (ck) {
886 case ClassS:
887 case ClassI:
888 typeCode += "f64";
889 break;
890 case ClassW:
891 PrintFatalError("unhandled type!");
892 default:
893 break;
894 }
895 break;
896 default:
897 PrintFatalError("unhandled type!");
898 }
899 }
900
Insert_BHSD_Suffix(StringRef typestr)901 static char Insert_BHSD_Suffix(StringRef typestr){
902 unsigned off = 0;
903 if(typestr[off++] == 'S'){
904 while(typestr[off] == 'Q' || typestr[off] == 'H'||
905 typestr[off] == 'P' || typestr[off] == 'U')
906 ++off;
907 switch (typestr[off]){
908 default : break;
909 case 'c' : return 'b';
910 case 's' : return 'h';
911 case 'i' :
912 case 'f' : return 's';
913 case 'l' :
914 case 'd' : return 'd';
915 }
916 }
917 return 0;
918 }
919
/// endsWith_xN - Return true if \p name is longer than three characters and
/// ends in "_x2", "_x3" or "_x4".
static bool endsWith_xN(std::string const &name) {
  const std::string::size_type len = name.length();
  if (len <= 3)
    return false;

  if (name[len - 3] != '_' || name[len - 2] != 'x')
    return false;

  const char count = name[len - 1];
  return count == '2' || count == '3' || count == '4';
}
929
930 /// MangleName - Append a type or width suffix to a base neon function name,
931 /// and insert a 'q' in the appropriate location if type string starts with 'Q'.
932 /// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
933 /// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
MangleName(const std::string & name,StringRef typestr,ClassKind ck)934 static std::string MangleName(const std::string &name, StringRef typestr,
935 ClassKind ck) {
936 if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64" ||
937 name == "vcvt_f64_f32")
938 return name;
939
940 bool quad = false;
941 std::string typeCode = "";
942
943 InstructionTypeCode(typestr, ck, quad, typeCode);
944
945 std::string s = name;
946
947 if (typeCode.size() > 0) {
948 // If the name is end with _xN (N = 2,3,4), insert the typeCode before _xN.
949 if (endsWith_xN(s))
950 s.insert(s.length() - 3, "_" + typeCode);
951 else
952 s += "_" + typeCode;
953 }
954
955 if (ck == ClassB)
956 s += "_v";
957
958 // Insert a 'q' before the first '_' character so that it ends up before
959 // _lane or _n on vector-scalar operations.
960 if (typestr.find("Q") != StringRef::npos) {
961 size_t pos = s.find('_');
962 s = s.insert(pos, "q");
963 }
964 char ins = Insert_BHSD_Suffix(typestr);
965 if(ins){
966 size_t pos = s.find('_');
967 s = s.insert(pos, &ins, 1);
968 }
969
970 return s;
971 }
972
PreprocessInstruction(const StringRef & Name,const std::string & InstName,std::string & Prefix,bool & HasNPostfix,bool & HasLanePostfix,bool & HasDupPostfix,bool & IsSpecialVCvt,size_t & TBNumber)973 static void PreprocessInstruction(const StringRef &Name,
974 const std::string &InstName,
975 std::string &Prefix,
976 bool &HasNPostfix,
977 bool &HasLanePostfix,
978 bool &HasDupPostfix,
979 bool &IsSpecialVCvt,
980 size_t &TBNumber) {
981 // All of our instruction name fields from arm_neon.td are of the form
982 // <instructionname>_...
983 // Thus we grab our instruction name via computation of said Prefix.
984 const size_t PrefixEnd = Name.find_first_of('_');
985 // If InstName is passed in, we use that instead of our name Prefix.
986 Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
987
988 const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
989
990 HasNPostfix = Postfix.count("_n");
991 HasLanePostfix = Postfix.count("_lane");
992 HasDupPostfix = Postfix.count("_dup");
993 IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
994
995 if (InstName.compare("vtbl") == 0 ||
996 InstName.compare("vtbx") == 0) {
997 // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
998 // encoding to get its true value.
999 TBNumber = Name[Name.size()-1] - 48;
1000 }
1001 }
1002
1003 /// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
1004 /// extracted, generate a FileCheck pattern for a Load Or Store
1005 static void
GenerateRegisterCheckPatternForLoadStores(const StringRef & NameRef,const std::string & OutTypeCode,const bool & IsQuad,const bool & HasDupPostfix,const bool & HasLanePostfix,const size_t Count,std::string & RegisterSuffix)1006 GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
1007 const std::string& OutTypeCode,
1008 const bool &IsQuad,
1009 const bool &HasDupPostfix,
1010 const bool &HasLanePostfix,
1011 const size_t Count,
1012 std::string &RegisterSuffix) {
1013 const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
1014 // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
1015 // will output a series of v{ld,st}1s, so we have to handle it specially.
1016 if ((Count == 3 || Count == 4) && IsQuad) {
1017 RegisterSuffix += "{";
1018 for (size_t i = 0; i < Count; i++) {
1019 RegisterSuffix += "d{{[0-9]+}}";
1020 if (HasDupPostfix) {
1021 RegisterSuffix += "[]";
1022 }
1023 if (HasLanePostfix) {
1024 RegisterSuffix += "[{{[0-9]+}}]";
1025 }
1026 if (i < Count-1) {
1027 RegisterSuffix += ", ";
1028 }
1029 }
1030 RegisterSuffix += "}";
1031 } else {
1032
1033 // Handle normal loads and stores.
1034 RegisterSuffix += "{";
1035 for (size_t i = 0; i < Count; i++) {
1036 RegisterSuffix += "d{{[0-9]+}}";
1037 if (HasDupPostfix) {
1038 RegisterSuffix += "[]";
1039 }
1040 if (HasLanePostfix) {
1041 RegisterSuffix += "[{{[0-9]+}}]";
1042 }
1043 if (IsQuad && !HasLanePostfix) {
1044 RegisterSuffix += ", d{{[0-9]+}}";
1045 if (HasDupPostfix) {
1046 RegisterSuffix += "[]";
1047 }
1048 }
1049 if (i < Count-1) {
1050 RegisterSuffix += ", ";
1051 }
1052 }
1053 RegisterSuffix += "}, [r{{[0-9]+}}";
1054
1055 // We only include the alignment hint if we have a vld1.*64 or
1056 // a dup/lane instruction.
1057 if (IsLDSTOne) {
1058 if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
1059 RegisterSuffix += ":" + OutTypeCode;
1060 }
1061 }
1062
1063 RegisterSuffix += "]";
1064 }
1065 }
1066
HasNPostfixAndScalarArgs(const StringRef & NameRef,const bool & HasNPostfix)1067 static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
1068 const bool &HasNPostfix) {
1069 return (NameRef.count("vmla") ||
1070 NameRef.count("vmlal") ||
1071 NameRef.count("vmlsl") ||
1072 NameRef.count("vmull") ||
1073 NameRef.count("vqdmlal") ||
1074 NameRef.count("vqdmlsl") ||
1075 NameRef.count("vqdmulh") ||
1076 NameRef.count("vqdmull") ||
1077 NameRef.count("vqrdmulh")) && HasNPostfix;
1078 }
1079
IsFiveOperandLaneAccumulator(const StringRef & NameRef,const bool & HasLanePostfix)1080 static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
1081 const bool &HasLanePostfix) {
1082 return (NameRef.count("vmla") ||
1083 NameRef.count("vmls") ||
1084 NameRef.count("vmlal") ||
1085 NameRef.count("vmlsl") ||
1086 (NameRef.count("vmul") && NameRef.size() == 3)||
1087 NameRef.count("vqdmlal") ||
1088 NameRef.count("vqdmlsl") ||
1089 NameRef.count("vqdmulh") ||
1090 NameRef.count("vqrdmulh")) && HasLanePostfix;
1091 }
1092
IsSpecialLaneMultiply(const StringRef & NameRef,const bool & HasLanePostfix,const bool & IsQuad)1093 static bool IsSpecialLaneMultiply(const StringRef &NameRef,
1094 const bool &HasLanePostfix,
1095 const bool &IsQuad) {
1096 const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
1097 && IsQuad;
1098 const bool IsVMull = NameRef.count("mull") && !IsQuad;
1099 return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
1100 }
1101
NormalizeProtoForRegisterPatternCreation(const std::string & Name,const std::string & Proto,const bool & HasNPostfix,const bool & IsQuad,const bool & HasLanePostfix,const bool & HasDupPostfix,std::string & NormedProto)1102 static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
1103 const std::string &Proto,
1104 const bool &HasNPostfix,
1105 const bool &IsQuad,
1106 const bool &HasLanePostfix,
1107 const bool &HasDupPostfix,
1108 std::string &NormedProto) {
1109 // Handle generic case.
1110 const StringRef NameRef(Name);
1111 for (size_t i = 0, end = Proto.size(); i < end; i++) {
1112 switch (Proto[i]) {
1113 case 'u':
1114 case 'f':
1115 case 'F':
1116 case 'd':
1117 case 's':
1118 case 'x':
1119 case 't':
1120 case 'n':
1121 NormedProto += IsQuad? 'q' : 'd';
1122 break;
1123 case 'w':
1124 case 'k':
1125 NormedProto += 'q';
1126 break;
1127 case 'g':
1128 case 'j':
1129 case 'h':
1130 case 'e':
1131 NormedProto += 'd';
1132 break;
1133 case 'i':
1134 NormedProto += HasLanePostfix? 'a' : 'i';
1135 break;
1136 case 'a':
1137 if (HasLanePostfix) {
1138 NormedProto += 'a';
1139 } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
1140 NormedProto += IsQuad? 'q' : 'd';
1141 } else {
1142 NormedProto += 'i';
1143 }
1144 break;
1145 }
1146 }
1147
1148 // Handle Special Cases.
1149 const bool IsNotVExt = !NameRef.count("vext");
1150 const bool IsVPADAL = NameRef.count("vpadal");
1151 const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
1152 HasLanePostfix);
1153 const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
1154 IsQuad);
1155
1156 if (IsSpecialLaneMul) {
1157 // If
1158 NormedProto[2] = NormedProto[3];
1159 NormedProto.erase(3);
1160 } else if (NormedProto.size() == 4 &&
1161 NormedProto[0] == NormedProto[1] &&
1162 IsNotVExt) {
1163 // If NormedProto.size() == 4 and the first two proto characters are the
1164 // same, ignore the first.
1165 NormedProto = NormedProto.substr(1, 3);
1166 } else if (Is5OpLaneAccum) {
1167 // If we have a 5 op lane accumulator operation, we take characters 1,2,4
1168 std::string tmp = NormedProto.substr(1,2);
1169 tmp += NormedProto[4];
1170 NormedProto = tmp;
1171 } else if (IsVPADAL) {
1172 // If we have VPADAL, ignore the first character.
1173 NormedProto = NormedProto.substr(0, 2);
1174 } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
1175 // If our instruction is a dup instruction, keep only the first and
1176 // last characters.
1177 std::string tmp = "";
1178 tmp += NormedProto[0];
1179 tmp += NormedProto[NormedProto.size()-1];
1180 NormedProto = tmp;
1181 }
1182 }
1183
1184 /// GenerateRegisterCheckPatterns - Given a bunch of data we have
1185 /// extracted, generate a FileCheck pattern to check that an
1186 /// instruction's arguments are correct.
GenerateRegisterCheckPattern(const std::string & Name,const std::string & Proto,const std::string & OutTypeCode,const bool & HasNPostfix,const bool & IsQuad,const bool & HasLanePostfix,const bool & HasDupPostfix,const size_t & TBNumber,std::string & RegisterSuffix)1187 static void GenerateRegisterCheckPattern(const std::string &Name,
1188 const std::string &Proto,
1189 const std::string &OutTypeCode,
1190 const bool &HasNPostfix,
1191 const bool &IsQuad,
1192 const bool &HasLanePostfix,
1193 const bool &HasDupPostfix,
1194 const size_t &TBNumber,
1195 std::string &RegisterSuffix) {
1196
1197 RegisterSuffix = "";
1198
1199 const StringRef NameRef(Name);
1200 const StringRef ProtoRef(Proto);
1201
1202 if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
1203 return;
1204 }
1205
1206 const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
1207 const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
1208
1209 if (IsLoadStore) {
1210 // Grab N value from v{ld,st}N using its ascii representation.
1211 const size_t Count = NameRef[3] - 48;
1212
1213 GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
1214 HasDupPostfix, HasLanePostfix,
1215 Count, RegisterSuffix);
1216 } else if (IsTBXOrTBL) {
1217 RegisterSuffix += "d{{[0-9]+}}, {";
1218 for (size_t i = 0; i < TBNumber-1; i++) {
1219 RegisterSuffix += "d{{[0-9]+}}, ";
1220 }
1221 RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
1222 } else {
1223 // Handle a normal instruction.
1224 if (NameRef.count("vget") || NameRef.count("vset"))
1225 return;
1226
1227 // We first normalize our proto, since we only need to emit 4
1228 // different types of checks, yet have more than 4 proto types
1229 // that map onto those 4 patterns.
1230 std::string NormalizedProto("");
1231 NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
1232 HasLanePostfix, HasDupPostfix,
1233 NormalizedProto);
1234
1235 for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
1236 const char &c = NormalizedProto[i];
1237 switch (c) {
1238 case 'q':
1239 RegisterSuffix += "q{{[0-9]+}}, ";
1240 break;
1241
1242 case 'd':
1243 RegisterSuffix += "d{{[0-9]+}}, ";
1244 break;
1245
1246 case 'i':
1247 RegisterSuffix += "#{{[0-9]+}}, ";
1248 break;
1249
1250 case 'a':
1251 RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
1252 break;
1253 }
1254 }
1255
1256 // Remove extra ", ".
1257 RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
1258 }
1259 }
1260
1261 /// GenerateChecksForIntrinsic - Given a specific instruction name +
1262 /// typestr + class kind, generate the proper set of FileCheck
1263 /// Patterns to check for. We could just return a string, but instead
1264 /// use a vector since it provides us with the extra flexibility of
1265 /// emitting multiple checks, which comes in handy for certain cases
1266 /// like mla where we want to check for 2 different instructions.
GenerateChecksForIntrinsic(const std::string & Name,const std::string & Proto,StringRef & OutTypeStr,StringRef & InTypeStr,ClassKind Ck,const std::string & InstName,bool IsHiddenLOp,std::vector<std::string> & Result)1267 static void GenerateChecksForIntrinsic(const std::string &Name,
1268 const std::string &Proto,
1269 StringRef &OutTypeStr,
1270 StringRef &InTypeStr,
1271 ClassKind Ck,
1272 const std::string &InstName,
1273 bool IsHiddenLOp,
1274 std::vector<std::string>& Result) {
1275
1276 // If Ck is a ClassNoTest instruction, just return so no test is
1277 // emitted.
1278 if(Ck == ClassNoTest)
1279 return;
1280
1281 if (Name == "vcvt_f32_f16") {
1282 Result.push_back("vcvt.f32.f16");
1283 return;
1284 }
1285
1286
1287 // Now we preprocess our instruction given the data we have to get the
1288 // data that we need.
1289 // Create a StringRef for String Manipulation of our Name.
1290 const StringRef NameRef(Name);
1291 // Instruction Prefix.
1292 std::string Prefix;
1293 // The type code for our out type string.
1294 std::string OutTypeCode;
1295 // To handle our different cases, we need to check for different postfixes.
1296 // Is our instruction a quad instruction.
1297 bool IsQuad = false;
1298 // Our instruction is of the form <instructionname>_n.
1299 bool HasNPostfix = false;
1300 // Our instruction is of the form <instructionname>_lane.
1301 bool HasLanePostfix = false;
1302 // Our instruction is of the form <instructionname>_dup.
1303 bool HasDupPostfix = false;
1304 // Our instruction is a vcvt instruction which requires special handling.
1305 bool IsSpecialVCvt = false;
1306 // If we have a vtbxN or vtblN instruction, this is set to N.
1307 size_t TBNumber = -1;
1308 // Register Suffix
1309 std::string RegisterSuffix;
1310
1311 PreprocessInstruction(NameRef, InstName, Prefix,
1312 HasNPostfix, HasLanePostfix, HasDupPostfix,
1313 IsSpecialVCvt, TBNumber);
1314
1315 InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
1316 GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
1317 HasLanePostfix, HasDupPostfix, TBNumber,
1318 RegisterSuffix);
1319
1320 // In the following section, we handle a bunch of special cases. You can tell
1321 // a special case by the fact we are returning early.
1322
1323 // If our instruction is a logical instruction without postfix or a
1324 // hidden LOp just return the current Prefix.
1325 if (Ck == ClassL || IsHiddenLOp) {
1326 Result.push_back(Prefix + " " + RegisterSuffix);
1327 return;
1328 }
1329
1330 // If we have a vmov, due to the many different cases, some of which
1331 // vary within the different intrinsics generated for a single
1332 // instruction type, just output a vmov. (e.g. given an instruction
1333 // A, A.u32 might be vmov and A.u8 might be vmov.8).
1334 //
1335 // FIXME: Maybe something can be done about this. The two cases that we care
1336 // about are vmov as an LType and vmov as a WType.
1337 if (Prefix == "vmov") {
1338 Result.push_back(Prefix + " " + RegisterSuffix);
1339 return;
1340 }
1341
1342 // In the following section, we handle special cases.
1343
1344 if (OutTypeCode == "64") {
1345 // If we have a 64 bit vdup/vext and are handling an uint64x1_t
1346 // type, the intrinsic will be optimized away, so just return
1347 // nothing. On the other hand if we are handling an uint64x2_t
1348 // (i.e. quad instruction), vdup/vmov instructions should be
1349 // emitted.
1350 if (Prefix == "vdup" || Prefix == "vext") {
1351 if (IsQuad) {
1352 Result.push_back("{{vmov|vdup}}");
1353 }
1354 return;
1355 }
1356
1357 // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
1358 // multiple register operands.
1359 bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
1360 || Prefix == "vld4";
1361 bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
1362 || Prefix == "vst4";
1363 if (MultiLoadPrefix || MultiStorePrefix) {
1364 Result.push_back(NameRef.slice(0, 3).str() + "1.64");
1365 return;
1366 }
1367
1368 // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
1369 // emitting said instructions. So return a check for
1370 // vldr/vstr/vmov/str instead.
1371 if (HasLanePostfix || HasDupPostfix) {
1372 if (Prefix == "vst1") {
1373 Result.push_back("{{str|vstr|vmov}}");
1374 return;
1375 } else if (Prefix == "vld1") {
1376 Result.push_back("{{ldr|vldr|vmov}}");
1377 return;
1378 }
1379 }
1380 }
1381
1382 // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
1383 // sometimes disassembled as vtrn.32. We use a regex to handle both
1384 // cases.
1385 if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
1386 Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
1387 return;
1388 }
1389
1390 // Currently on most ARM processors, we do not use vmla/vmls for
1391 // quad floating point operations. Instead we output vmul + vadd. So
1392 // check if we have one of those instructions and just output a
1393 // check for vmul.
1394 if (OutTypeCode == "f32") {
1395 if (Prefix == "vmls") {
1396 Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1397 Result.push_back("vsub." + OutTypeCode);
1398 return;
1399 } else if (Prefix == "vmla") {
1400 Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
1401 Result.push_back("vadd." + OutTypeCode);
1402 return;
1403 }
1404 }
1405
1406 // If we have vcvt, get the input type from the instruction name
1407 // (which should be of the form instname_inputtype) and append it
1408 // before the output type.
1409 if (Prefix == "vcvt") {
1410 const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
1411 Prefix += "." + inTypeCode;
1412 }
1413
1414 // Append output type code to get our final mangled instruction.
1415 Prefix += "." + OutTypeCode;
1416
1417 Result.push_back(Prefix + " " + RegisterSuffix);
1418 }
1419
/// UseMacro - Decide from the prototype string whether an intrinsic has to be
/// emitted as a preprocessor macro rather than an inline function.
///
/// A macro is required when the prototype contains
///   'i'        - an immediate argument, so that SemaChecking can range check
///                the value the user passes, or
///   'p' / 'c'  - a pointer argument, so that aligned attributes on the
///                pointer type are not hidden.
static bool UseMacro(const std::string &proto) {
  return proto.find_first_of("ipc") != std::string::npos;
}
1437
/// MacroArgUsedDirectly - For an intrinsic defined as a macro, report whether
/// argument i of the prototype is referenced directly in the macro body
/// instead of being copied into a local temporary first. This holds for
/// constant ints ('i'), pointers ('p') and const pointers ('c').
static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
  switch (proto[i]) {
  case 'i':
  case 'p':
  case 'c':
    return true;
  default:
    return false;
  }
}
1445
1446 // Generate the string "(argtype a, argtype b, ...)"
GenArgs(const std::string & proto,StringRef typestr,const std::string & name)1447 static std::string GenArgs(const std::string &proto, StringRef typestr,
1448 const std::string &name) {
1449 bool define = UseMacro(proto);
1450 char arg = 'a';
1451
1452 std::string s;
1453 s += "(";
1454
1455 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1456 if (define) {
1457 // Some macro arguments are used directly instead of being assigned
1458 // to local temporaries; prepend an underscore prefix to make their
1459 // names consistent with the local temporaries.
1460 if (MacroArgUsedDirectly(proto, i))
1461 s += "__";
1462 } else {
1463 s += TypeString(proto[i], typestr) + " __";
1464 }
1465 s.push_back(arg);
1466 //To avoid argument being multiple defined, add extra number for renaming.
1467 if (name == "vcopy_lane" || name == "vcopy_laneq")
1468 s.push_back('1');
1469 if ((i + 1) < e)
1470 s += ", ";
1471 }
1472
1473 s += ")";
1474 return s;
1475 }
1476
1477 // Macro arguments are not type-checked like inline function arguments, so
1478 // assign them to local temporaries to get the right type checking.
GenMacroLocals(const std::string & proto,StringRef typestr,const std::string & name)1479 static std::string GenMacroLocals(const std::string &proto, StringRef typestr,
1480 const std::string &name ) {
1481 char arg = 'a';
1482 std::string s;
1483 bool generatedLocal = false;
1484
1485 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1486 // Do not create a temporary for an immediate argument.
1487 // That would defeat the whole point of using a macro!
1488 if (MacroArgUsedDirectly(proto, i))
1489 continue;
1490 generatedLocal = true;
1491 bool extranumber = false;
1492 if (name == "vcopy_lane" || name == "vcopy_laneq")
1493 extranumber = true;
1494
1495 s += TypeString(proto[i], typestr) + " __";
1496 s.push_back(arg);
1497 if(extranumber)
1498 s.push_back('1');
1499 s += " = (";
1500 s.push_back(arg);
1501 if(extranumber)
1502 s.push_back('1');
1503 s += "); ";
1504 }
1505
1506 if (generatedLocal)
1507 s += "\\\n ";
1508 return s;
1509 }
1510
1511 // Use the vmovl builtin to sign-extend or zero-extend a vector.
Extend(StringRef typestr,const std::string & a,bool h=0)1512 static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
1513 std::string s, high;
1514 high = h ? "_high" : "";
1515 s = MangleName("vmovl" + high, typestr, ClassS);
1516 s += "(" + a + ")";
1517 return s;
1518 }
1519
1520 // Get the high 64-bit part of a vector
GetHigh(const std::string & a,StringRef typestr)1521 static std::string GetHigh(const std::string &a, StringRef typestr) {
1522 std::string s;
1523 s = MangleName("vget_high", typestr, ClassS);
1524 s += "(" + a + ")";
1525 return s;
1526 }
1527
1528 // Gen operation with two operands and get high 64-bit for both of two operands.
Gen2OpWith2High(StringRef typestr,const std::string & op,const std::string & a,const std::string & b)1529 static std::string Gen2OpWith2High(StringRef typestr,
1530 const std::string &op,
1531 const std::string &a,
1532 const std::string &b) {
1533 std::string s;
1534 std::string Op1 = GetHigh(a, typestr);
1535 std::string Op2 = GetHigh(b, typestr);
1536 s = MangleName(op, typestr, ClassS);
1537 s += "(" + Op1 + ", " + Op2 + ");";
1538 return s;
1539 }
1540
1541 // Gen operation with three operands and get high 64-bit of the latter
1542 // two operands.
Gen3OpWith2High(StringRef typestr,const std::string & op,const std::string & a,const std::string & b,const std::string & c)1543 static std::string Gen3OpWith2High(StringRef typestr,
1544 const std::string &op,
1545 const std::string &a,
1546 const std::string &b,
1547 const std::string &c) {
1548 std::string s;
1549 std::string Op1 = GetHigh(b, typestr);
1550 std::string Op2 = GetHigh(c, typestr);
1551 s = MangleName(op, typestr, ClassS);
1552 s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
1553 return s;
1554 }
1555
1556 // Gen combine operation by putting a on low 64-bit, and b on high 64-bit.
GenCombine(std::string typestr,const std::string & a,const std::string & b)1557 static std::string GenCombine(std::string typestr,
1558 const std::string &a,
1559 const std::string &b) {
1560 std::string s;
1561 s = MangleName("vcombine", typestr, ClassS);
1562 s += "(" + a + ", " + b + ")";
1563 return s;
1564 }
1565
Duplicate(unsigned nElts,StringRef typestr,const std::string & a)1566 static std::string Duplicate(unsigned nElts, StringRef typestr,
1567 const std::string &a) {
1568 std::string s;
1569
1570 s = "(" + TypeString('d', typestr) + "){ ";
1571 for (unsigned i = 0; i != nElts; ++i) {
1572 s += a;
1573 if ((i + 1) < nElts)
1574 s += ", ";
1575 }
1576 s += " }";
1577
1578 return s;
1579 }
1580
// Emit a __builtin_shufflevector call that takes vec as both inputs and uses
// the index expression lane for each of the nElts result elements.
static std::string SplatLane(unsigned nElts, const std::string &vec,
                             const std::string &lane) {
  std::string Shuffle("__builtin_shufflevector(");
  Shuffle += vec;
  Shuffle += ", ";
  Shuffle += vec;

  // One ", <lane>" per result element.
  const std::string LaneArg = ", " + lane;
  for (unsigned Remaining = nElts; Remaining != 0; --Remaining)
    Shuffle += LaneArg;

  Shuffle += ")";
  return Shuffle;
}
1589
RemoveHigh(const std::string & name)1590 static std::string RemoveHigh(const std::string &name) {
1591 std::string s = name;
1592 std::size_t found = s.find("_high_");
1593 if (found == std::string::npos)
1594 PrintFatalError("name should contain \"_high_\" for high intrinsics");
1595 s.replace(found, 5, "");
1596 return s;
1597 }
1598
GetNumElements(StringRef typestr,bool & quad)1599 static unsigned GetNumElements(StringRef typestr, bool &quad) {
1600 quad = false;
1601 bool dummy = false;
1602 char type = ClassifyType(typestr, quad, dummy, dummy);
1603 unsigned nElts = 0;
1604 switch (type) {
1605 case 'c': nElts = 8; break;
1606 case 's': nElts = 4; break;
1607 case 'i': nElts = 2; break;
1608 case 'l': nElts = 1; break;
1609 case 'h': nElts = 4; break;
1610 case 'f': nElts = 2; break;
1611 case 'd':
1612 nElts = 1;
1613 break;
1614 default:
1615 PrintFatalError("unhandled type!");
1616 }
1617 if (quad) nElts <<= 1;
1618 return nElts;
1619 }
1620
1621 // Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
GenOpString(const std::string & name,OpKind op,const std::string & proto,StringRef typestr)1622 static std::string GenOpString(const std::string &name, OpKind op,
1623 const std::string &proto, StringRef typestr) {
1624 bool quad;
1625 unsigned nElts = GetNumElements(typestr, quad);
1626 bool define = UseMacro(proto);
1627
1628 std::string ts = TypeString(proto[0], typestr);
1629 std::string s;
1630 if (!define) {
1631 s = "return ";
1632 }
1633
1634 switch(op) {
1635 case OpAdd:
1636 s += "__a + __b;";
1637 break;
1638 case OpAddl:
1639 s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
1640 break;
1641 case OpAddlHi:
1642 s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
1643 break;
1644 case OpAddw:
1645 s += "__a + " + Extend(typestr, "__b") + ";";
1646 break;
1647 case OpAddwHi:
1648 s += "__a + " + Extend(typestr, "__b", 1) + ";";
1649 break;
1650 case OpSub:
1651 s += "__a - __b;";
1652 break;
1653 case OpSubl:
1654 s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
1655 break;
1656 case OpSublHi:
1657 s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
1658 break;
1659 case OpSubw:
1660 s += "__a - " + Extend(typestr, "__b") + ";";
1661 break;
1662 case OpSubwHi:
1663 s += "__a - " + Extend(typestr, "__b", 1) + ";";
1664 break;
1665 case OpMulN:
1666 s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
1667 break;
1668 case OpMulLane:
1669 s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
1670 break;
1671 case OpMulXLane:
1672 s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
1673 SplatLane(nElts, "__b", "__c") + ");";
1674 break;
1675 case OpMul:
1676 s += "__a * __b;";
1677 break;
1678 case OpFMlaN:
1679 s += MangleName("vfma", typestr, ClassS);
1680 s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
1681 break;
1682 case OpFMlsN:
1683 s += MangleName("vfms", typestr, ClassS);
1684 s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
1685 break;
1686 case OpMullLane:
1687 s += MangleName("vmull", typestr, ClassS) + "(__a, " +
1688 SplatLane(nElts, "__b", "__c") + ");";
1689 break;
1690 case OpMullHiLane:
1691 s += MangleName("vmull", typestr, ClassS) + "(" +
1692 GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
1693 break;
1694 case OpMlaN:
1695 s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1696 break;
1697 case OpMlaLane:
1698 s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1699 break;
1700 case OpMla:
1701 s += "__a + (__b * __c);";
1702 break;
1703 case OpMlalN:
1704 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1705 Duplicate(nElts, typestr, "__c") + ");";
1706 break;
1707 case OpMlalLane:
1708 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1709 SplatLane(nElts, "__c", "__d") + ");";
1710 break;
1711 case OpMlalHiLane:
1712 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
1713 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1714 break;
1715 case OpMlal:
1716 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1717 break;
1718 case OpMullHi:
1719 s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
1720 break;
1721 case OpMullHiN:
1722 s += MangleName("vmull_n", typestr, ClassS);
1723 s += "(" + GetHigh("__a", typestr) + ", __b);";
1724 return s;
1725 case OpMlalHi:
1726 s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
1727 break;
1728 case OpMlalHiN:
1729 s += MangleName("vmlal_n", typestr, ClassS);
1730 s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
1731 return s;
1732 case OpMlsN:
1733 s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1734 break;
1735 case OpMlsLane:
1736 s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1737 break;
1738 case OpFMSLane:
1739 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
1740 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
1741 s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n ";
1742 s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1743 break;
1744 case OpFMSLaneQ:
1745 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
1746 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n ";
1747 s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n ";
1748 s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1749 break;
1750 case OpMls:
1751 s += "__a - (__b * __c);";
1752 break;
1753 case OpMlslN:
1754 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1755 Duplicate(nElts, typestr, "__c") + ");";
1756 break;
1757 case OpMlslLane:
1758 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1759 SplatLane(nElts, "__c", "__d") + ");";
1760 break;
1761 case OpMlslHiLane:
1762 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
1763 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1764 break;
1765 case OpMlsl:
1766 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1767 break;
1768 case OpMlslHi:
1769 s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
1770 break;
1771 case OpMlslHiN:
1772 s += MangleName("vmlsl_n", typestr, ClassS);
1773 s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
1774 break;
1775 case OpQDMullLane:
1776 s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
1777 SplatLane(nElts, "__b", "__c") + ");";
1778 break;
1779 case OpQDMullHiLane:
1780 s += MangleName("vqdmull", typestr, ClassS) + "(" +
1781 GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
1782 break;
1783 case OpQDMlalLane:
1784 s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
1785 SplatLane(nElts, "__c", "__d") + ");";
1786 break;
1787 case OpQDMlalHiLane:
1788 s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
1789 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1790 break;
1791 case OpQDMlslLane:
1792 s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
1793 SplatLane(nElts, "__c", "__d") + ");";
1794 break;
1795 case OpQDMlslHiLane:
1796 s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
1797 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1798 break;
1799 case OpQDMulhLane:
1800 s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
1801 SplatLane(nElts, "__b", "__c") + ");";
1802 break;
1803 case OpQRDMulhLane:
1804 s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
1805 SplatLane(nElts, "__b", "__c") + ");";
1806 break;
1807 case OpEq:
1808 s += "(" + ts + ")(__a == __b);";
1809 break;
1810 case OpGe:
1811 s += "(" + ts + ")(__a >= __b);";
1812 break;
1813 case OpLe:
1814 s += "(" + ts + ")(__a <= __b);";
1815 break;
1816 case OpGt:
1817 s += "(" + ts + ")(__a > __b);";
1818 break;
1819 case OpLt:
1820 s += "(" + ts + ")(__a < __b);";
1821 break;
1822 case OpNeg:
1823 s += " -__a;";
1824 break;
1825 case OpNot:
1826 s += " ~__a;";
1827 break;
1828 case OpAnd:
1829 s += "__a & __b;";
1830 break;
1831 case OpOr:
1832 s += "__a | __b;";
1833 break;
1834 case OpXor:
1835 s += "__a ^ __b;";
1836 break;
1837 case OpAndNot:
1838 s += "__a & ~__b;";
1839 break;
1840 case OpOrNot:
1841 s += "__a | ~__b;";
1842 break;
1843 case OpCast:
1844 s += "(" + ts + ")__a;";
1845 break;
1846 case OpConcat:
1847 s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
1848 s += ", (int64x1_t)__b, 0, 1);";
1849 break;
1850 case OpHi:
1851 // nElts is for the result vector, so the source is twice that number.
1852 s += "__builtin_shufflevector(__a, __a";
1853 for (unsigned i = nElts; i < nElts * 2; ++i)
1854 s += ", " + utostr(i);
1855 s+= ");";
1856 break;
1857 case OpLo:
1858 s += "__builtin_shufflevector(__a, __a";
1859 for (unsigned i = 0; i < nElts; ++i)
1860 s += ", " + utostr(i);
1861 s+= ");";
1862 break;
1863 case OpDup:
1864 s += Duplicate(nElts, typestr, "__a") + ";";
1865 break;
1866 case OpDupLane:
1867 s += SplatLane(nElts, "__a", "__b") + ";";
1868 break;
1869 case OpSelect:
1870 // ((0 & 1) | (~0 & 2))
1871 s += "(" + ts + ")";
1872 ts = TypeString(proto[1], typestr);
1873 s += "((__a & (" + ts + ")__b) | ";
1874 s += "(~__a & (" + ts + ")__c));";
1875 break;
1876 case OpRev16:
1877 s += "__builtin_shufflevector(__a, __a";
1878 for (unsigned i = 2; i <= nElts; i += 2)
1879 for (unsigned j = 0; j != 2; ++j)
1880 s += ", " + utostr(i - j - 1);
1881 s += ");";
1882 break;
1883 case OpRev32: {
1884 unsigned WordElts = nElts >> (1 + (int)quad);
1885 s += "__builtin_shufflevector(__a, __a";
1886 for (unsigned i = WordElts; i <= nElts; i += WordElts)
1887 for (unsigned j = 0; j != WordElts; ++j)
1888 s += ", " + utostr(i - j - 1);
1889 s += ");";
1890 break;
1891 }
1892 case OpRev64: {
1893 unsigned DblWordElts = nElts >> (int)quad;
1894 s += "__builtin_shufflevector(__a, __a";
1895 for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
1896 for (unsigned j = 0; j != DblWordElts; ++j)
1897 s += ", " + utostr(i - j - 1);
1898 s += ");";
1899 break;
1900 }
1901 case OpXtnHi: {
1902 s = TypeString(proto[1], typestr) + " __a1 = " +
1903 MangleName("vmovn", typestr, ClassS) + "(__b);\n " +
1904 "return __builtin_shufflevector(__a, __a1";
1905 for (unsigned i = 0; i < nElts * 4; ++i)
1906 s += ", " + utostr(i);
1907 s += ");";
1908 break;
1909 }
1910 case OpSqxtunHi: {
1911 s = TypeString(proto[1], typestr) + " __a1 = " +
1912 MangleName("vqmovun", typestr, ClassS) + "(__b);\n " +
1913 "return __builtin_shufflevector(__a, __a1";
1914 for (unsigned i = 0; i < nElts * 4; ++i)
1915 s += ", " + utostr(i);
1916 s += ");";
1917 break;
1918 }
1919 case OpQxtnHi: {
1920 s = TypeString(proto[1], typestr) + " __a1 = " +
1921 MangleName("vqmovn", typestr, ClassS) + "(__b);\n " +
1922 "return __builtin_shufflevector(__a, __a1";
1923 for (unsigned i = 0; i < nElts * 4; ++i)
1924 s += ", " + utostr(i);
1925 s += ");";
1926 break;
1927 }
1928 case OpFcvtnHi: {
1929 std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16";
1930 s = TypeString(proto[1], typestr) + " __a1 = " +
1931 MangleName(FName, typestr, ClassS) + "(__b);\n " +
1932 "return __builtin_shufflevector(__a, __a1";
1933 for (unsigned i = 0; i < nElts * 4; ++i)
1934 s += ", " + utostr(i);
1935 s += ");";
1936 break;
1937 }
1938 case OpFcvtlHi: {
1939 std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32";
1940 s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) +
1941 ";\n return " + MangleName(FName, typestr, ClassS) + "(__a1);";
1942 break;
1943 }
1944 case OpFcvtxnHi: {
1945 s = TypeString(proto[1], typestr) + " __a1 = " +
1946 MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n " +
1947 "return __builtin_shufflevector(__a, __a1";
1948 for (unsigned i = 0; i < nElts * 4; ++i)
1949 s += ", " + utostr(i);
1950 s += ");";
1951 break;
1952 }
1953 case OpUzp1:
1954 s += "__builtin_shufflevector(__a, __b";
1955 for (unsigned i = 0; i < nElts; i++)
1956 s += ", " + utostr(2*i);
1957 s += ");";
1958 break;
1959 case OpUzp2:
1960 s += "__builtin_shufflevector(__a, __b";
1961 for (unsigned i = 0; i < nElts; i++)
1962 s += ", " + utostr(2*i+1);
1963 s += ");";
1964 break;
1965 case OpZip1:
1966 s += "__builtin_shufflevector(__a, __b";
1967 for (unsigned i = 0; i < (nElts/2); i++)
1968 s += ", " + utostr(i) + ", " + utostr(i+nElts);
1969 s += ");";
1970 break;
1971 case OpZip2:
1972 s += "__builtin_shufflevector(__a, __b";
1973 for (unsigned i = nElts/2; i < nElts; i++)
1974 s += ", " + utostr(i) + ", " + utostr(i+nElts);
1975 s += ");";
1976 break;
1977 case OpTrn1:
1978 s += "__builtin_shufflevector(__a, __b";
1979 for (unsigned i = 0; i < (nElts/2); i++)
1980 s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
1981 s += ");";
1982 break;
1983 case OpTrn2:
1984 s += "__builtin_shufflevector(__a, __b";
1985 for (unsigned i = 0; i < (nElts/2); i++)
1986 s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
1987 s += ");";
1988 break;
1989 case OpAbdl: {
1990 std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
1991 if (typestr[0] != 'U') {
1992 // vabd results are always unsigned and must be zero-extended.
1993 std::string utype = "U" + typestr.str();
1994 s += "(" + TypeString(proto[0], typestr) + ")";
1995 abd = "(" + TypeString('d', utype) + ")" + abd;
1996 s += Extend(utype, abd) + ";";
1997 } else {
1998 s += Extend(typestr, abd) + ";";
1999 }
2000 break;
2001 }
2002 case OpAbdlHi:
2003 s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
2004 break;
2005 case OpAddhnHi: {
2006 std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
2007 s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
2008 s += ";";
2009 break;
2010 }
2011 case OpRAddhnHi: {
2012 std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
2013 s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
2014 s += ";";
2015 break;
2016 }
2017 case OpSubhnHi: {
2018 std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
2019 s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
2020 s += ";";
2021 break;
2022 }
2023 case OpRSubhnHi: {
2024 std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
2025 s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
2026 s += ";";
2027 break;
2028 }
2029 case OpAba:
2030 s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
2031 break;
2032 case OpAbal:
2033 s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
2034 break;
2035 case OpAbalHi:
2036 s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
2037 break;
2038 case OpQDMullHi:
2039 s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
2040 break;
2041 case OpQDMullHiN:
2042 s += MangleName("vqdmull_n", typestr, ClassS);
2043 s += "(" + GetHigh("__a", typestr) + ", __b);";
2044 return s;
2045 case OpQDMlalHi:
2046 s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
2047 break;
2048 case OpQDMlalHiN:
2049 s += MangleName("vqdmlal_n", typestr, ClassS);
2050 s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
2051 return s;
2052 case OpQDMlslHi:
2053 s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
2054 break;
2055 case OpQDMlslHiN:
2056 s += MangleName("vqdmlsl_n", typestr, ClassS);
2057 s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
2058 return s;
2059 case OpDiv:
2060 s += "__a / __b;";
2061 break;
2062 case OpMovlHi: {
2063 s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
2064 MangleName("vget_high", typestr, ClassS) + "(__a);\n " + s;
2065 s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
2066 s += "(__a1, 0);";
2067 break;
2068 }
2069 case OpLongHi: {
2070 // Another local variable __a1 is needed for calling a Macro,
2071 // or using __a will have naming conflict when Macro expanding.
2072 s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
2073 MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
2074 s += " (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
2075 "(__a1, __b);";
2076 break;
2077 }
2078 case OpNarrowHi: {
2079 s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
2080 MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
2081 break;
2082 }
2083 case OpCopyLane: {
2084 s += TypeString('s', typestr) + " __c2 = " +
2085 MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n " +
2086 MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);";
2087 break;
2088 }
2089 case OpCopyQLane: {
2090 std::string typeCode = "";
2091 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2092 s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
2093 "(__c1, __d1); \\\n vsetq_lane_" + typeCode + "(__c2, __a1, __b1);";
2094 break;
2095 }
2096 case OpCopyLaneQ: {
2097 std::string typeCode = "";
2098 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2099 s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
2100 "(__c1, __d1); \\\n vset_lane_" + typeCode + "(__c2, __a1, __b1);";
2101 break;
2102 }
2103 case OpScalarMulLane: {
2104 std::string typeCode = "";
2105 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2106 s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
2107 "(__b, __c);\\\n __a * __d1;";
2108 break;
2109 }
2110 case OpScalarMulLaneQ: {
2111 std::string typeCode = "";
2112 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2113 s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode +
2114 "(__b, __c);\\\n __a * __d1;";
2115 break;
2116 }
2117 case OpScalarMulXLane: {
2118 bool dummy = false;
2119 char type = ClassifyType(typestr, dummy, dummy, dummy);
2120 if (type == 'f') type = 's';
2121 std::string typeCode = "";
2122 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2123 s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
2124 "(__b, __c);\\\n vmulx" + type + "_" +
2125 typeCode + "(__a, __d1);";
2126 break;
2127 }
2128 case OpScalarMulXLaneQ: {
2129 bool dummy = false;
2130 char type = ClassifyType(typestr, dummy, dummy, dummy);
2131 if (type == 'f') type = 's';
2132 std::string typeCode = "";
2133 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2134 s += TypeString('s', typestr) + " __d1 = vgetq_lane_" +
2135 typeCode + "(__b, __c);\\\n vmulx" + type +
2136 "_" + typeCode + "(__a, __d1);";
2137 break;
2138 }
2139
2140 case OpScalarVMulXLane: {
2141 bool dummy = false;
2142 char type = ClassifyType(typestr, dummy, dummy, dummy);
2143 if (type == 'f') type = 's';
2144 std::string typeCode = "";
2145 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2146 s += TypeString('s', typestr) + " __d1 = vget_lane_" +
2147 typeCode + "(__a, 0);\\\n" +
2148 " " + TypeString('s', typestr) + " __e1 = vget_lane_" +
2149 typeCode + "(__b, __c);\\\n" +
2150 " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
2151 typeCode + "(__d1, __e1);\\\n" +
2152 " " + TypeString('d', typestr) + " __g1;\\\n" +
2153 " vset_lane_" + typeCode + "(__f1, __g1, __c);";
2154 break;
2155 }
2156
2157 case OpScalarVMulXLaneQ: {
2158 bool dummy = false;
2159 char type = ClassifyType(typestr, dummy, dummy, dummy);
2160 if (type == 'f') type = 's';
2161 std::string typeCode = "";
2162 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2163 s += TypeString('s', typestr) + " __d1 = vget_lane_" +
2164 typeCode + "(__a, 0);\\\n" +
2165 " " + TypeString('s', typestr) + " __e1 = vgetq_lane_" +
2166 typeCode + "(__b, __c);\\\n" +
2167 " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
2168 typeCode + "(__d1, __e1);\\\n" +
2169 " " + TypeString('d', typestr) + " __g1;\\\n" +
2170 " vset_lane_" + typeCode + "(__f1, __g1, 0);";
2171 break;
2172 }
2173 case OpScalarQDMullLane: {
2174 std::string typeCode = "";
2175 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2176 s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
2177 "vget_lane_" + typeCode + "(b, __c));";
2178 break;
2179 }
2180 case OpScalarQDMullLaneQ: {
2181 std::string typeCode = "";
2182 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2183 s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
2184 "vgetq_lane_" + typeCode + "(b, __c));";
2185 break;
2186 }
2187 case OpScalarQDMulHiLane: {
2188 std::string typeCode = "";
2189 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2190 s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
2191 "vget_lane_" + typeCode + "(__b, __c));";
2192 break;
2193 }
2194 case OpScalarQDMulHiLaneQ: {
2195 std::string typeCode = "";
2196 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2197 s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
2198 "vgetq_lane_" + typeCode + "(__b, __c));";
2199 break;
2200 }
2201 case OpScalarQRDMulHiLane: {
2202 std::string typeCode = "";
2203 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2204 s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
2205 "vget_lane_" + typeCode + "(__b, __c));";
2206 break;
2207 }
2208 case OpScalarQRDMulHiLaneQ: {
2209 std::string typeCode = "";
2210 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2211 s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
2212 "vgetq_lane_" + typeCode + "(__b, __c));";
2213 break;
2214 }
2215 case OpScalarGetLane:{
2216 std::string typeCode = "";
2217 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2218 if (quad) {
2219 s += "int16x8_t __a1 = vreinterpretq_s16_f16(__a);\\\n";
2220 s += " vgetq_lane_s16(__a1, __b);";
2221 } else {
2222 s += "int16x4_t __a1 = vreinterpret_s16_f16(__a);\\\n";
2223 s += " vget_lane_s16(__a1, __b);";
2224 }
2225 break;
2226 }
2227 case OpScalarSetLane:{
2228 std::string typeCode = "";
2229 InstructionTypeCode(typestr, ClassS, quad, typeCode);
2230 s += "int16_t __a1 = (int16_t)__a;\\\n";
2231 if (quad) {
2232 s += " int16x8_t __b1 = vreinterpretq_s16_f16(b);\\\n";
2233 s += " int16x8_t __b2 = vsetq_lane_s16(__a1, __b1, __c);\\\n";
2234 s += " vreinterpretq_f16_s16(__b2);";
2235 } else {
2236 s += " int16x4_t __b1 = vreinterpret_s16_f16(b);\\\n";
2237 s += " int16x4_t __b2 = vset_lane_s16(__a1, __b1, __c);\\\n";
2238 s += " vreinterpret_f16_s16(__b2);";
2239 }
2240 break;
2241 }
2242
2243 default:
2244 PrintFatalError("unknown OpKind!");
2245 }
2246 return s;
2247 }
2248
GetNeonEnum(const std::string & proto,StringRef typestr)2249 static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
2250 unsigned mod = proto[0];
2251
2252 if (mod == 'v' || mod == 'f' || mod == 'F')
2253 mod = proto[1];
2254
2255 bool quad = false;
2256 bool poly = false;
2257 bool usgn = false;
2258 bool scal = false;
2259 bool cnst = false;
2260 bool pntr = false;
2261
2262 // Base type to get the type string for.
2263 char type = ClassifyType(typestr, quad, poly, usgn);
2264
2265 // Based on the modifying character, change the type and width if necessary.
2266 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
2267
2268 NeonTypeFlags::EltType ET;
2269 switch (type) {
2270 case 'c':
2271 ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
2272 break;
2273 case 's':
2274 ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
2275 break;
2276 case 'i':
2277 ET = NeonTypeFlags::Int32;
2278 break;
2279 case 'l':
2280 ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64;
2281 break;
2282 case 'h':
2283 ET = NeonTypeFlags::Float16;
2284 break;
2285 case 'f':
2286 ET = NeonTypeFlags::Float32;
2287 break;
2288 case 'd':
2289 ET = NeonTypeFlags::Float64;
2290 break;
2291 default:
2292 PrintFatalError("unhandled type!");
2293 }
2294 NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
2295 return Flags.getFlags();
2296 }
2297
// Return true if the prototype string contains any of the modifier characters
// 's', 'z', 'r', 'b', '$', 'y' or 'o'; callers treat such prototypes as having
// a scalar operand.
//
// We don't check 'a' in this function, because for builtin function the
// argument matching to 'a' uses a vector type splatted from a scalar type.
//
// The prototype is taken by const reference so that no copy of the string is
// made on each call, and all characters are searched for in a single pass.
static bool ProtoHasScalar(const std::string &proto) {
  return proto.find_first_of("szrb$yo") != std::string::npos;
}
2310
2311 // Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
GenBuiltin(const std::string & name,const std::string & proto,StringRef typestr,ClassKind ck)2312 static std::string GenBuiltin(const std::string &name, const std::string &proto,
2313 StringRef typestr, ClassKind ck) {
2314 std::string s;
2315
2316 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
2317 // sret-like argument.
2318 bool sret = IsMultiVecProto(proto[0]);
2319
2320 bool define = UseMacro(proto);
2321
2322 // Check if the prototype has a scalar operand with the type of the vector
2323 // elements. If not, bitcasting the args will take care of arg checking.
2324 // The actual signedness etc. will be taken care of with special enums.
2325 if (!ProtoHasScalar(proto))
2326 ck = ClassB;
2327
2328 if (proto[0] != 'v') {
2329 std::string ts = TypeString(proto[0], typestr);
2330
2331 if (define) {
2332 if (sret)
2333 s += ts + " r; ";
2334 else
2335 s += "(" + ts + ")";
2336 } else if (sret) {
2337 s += ts + " r; ";
2338 } else {
2339 s += "return (" + ts + ")";
2340 }
2341 }
2342
2343 bool splat = proto.find('a') != std::string::npos;
2344
2345 s += "__builtin_neon_";
2346 if (splat) {
2347 // Call the non-splat builtin: chop off the "_n" suffix from the name.
2348 std::string vname(name, 0, name.size()-2);
2349 s += MangleName(vname, typestr, ck);
2350 } else {
2351 s += MangleName(name, typestr, ck);
2352 }
2353 s += "(";
2354
2355 // Pass the address of the return variable as the first argument to sret-like
2356 // builtins.
2357 if (sret)
2358 s += "&r, ";
2359
2360 char arg = 'a';
2361 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
2362 std::string args = std::string(&arg, 1);
2363
2364 // Use the local temporaries instead of the macro arguments.
2365 args = "__" + args;
2366
2367 bool argQuad = false;
2368 bool argPoly = false;
2369 bool argUsgn = false;
2370 bool argScalar = false;
2371 bool dummy = false;
2372 char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
2373 argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
2374 dummy, dummy);
2375
2376 // Handle multiple-vector values specially, emitting each subvector as an
2377 // argument to the __builtin.
2378 unsigned NumOfVec = 0;
2379 if (proto[i] >= '2' && proto[i] <= '4') {
2380 NumOfVec = proto[i] - '0';
2381 } else if (proto[i] >= 'B' && proto[i] <= 'D') {
2382 NumOfVec = proto[i] - 'A' + 1;
2383 }
2384
2385 if (NumOfVec > 0) {
2386 // Check if an explicit cast is needed.
2387 if (argType != 'c' || argPoly || argUsgn)
2388 args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;
2389
2390 for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
2391 s += args + ".val[" + utostr(vi) + "]";
2392 if ((vi + 1) < ve)
2393 s += ", ";
2394 }
2395 if ((i + 1) < e)
2396 s += ", ";
2397
2398 continue;
2399 }
2400
2401 if (splat && (i + 1) == e)
2402 args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);
2403
2404 // Check if an explicit cast is needed.
2405 if ((splat || !argScalar) &&
2406 ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
2407 std::string argTypeStr = "c";
2408 if (ck != ClassB)
2409 argTypeStr = argType;
2410 if (argQuad)
2411 argTypeStr = "Q" + argTypeStr;
2412 args = "(" + TypeString('d', argTypeStr) + ")" + args;
2413 }
2414
2415 s += args;
2416 if ((i + 1) < e)
2417 s += ", ";
2418 }
2419
2420 // Extra constant integer to hold type class enum for this function, e.g. s8
2421 if (ck == ClassB)
2422 s += ", " + utostr(GetNeonEnum(proto, typestr));
2423
2424 s += ");";
2425
2426 if (proto[0] != 'v' && sret) {
2427 if (define)
2428 s += " r;";
2429 else
2430 s += " return r;";
2431 }
2432 return s;
2433 }
2434
GenBuiltinDef(const std::string & name,const std::string & proto,StringRef typestr,ClassKind ck)2435 static std::string GenBuiltinDef(const std::string &name,
2436 const std::string &proto,
2437 StringRef typestr, ClassKind ck) {
2438 std::string s("BUILTIN(__builtin_neon_");
2439
2440 // If all types are the same size, bitcasting the args will take care
2441 // of arg checking. The actual signedness etc. will be taken care of with
2442 // special enums.
2443 if (!ProtoHasScalar(proto))
2444 ck = ClassB;
2445
2446 s += MangleName(name, typestr, ck);
2447 s += ", \"";
2448
2449 for (unsigned i = 0, e = proto.size(); i != e; ++i)
2450 s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
2451
2452 // Extra constant integer to hold type class enum for this function, e.g. s8
2453 if (ck == ClassB)
2454 s += "i";
2455
2456 s += "\", \"n\")";
2457 return s;
2458 }
2459
GenIntrinsic(const std::string & name,const std::string & proto,StringRef outTypeStr,StringRef inTypeStr,OpKind kind,ClassKind classKind)2460 static std::string GenIntrinsic(const std::string &name,
2461 const std::string &proto,
2462 StringRef outTypeStr, StringRef inTypeStr,
2463 OpKind kind, ClassKind classKind) {
2464 assert(!proto.empty() && "");
2465 bool define = UseMacro(proto) && kind != OpUnavailable;
2466 std::string s;
2467
2468 // static always inline + return type
2469 if (define)
2470 s += "#define ";
2471 else
2472 s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
2473
2474 // Function name with type suffix
2475 std::string mangledName = MangleName(name, outTypeStr, ClassS);
2476 if (outTypeStr != inTypeStr) {
2477 // If the input type is different (e.g., for vreinterpret), append a suffix
2478 // for the input type. String off a "Q" (quad) prefix so that MangleName
2479 // does not insert another "q" in the name.
2480 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2481 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2482 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2483 }
2484 s += mangledName;
2485
2486 // Function arguments
2487 s += GenArgs(proto, inTypeStr, name);
2488
2489 // Definition.
2490 if (define) {
2491 s += " __extension__ ({ \\\n ";
2492 s += GenMacroLocals(proto, inTypeStr, name);
2493 } else if (kind == OpUnavailable) {
2494 s += " __attribute__((unavailable));\n";
2495 return s;
2496 } else
2497 s += " {\n ";
2498
2499 if (kind != OpNone)
2500 s += GenOpString(name, kind, proto, outTypeStr);
2501 else
2502 s += GenBuiltin(name, proto, outTypeStr, classKind);
2503 if (define)
2504 s += " })";
2505 else
2506 s += " }";
2507 s += "\n";
2508 return s;
2509 }
2510
2511 /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
2512 /// is comprised of type definitions and function declarations.
run(raw_ostream & OS)2513 void NeonEmitter::run(raw_ostream &OS) {
2514 OS <<
2515 "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
2516 "---===\n"
2517 " *\n"
2518 " * Permission is hereby granted, free of charge, to any person obtaining "
2519 "a copy\n"
2520 " * of this software and associated documentation files (the \"Software\"),"
2521 " to deal\n"
2522 " * in the Software without restriction, including without limitation the "
2523 "rights\n"
2524 " * to use, copy, modify, merge, publish, distribute, sublicense, "
2525 "and/or sell\n"
2526 " * copies of the Software, and to permit persons to whom the Software is\n"
2527 " * furnished to do so, subject to the following conditions:\n"
2528 " *\n"
2529 " * The above copyright notice and this permission notice shall be "
2530 "included in\n"
2531 " * all copies or substantial portions of the Software.\n"
2532 " *\n"
2533 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2534 "EXPRESS OR\n"
2535 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2536 "MERCHANTABILITY,\n"
2537 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2538 "SHALL THE\n"
2539 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2540 "OTHER\n"
2541 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2542 "ARISING FROM,\n"
2543 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2544 "DEALINGS IN\n"
2545 " * THE SOFTWARE.\n"
2546 " *\n"
2547 " *===--------------------------------------------------------------------"
2548 "---===\n"
2549 " */\n\n";
2550
2551 OS << "#ifndef __ARM_NEON_H\n";
2552 OS << "#define __ARM_NEON_H\n\n";
2553
2554 OS << "#if !defined(__ARM_NEON__) && !defined(__ARM_NEON)\n";
2555 OS << "#error \"NEON support not enabled\"\n";
2556 OS << "#endif\n\n";
2557
2558 OS << "#include <stdint.h>\n\n";
2559
2560 // Emit NEON-specific scalar typedefs.
2561 OS << "typedef float float32_t;\n";
2562 OS << "typedef __fp16 float16_t;\n";
2563
2564 OS << "#ifdef __aarch64__\n";
2565 OS << "typedef double float64_t;\n";
2566 OS << "#endif\n\n";
2567
2568 // For now, signedness of polynomial types depends on target
2569 OS << "#ifdef __aarch64__\n";
2570 OS << "typedef uint8_t poly8_t;\n";
2571 OS << "typedef uint16_t poly16_t;\n";
2572 OS << "typedef uint64_t poly64_t;\n";
2573 OS << "#else\n";
2574 OS << "typedef int8_t poly8_t;\n";
2575 OS << "typedef int16_t poly16_t;\n";
2576 OS << "#endif\n";
2577
2578 // Emit Neon vector typedefs.
2579 std::string TypedefTypes(
2580 "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
2581 SmallVector<StringRef, 24> TDTypeVec;
2582 ParseTypes(0, TypedefTypes, TDTypeVec);
2583
2584 // Emit vector typedefs.
2585 bool isA64 = false;
2586 bool preinsert;
2587 bool postinsert;
2588 for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
2589 bool dummy, quad = false, poly = false;
2590 char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
2591 preinsert = false;
2592 postinsert = false;
2593
2594 if (type == 'd' || (type == 'l' && poly)) {
2595 preinsert = isA64? false: true;
2596 isA64 = true;
2597 } else {
2598 postinsert = isA64? true: false;
2599 isA64 = false;
2600 }
2601 if (postinsert)
2602 OS << "#endif\n";
2603 if (preinsert)
2604 OS << "#ifdef __aarch64__\n";
2605
2606 if (poly)
2607 OS << "typedef __attribute__((neon_polyvector_type(";
2608 else
2609 OS << "typedef __attribute__((neon_vector_type(";
2610
2611 unsigned nElts = GetNumElements(TDTypeVec[i], quad);
2612 OS << utostr(nElts) << "))) ";
2613 if (nElts < 10)
2614 OS << " ";
2615
2616 OS << TypeString('s', TDTypeVec[i]);
2617 OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
2618
2619 }
2620 postinsert = isA64? true: false;
2621 if (postinsert)
2622 OS << "#endif\n";
2623 OS << "\n";
2624
2625 // Emit struct typedefs.
2626 isA64 = false;
2627 for (unsigned vi = 2; vi != 5; ++vi) {
2628 for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
2629 bool dummy, quad = false, poly = false;
2630 char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
2631 preinsert = false;
2632 postinsert = false;
2633
2634 if (type == 'd' || (type == 'l' && poly)) {
2635 preinsert = isA64? false: true;
2636 isA64 = true;
2637 } else {
2638 postinsert = isA64? true: false;
2639 isA64 = false;
2640 }
2641 if (postinsert)
2642 OS << "#endif\n";
2643 if (preinsert)
2644 OS << "#ifdef __aarch64__\n";
2645
2646 std::string ts = TypeString('d', TDTypeVec[i]);
2647 std::string vs = TypeString('0' + vi, TDTypeVec[i]);
2648 OS << "typedef struct " << vs << " {\n";
2649 OS << " " << ts << " val";
2650 OS << "[" << utostr(vi) << "]";
2651 OS << ";\n} ";
2652 OS << vs << ";\n";
2653 OS << "\n";
2654 }
2655 }
2656 postinsert = isA64? true: false;
2657 if (postinsert)
2658 OS << "#endif\n";
2659 OS << "\n";
2660
2661 OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";
2662
2663 std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
2664
2665 StringMap<ClassKind> EmittedMap;
2666
2667 // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
2668 // intrinsics. (Some of the saturating multiply instructions are also
2669 // used to implement the corresponding "_lane" variants, but tablegen
2670 // sorts the records into alphabetical order so that the "_lane" variants
2671 // come after the intrinsics they use.)
2672 emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
2673 emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
2674 emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
2675 emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);
2676
2677 // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
2678 // common intrinsics appear only once in the output stream.
2679 // The check for uniquiness is done in emitIntrinsic.
2680 // Emit ARM intrinsics.
2681 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2682 Record *R = RV[i];
2683
2684 // Skip AArch64 intrinsics; they will be emitted at the end.
2685 bool isA64 = R->getValueAsBit("isA64");
2686 if (isA64)
2687 continue;
2688
2689 if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
2690 R->getName() != "VABD")
2691 emitIntrinsic(OS, R, EmittedMap);
2692 }
2693
2694 // Emit AArch64-specific intrinsics.
2695 OS << "#ifdef __aarch64__\n";
2696
2697 emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
2698 emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
2699 emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);
2700
2701 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2702 Record *R = RV[i];
2703
2704 // Skip ARM intrinsics already included above.
2705 bool isA64 = R->getValueAsBit("isA64");
2706 if (!isA64)
2707 continue;
2708
2709 // Skip crypto temporarily, and will emit them all together at the end.
2710 bool isCrypto = R->getValueAsBit("isCrypto");
2711 if (isCrypto)
2712 continue;
2713
2714 emitIntrinsic(OS, R, EmittedMap);
2715 }
2716
2717 OS << "#ifdef __ARM_FEATURE_CRYPTO\n";
2718
2719 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2720 Record *R = RV[i];
2721
2722 // Skip crypto temporarily, and will emit them all together at the end.
2723 bool isCrypto = R->getValueAsBit("isCrypto");
2724 if (!isCrypto)
2725 continue;
2726
2727 emitIntrinsic(OS, R, EmittedMap);
2728 }
2729
2730 OS << "#endif\n\n";
2731
2732 OS << "#endif\n\n";
2733
2734 OS << "#undef __ai\n\n";
2735 OS << "#endif /* __ARM_NEON_H */\n";
2736 }
2737
2738 /// emitIntrinsic - Write out the arm_neon.h header file definitions for the
2739 /// intrinsics specified by record R checking for intrinsic uniqueness.
emitIntrinsic(raw_ostream & OS,Record * R,StringMap<ClassKind> & EmittedMap)2740 void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
2741 StringMap<ClassKind> &EmittedMap) {
2742 std::string name = R->getValueAsString("Name");
2743 std::string Proto = R->getValueAsString("Prototype");
2744 std::string Types = R->getValueAsString("Types");
2745
2746 SmallVector<StringRef, 16> TypeVec;
2747 ParseTypes(R, Types, TypeVec);
2748
2749 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2750
2751 ClassKind classKind = ClassNone;
2752 if (R->getSuperClasses().size() >= 2)
2753 classKind = ClassMap[R->getSuperClasses()[1]];
2754 if (classKind == ClassNone && kind == OpNone)
2755 PrintFatalError(R->getLoc(), "Builtin has no class kind");
2756
2757 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2758 if (kind == OpReinterpret) {
2759 bool outQuad = false;
2760 bool dummy = false;
2761 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2762 for (unsigned srcti = 0, srcte = TypeVec.size();
2763 srcti != srcte; ++srcti) {
2764 bool inQuad = false;
2765 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
2766 if (srcti == ti || inQuad != outQuad)
2767 continue;
2768 std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
2769 OpCast, ClassS);
2770 if (EmittedMap.count(s))
2771 continue;
2772 EmittedMap[s] = ClassS;
2773 OS << s;
2774 }
2775 } else {
2776 std::string s =
2777 GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
2778 if (EmittedMap.count(s))
2779 continue;
2780 EmittedMap[s] = classKind;
2781 OS << s;
2782 }
2783 }
2784 OS << "\n";
2785 }
2786
RangeFromType(const char mod,StringRef typestr)2787 static unsigned RangeFromType(const char mod, StringRef typestr) {
2788 // base type to get the type string for.
2789 bool quad = false, dummy = false;
2790 char type = ClassifyType(typestr, quad, dummy, dummy);
2791 type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
2792
2793 switch (type) {
2794 case 'c':
2795 return (8 << (int)quad) - 1;
2796 case 'h':
2797 case 's':
2798 return (4 << (int)quad) - 1;
2799 case 'f':
2800 case 'i':
2801 return (2 << (int)quad) - 1;
2802 case 'd':
2803 case 'l':
2804 return (1 << (int)quad) - 1;
2805 default:
2806 PrintFatalError("unhandled type!");
2807 }
2808 }
2809
RangeScalarShiftImm(const char mod,StringRef typestr)2810 static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) {
2811 // base type to get the type string for.
2812 bool dummy = false;
2813 char type = ClassifyType(typestr, dummy, dummy, dummy);
2814 type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy);
2815
2816 switch (type) {
2817 case 'c':
2818 return 7;
2819 case 'h':
2820 case 's':
2821 return 15;
2822 case 'f':
2823 case 'i':
2824 return 31;
2825 case 'd':
2826 case 'l':
2827 return 63;
2828 default:
2829 PrintFatalError("unhandled type!");
2830 }
2831 }
2832
2833 /// Generate the ARM and AArch64 intrinsic range checking code for
2834 /// shift/lane immediates, checking for unique declarations.
2835 void
genIntrinsicRangeCheckCode(raw_ostream & OS,StringMap<ClassKind> & A64IntrinsicMap,bool isA64RangeCheck)2836 NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
2837 StringMap<ClassKind> &A64IntrinsicMap,
2838 bool isA64RangeCheck) {
2839 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2840 StringMap<OpKind> EmittedMap;
2841
2842 // Generate the intrinsic range checking code for shift/lane immediates.
2843 if (isA64RangeCheck)
2844 OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
2845 else
2846 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2847
2848 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2849 Record *R = RV[i];
2850
2851 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2852 if (k != OpNone)
2853 continue;
2854
2855 std::string name = R->getValueAsString("Name");
2856 std::string Proto = R->getValueAsString("Prototype");
2857 std::string Types = R->getValueAsString("Types");
2858 std::string Rename = name + "@" + Proto;
2859
2860 // Functions with 'a' (the splat code) in the type prototype should not get
2861 // their own builtin as they use the non-splat variant.
2862 if (Proto.find('a') != std::string::npos)
2863 continue;
2864
2865 // Functions which do not have an immediate do not need to have range
2866 // checking code emitted.
2867 size_t immPos = Proto.find('i');
2868 if (immPos == std::string::npos)
2869 continue;
2870
2871 SmallVector<StringRef, 16> TypeVec;
2872 ParseTypes(R, Types, TypeVec);
2873
2874 if (R->getSuperClasses().size() < 2)
2875 PrintFatalError(R->getLoc(), "Builtin has no class kind");
2876
2877 ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2878 if (!ProtoHasScalar(Proto))
2879 ck = ClassB;
2880
2881 // Do not include AArch64 range checks if not generating code for AArch64.
2882 bool isA64 = R->getValueAsBit("isA64");
2883 if (!isA64RangeCheck && isA64)
2884 continue;
2885
2886 // Include ARM range checks in AArch64 but only if ARM intrinsics are not
2887 // redefined by AArch64 to handle new types.
2888 if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
2889 ClassKind &A64CK = A64IntrinsicMap[Rename];
2890 if (A64CK == ck && ck != ClassNone)
2891 continue;
2892 }
2893
2894 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2895 std::string namestr, shiftstr, rangestr;
2896
2897 if (R->getValueAsBit("isVCVT_N")) {
2898 // VCVT between floating- and fixed-point values takes an immediate
2899 // in the range [1, 32] for f32, or [1, 64] for f64.
2900 ck = ClassB;
2901 if (name.find("32") != std::string::npos)
2902 rangestr = "l = 1; u = 31"; // upper bound = l + u
2903 else if (name.find("64") != std::string::npos)
2904 rangestr = "l = 1; u = 63";
2905 else
2906 PrintFatalError(R->getLoc(),
2907 "Fixed point convert name should contains \"32\" or \"64\"");
2908
2909 } else if (R->getValueAsBit("isScalarShift")) {
2910 // Right shifts have an 'r' in the name, left shifts do not. Convert
2911 // instructions have the same bounds and right shifts.
2912 if (name.find('r') != std::string::npos ||
2913 name.find("cvt") != std::string::npos)
2914 rangestr = "l = 1; ";
2915
2916 unsigned upBound = RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]);
2917 // Narrow shift has half the upper bound
2918 if (R->getValueAsBit("isScalarNarrowShift"))
2919 upBound /= 2;
2920
2921 rangestr += "u = " + utostr(upBound);
2922 } else if (R->getValueAsBit("isShift")) {
2923 // Builtins which are overloaded by type will need to have their upper
2924 // bound computed at Sema time based on the type constant.
2925 shiftstr = ", true";
2926
2927 // Right shifts have an 'r' in the name, left shifts do not.
2928 if (name.find('r') != std::string::npos)
2929 rangestr = "l = 1; ";
2930
2931 rangestr += "u = RFT(TV" + shiftstr + ")";
2932 } else {
2933 // The immediate generally refers to a lane in the preceding argument.
2934 assert(immPos > 0 && "unexpected immediate operand");
2935 rangestr =
2936 "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
2937 }
2938 // Make sure cases appear only once by uniquing them in a string map.
2939 namestr = MangleName(name, TypeVec[ti], ck);
2940 if (EmittedMap.count(namestr))
2941 continue;
2942 EmittedMap[namestr] = OpNone;
2943
2944 // Calculate the index of the immediate that should be range checked.
2945 unsigned immidx = 0;
2946
2947 // Builtins that return a struct of multiple vectors have an extra
2948 // leading arg for the struct return.
2949 if (IsMultiVecProto(Proto[0]))
2950 ++immidx;
2951
2952 // Add one to the index for each argument until we reach the immediate
2953 // to be checked. Structs of vectors are passed as multiple arguments.
2954 for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
2955 switch (Proto[ii]) {
2956 default:
2957 immidx += 1;
2958 break;
2959 case '2':
2960 case 'B':
2961 immidx += 2;
2962 break;
2963 case '3':
2964 case 'C':
2965 immidx += 3;
2966 break;
2967 case '4':
2968 case 'D':
2969 immidx += 4;
2970 break;
2971 case 'i':
2972 ie = ii + 1;
2973 break;
2974 }
2975 }
2976 if (isA64RangeCheck)
2977 OS << "case AArch64::BI__builtin_neon_";
2978 else
2979 OS << "case ARM::BI__builtin_neon_";
2980 OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
2981 << rangestr << "; break;\n";
2982 }
2983 }
2984 OS << "#endif\n\n";
2985 }
2986
2987 /// Generate the ARM and AArch64 overloaded type checking code for
2988 /// SemaChecking.cpp, checking for unique builtin declarations.
2989 void
genOverloadTypeCheckCode(raw_ostream & OS,StringMap<ClassKind> & A64IntrinsicMap,bool isA64TypeCheck)2990 NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
2991 StringMap<ClassKind> &A64IntrinsicMap,
2992 bool isA64TypeCheck) {
2993 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2994 StringMap<OpKind> EmittedMap;
2995
2996 // Generate the overloaded type checking code for SemaChecking.cpp
2997 if (isA64TypeCheck)
2998 OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
2999 else
3000 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
3001
3002 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3003 Record *R = RV[i];
3004 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
3005 if (k != OpNone)
3006 continue;
3007
3008 std::string Proto = R->getValueAsString("Prototype");
3009 std::string Types = R->getValueAsString("Types");
3010 std::string name = R->getValueAsString("Name");
3011 std::string Rename = name + "@" + Proto;
3012
3013 // Functions with 'a' (the splat code) in the type prototype should not get
3014 // their own builtin as they use the non-splat variant.
3015 if (Proto.find('a') != std::string::npos)
3016 continue;
3017
3018 // Functions which have a scalar argument cannot be overloaded, no need to
3019 // check them if we are emitting the type checking code.
3020 if (ProtoHasScalar(Proto))
3021 continue;
3022
3023 SmallVector<StringRef, 16> TypeVec;
3024 ParseTypes(R, Types, TypeVec);
3025
3026 if (R->getSuperClasses().size() < 2)
3027 PrintFatalError(R->getLoc(), "Builtin has no class kind");
3028
3029 // Do not include AArch64 type checks if not generating code for AArch64.
3030 bool isA64 = R->getValueAsBit("isA64");
3031 if (!isA64TypeCheck && isA64)
3032 continue;
3033
3034 // Include ARM type check in AArch64 but only if ARM intrinsics
3035 // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
3036 // redefined in AArch64 to handle an additional 2 x f64 type.
3037 ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3038 if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
3039 ClassKind &A64CK = A64IntrinsicMap[Rename];
3040 if (A64CK == ck && ck != ClassNone)
3041 continue;
3042 }
3043
3044 int si = -1, qi = -1;
3045 uint64_t mask = 0, qmask = 0;
3046 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3047 // Generate the switch case(s) for this builtin for the type validation.
3048 bool quad = false, poly = false, usgn = false;
3049 (void) ClassifyType(TypeVec[ti], quad, poly, usgn);
3050
3051 if (quad) {
3052 qi = ti;
3053 qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
3054 } else {
3055 si = ti;
3056 mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
3057 }
3058 }
3059
3060 // Check if the builtin function has a pointer or const pointer argument.
3061 int PtrArgNum = -1;
3062 bool HasConstPtr = false;
3063 for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
3064 char ArgType = Proto[arg];
3065 if (ArgType == 'c') {
3066 HasConstPtr = true;
3067 PtrArgNum = arg - 1;
3068 break;
3069 }
3070 if (ArgType == 'p') {
3071 PtrArgNum = arg - 1;
3072 break;
3073 }
3074 }
3075 // For sret builtins, adjust the pointer argument index.
3076 if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
3077 PtrArgNum += 1;
3078
3079 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
3080 // and vst1_lane intrinsics. Using a pointer to the vector element
3081 // type with one of those operations causes codegen to select an aligned
3082 // load/store instruction. If you want an unaligned operation,
3083 // the pointer argument needs to have less alignment than element type,
3084 // so just accept any pointer type.
3085 if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
3086 PtrArgNum = -1;
3087 HasConstPtr = false;
3088 }
3089
3090 if (mask) {
3091 if (isA64TypeCheck)
3092 OS << "case AArch64::BI__builtin_neon_";
3093 else
3094 OS << "case ARM::BI__builtin_neon_";
3095 OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
3096 << "0x" << utohexstr(mask) << "ULL";
3097 if (PtrArgNum >= 0)
3098 OS << "; PtrArgNum = " << PtrArgNum;
3099 if (HasConstPtr)
3100 OS << "; HasConstPtr = true";
3101 OS << "; break;\n";
3102 }
3103 if (qmask) {
3104 if (isA64TypeCheck)
3105 OS << "case AArch64::BI__builtin_neon_";
3106 else
3107 OS << "case ARM::BI__builtin_neon_";
3108 OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
3109 << "0x" << utohexstr(qmask) << "ULL";
3110 if (PtrArgNum >= 0)
3111 OS << "; PtrArgNum = " << PtrArgNum;
3112 if (HasConstPtr)
3113 OS << "; HasConstPtr = true";
3114 OS << "; break;\n";
3115 }
3116 }
3117 OS << "#endif\n\n";
3118 }
3119
3120 /// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
3121 /// declaration of builtins, checking for unique builtin declarations.
genBuiltinsDef(raw_ostream & OS,StringMap<ClassKind> & A64IntrinsicMap,bool isA64GenBuiltinDef)3122 void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
3123 StringMap<ClassKind> &A64IntrinsicMap,
3124 bool isA64GenBuiltinDef) {
3125 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3126 StringMap<OpKind> EmittedMap;
3127
3128 // Generate BuiltinsARM.def and BuiltinsAArch64.def
3129 if (isA64GenBuiltinDef)
3130 OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
3131 else
3132 OS << "#ifdef GET_NEON_BUILTINS\n";
3133
3134 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3135 Record *R = RV[i];
3136 OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
3137 if (k != OpNone)
3138 continue;
3139
3140 std::string Proto = R->getValueAsString("Prototype");
3141 std::string name = R->getValueAsString("Name");
3142 std::string Rename = name + "@" + Proto;
3143
3144 // Functions with 'a' (the splat code) in the type prototype should not get
3145 // their own builtin as they use the non-splat variant.
3146 if (Proto.find('a') != std::string::npos)
3147 continue;
3148
3149 std::string Types = R->getValueAsString("Types");
3150 SmallVector<StringRef, 16> TypeVec;
3151 ParseTypes(R, Types, TypeVec);
3152
3153 if (R->getSuperClasses().size() < 2)
3154 PrintFatalError(R->getLoc(), "Builtin has no class kind");
3155
3156 ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3157
3158 // Do not include AArch64 BUILTIN() macros if not generating
3159 // code for AArch64
3160 bool isA64 = R->getValueAsBit("isA64");
3161 if (!isA64GenBuiltinDef && isA64)
3162 continue;
3163
3164 // Include ARM BUILTIN() macros in AArch64 but only if ARM intrinsics
3165 // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
3166 // redefined in AArch64 to handle an additional 2 x f64 type.
3167 if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
3168 ClassKind &A64CK = A64IntrinsicMap[Rename];
3169 if (A64CK == ck && ck != ClassNone)
3170 continue;
3171 }
3172
3173 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3174 // Generate the declaration for this builtin, ensuring
3175 // that each unique BUILTIN() macro appears only once in the output
3176 // stream.
3177 std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
3178 if (EmittedMap.count(bd))
3179 continue;
3180
3181 EmittedMap[bd] = OpNone;
3182 OS << bd << "\n";
3183 }
3184 }
3185 OS << "#endif\n\n";
3186 }
3187
3188 /// runHeader - Emit a file with sections defining:
3189 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
3190 /// 2. the SemaChecking code for the type overload checking.
3191 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
runHeader(raw_ostream & OS)3192 void NeonEmitter::runHeader(raw_ostream &OS) {
3193 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3194
3195 // build a map of AArch64 intriniscs to be used in uniqueness checks.
3196 StringMap<ClassKind> A64IntrinsicMap;
3197 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3198 Record *R = RV[i];
3199
3200 bool isA64 = R->getValueAsBit("isA64");
3201 if (!isA64)
3202 continue;
3203
3204 ClassKind CK = ClassNone;
3205 if (R->getSuperClasses().size() >= 2)
3206 CK = ClassMap[R->getSuperClasses()[1]];
3207
3208 std::string Name = R->getValueAsString("Name");
3209 std::string Proto = R->getValueAsString("Prototype");
3210 std::string Rename = Name + "@" + Proto;
3211 if (A64IntrinsicMap.count(Rename))
3212 continue;
3213 A64IntrinsicMap[Rename] = CK;
3214 }
3215
3216 // Generate BuiltinsARM.def for ARM
3217 genBuiltinsDef(OS, A64IntrinsicMap, false);
3218
3219 // Generate BuiltinsAArch64.def for AArch64
3220 genBuiltinsDef(OS, A64IntrinsicMap, true);
3221
3222 // Generate ARM overloaded type checking code for SemaChecking.cpp
3223 genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
3224
3225 // Generate AArch64 overloaded type checking code for SemaChecking.cpp
3226 genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
3227
3228 // Generate ARM range checking code for shift/lane immediates.
3229 genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
3230
3231 // Generate the AArch64 range checking code for shift/lane immediates.
3232 genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
3233 }
3234
3235 /// GenTest - Write out a test for the intrinsic specified by the name and
3236 /// type strings, including the embedded patterns for FileCheck to match.
GenTest(const std::string & name,const std::string & proto,StringRef outTypeStr,StringRef inTypeStr,bool isShift,bool isHiddenLOp,ClassKind ck,const std::string & InstName,bool isA64,std::string & testFuncProto)3237 static std::string GenTest(const std::string &name,
3238 const std::string &proto,
3239 StringRef outTypeStr, StringRef inTypeStr,
3240 bool isShift, bool isHiddenLOp,
3241 ClassKind ck, const std::string &InstName,
3242 bool isA64,
3243 std::string & testFuncProto) {
3244 assert(!proto.empty() && "");
3245 std::string s;
3246
3247 // Function name with type suffix
3248 std::string mangledName = MangleName(name, outTypeStr, ClassS);
3249 if (outTypeStr != inTypeStr) {
3250 // If the input type is different (e.g., for vreinterpret), append a suffix
3251 // for the input type. String off a "Q" (quad) prefix so that MangleName
3252 // does not insert another "q" in the name.
3253 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
3254 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
3255 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
3256 }
3257
3258 // todo: GenerateChecksForIntrinsic does not generate CHECK
3259 // for aarch64 instructions yet
3260 std::vector<std::string> FileCheckPatterns;
3261 if (!isA64) {
3262 GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
3263 isHiddenLOp, FileCheckPatterns);
3264 s+= "// CHECK_ARM: test_" + mangledName + "\n";
3265 }
3266 s += "// CHECK_AARCH64: test_" + mangledName + "\n";
3267
3268 // Emit the FileCheck patterns.
3269 // If for any reason we do not want to emit a check, mangledInst
3270 // will be the empty string.
3271 if (FileCheckPatterns.size()) {
3272 for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
3273 e = FileCheckPatterns.end();
3274 i != e;
3275 ++i) {
3276 s += "// CHECK_ARM: " + *i + "\n";
3277 }
3278 }
3279
3280 // Emit the start of the test function.
3281
3282 testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
3283 char arg = 'a';
3284 std::string comma;
3285 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
3286 // Do not create arguments for values that must be immediate constants.
3287 if (proto[i] == 'i')
3288 continue;
3289 testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
3290 testFuncProto.push_back(arg);
3291 comma = ", ";
3292 }
3293 testFuncProto += ")";
3294
3295 s+= testFuncProto;
3296 s+= " {\n ";
3297
3298 if (proto[0] != 'v')
3299 s += "return ";
3300 s += mangledName + "(";
3301 arg = 'a';
3302 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
3303 if (proto[i] == 'i') {
3304 // For immediate operands, test the maximum value.
3305 if (isShift)
3306 s += "1"; // FIXME
3307 else
3308 // The immediate generally refers to a lane in the preceding argument.
3309 s += utostr(RangeFromType(proto[i-1], inTypeStr));
3310 } else {
3311 s.push_back(arg);
3312 }
3313 if ((i + 1) < e)
3314 s += ", ";
3315 }
3316 s += ");\n}\n\n";
3317 return s;
3318 }
3319
3320 /// Write out all intrinsic tests for the specified target, checking
3321 /// for intrinsic test uniqueness.
genTargetTest(raw_ostream & OS,StringMap<OpKind> & EmittedMap,bool isA64GenTest)3322 void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
3323 bool isA64GenTest) {
3324 if (isA64GenTest)
3325 OS << "#ifdef __aarch64__\n";
3326
3327 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3328 for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3329 Record *R = RV[i];
3330 std::string name = R->getValueAsString("Name");
3331 std::string Proto = R->getValueAsString("Prototype");
3332 std::string Types = R->getValueAsString("Types");
3333 bool isShift = R->getValueAsBit("isShift");
3334 std::string InstName = R->getValueAsString("InstName");
3335 bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
3336 bool isA64 = R->getValueAsBit("isA64");
3337
3338 // do not include AArch64 intrinsic test if not generating
3339 // code for AArch64
3340 if (!isA64GenTest && isA64)
3341 continue;
3342
3343 SmallVector<StringRef, 16> TypeVec;
3344 ParseTypes(R, Types, TypeVec);
3345
3346 ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3347 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
3348 if (kind == OpUnavailable)
3349 continue;
3350 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3351 if (kind == OpReinterpret) {
3352 bool outQuad = false;
3353 bool dummy = false;
3354 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
3355 for (unsigned srcti = 0, srcte = TypeVec.size();
3356 srcti != srcte; ++srcti) {
3357 bool inQuad = false;
3358 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
3359 if (srcti == ti || inQuad != outQuad)
3360 continue;
3361 std::string testFuncProto;
3362 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
3363 isShift, isHiddenLOp, ck, InstName, isA64,
3364 testFuncProto);
3365 if (EmittedMap.count(testFuncProto))
3366 continue;
3367 EmittedMap[testFuncProto] = kind;
3368 OS << s << "\n";
3369 }
3370 } else {
3371 std::string testFuncProto;
3372 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
3373 isHiddenLOp, ck, InstName, isA64, testFuncProto);
3374 if (EmittedMap.count(testFuncProto))
3375 continue;
3376 EmittedMap[testFuncProto] = kind;
3377 OS << s << "\n";
3378 }
3379 }
3380 }
3381
3382 if (isA64GenTest)
3383 OS << "#endif\n";
3384 }
3385 /// runTests - Write out a complete set of tests for all of the Neon
3386 /// intrinsics.
runTests(raw_ostream & OS)3387 void NeonEmitter::runTests(raw_ostream &OS) {
3388 OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
3389 "apcs-gnu\\\n"
3390 "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
3391 "// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n"
3392 "\n"
3393 "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
3394 "// RUN -target-feature +neon -ffreestanding -S -o - %s \\\n"
3395 "// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n"
3396 "\n"
3397 "// REQUIRES: long_tests\n"
3398 "\n"
3399 "#include <arm_neon.h>\n"
3400 "\n";
3401
3402 // ARM tests must be emitted before AArch64 tests to ensure
3403 // tests for intrinsics that are common to ARM and AArch64
3404 // appear only once in the output stream.
3405 // The check for uniqueness is done in genTargetTest.
3406 StringMap<OpKind> EmittedMap;
3407
3408 genTargetTest(OS, EmittedMap, false);
3409
3410 genTargetTest(OS, EmittedMap, true);
3411 }
3412
3413 namespace clang {
EmitNeon(RecordKeeper & Records,raw_ostream & OS)3414 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
3415 NeonEmitter(Records).run(OS);
3416 }
EmitNeonSema(RecordKeeper & Records,raw_ostream & OS)3417 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
3418 NeonEmitter(Records).runHeader(OS);
3419 }
EmitNeonTest(RecordKeeper & Records,raw_ostream & OS)3420 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
3421 NeonEmitter(Records).runTests(OS);
3422 }
3423 } // End namespace clang
3424