1 /*        $NetBSD: sljitNativeARM_64.c,v 1.5 2024/04/02 22:29:57 riastradh Exp $          */
2 
3 /*
4  *    Stack-less Just-In-Time compiler
5  *
6  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without modification, are
9  * permitted provided that the following conditions are met:
10  *
11  *   1. Redistributions of source code must retain the above copyright notice, this list of
12  *      conditions and the following disclaimer.
13  *
14  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
15  *      of conditions and the following disclaimer in the documentation and/or other materials
16  *      provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
21  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
23  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
sljit_get_platform_name(void)29 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
30 {
31           return "ARM-64" SLJIT_CPUINFO;
32 }
33 
/* Length of an instruction word */
typedef sljit_u32 sljit_ins;

/* Register index 0; reg_map translates it to machine register number 31. */
#define TMP_ZERO	(0)

/* Backend private register indexes, placed after the
   SLJIT_NUMBER_OF_REGISTERS range of indexes. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
#define TMP_LR		(SLJIT_NUMBER_OF_REGISTERS + 5)
#define TMP_SP		(SLJIT_NUMBER_OF_REGISTERS + 6)

/* Floating point temporaries. They are used by the VD/VT/VN/VM macros
   directly, without a translation table. */
#define TMP_FREG1	(0)
#define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)

/* Translates a register index (including the TMP_* indexes above) into the
   register number stored in an instruction field by RD/RT/RN/RT2/RM. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = {
  31, 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 11, 30, 31
};
51 
/* Bit 31 of an instruction word. The opcode templates it is combined with
   (e.g. MOVN, ORRI, ORR) have this bit set, so XOR-ing W_OP into them clears
   it; load_immediate does this for values that fit into 32 bits.
   The constant is built from an unsigned operand on purpose: (1 << 31)
   shifts a signed 1 into the sign bit, which is undefined behaviour when
   int is 32 bits wide. */
#define W_OP ((sljit_ins)1 << 31)

/* Operand fields for general purpose registers. The argument is a register
   index which is translated by reg_map and moved to the bit position of the
   corresponding field (0, 5, 10 or 16). */
#define RD(rd) (reg_map[rd])
#define RT(rt) (reg_map[rt])
#define RN(rn) (reg_map[rn] << 5)
#define RT2(rt2) (reg_map[rt2] << 10)
#define RM(rm) (reg_map[rm] << 16)

/* Operand fields for floating point registers: same bit positions as above,
   but the register number is used as is. */
#define VD(vd) (vd)
#define VT(vt) (vt)
#define VN(vn) ((vn) << 5)
#define VM(vm) ((vm) << 16)
62 
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* Each constant is the fixed part of a 32 bit instruction word. The emitters
   or the operand fields (RD/RN/RM/..., immediates) into it before the word
   is passed to push_inst. */
#define ADC 0x9a000000
#define ADD 0x8b000000
#define ADDI 0x91000000
#define AND 0x8a000000
#define ANDI 0x92000000
#define ASRV 0x9ac02800
#define B 0x14000000
#define B_CC 0x54000000
#define BL 0x94000000
#define BLR 0xd63f0000
#define BR 0xd61f0000
#define BRK 0xd4200000
#define CBZ 0xb4000000
#define CLZ 0xdac01000
#define CSINC 0x9a800400
#define EOR 0xca000000
#define EORI 0xd2000000
#define FABS 0x1e60c000
#define FADD 0x1e602800
#define FCMP 0x1e602000
#define FCVT 0x1e224000
#define FCVTZS 0x9e780000
#define FDIV 0x1e601800
#define FMOV 0x1e604000
#define FMUL 0x1e600800
#define FNEG 0x1e614000
#define FSUB 0x1e603800
#define LDRI 0xf9400000
#define LDP 0xa9400000
#define LDP_PST 0xa8c00000
#define LSLV 0x9ac02000
#define LSRV 0x9ac02400
#define MADD 0x9b000000
#define MOVK 0xf2800000
#define MOVN 0x92800000
#define MOVZ 0xd2800000
#define NOP 0xd503201f
#define ORN 0xaa200000
#define ORR 0xaa000000
#define ORRI 0xb2000000
#define RET 0xd65f0000
#define SBC 0xda000000
#define SBFM 0x93000000
#define SCVTF 0x9e620000
#define SDIV 0x9ac00c00
#define SMADDL 0x9b200000
#define SMULH 0x9b403c00
#define STP 0xa9000000
#define STP_PRE 0xa9800000
#define STRI 0xf9000000
#define STR_FI 0x3d000000
#define STR_FR 0x3c206800
#define STUR_FI 0x3c000000
#define SUB 0xcb000000
#define SUBI 0xd1000000
#define SUBS 0xeb000000
#define UBFM 0xd3000000
#define UDIV 0x9ac00800
#define UMULH 0x9bc03c00
126 
127 /* dest_reg is the absolute name of the register
128    Useful for reordering instructions in the delay slot. */
push_inst(struct sljit_compiler * compiler,sljit_ins ins)129 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
130 {
131           sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
132           FAIL_IF(!ptr);
133           *ptr = ins;
134           compiler->size++;
135           return SLJIT_SUCCESS;
136 }
137 
emit_imm64_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_uw imm)138 static SLJIT_INLINE sljit_s32 emit_imm64_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
139 {
140           FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5)));
141           FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 16) & 0xffff) << 5) | (1 << 21)));
142           FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 32) & 0xffff) << 5) | (2 << 21)));
143           return push_inst(compiler, MOVK | RD(dst) | ((imm >> 48) << 5) | (3 << 21));
144 }
145 
modify_imm64_const(sljit_ins * inst,sljit_uw new_imm)146 static SLJIT_INLINE void modify_imm64_const(sljit_ins* inst, sljit_uw new_imm)
147 {
148           sljit_s32 dst = inst[0] & 0x1f;
149           SLJIT_ASSERT((inst[0] & 0xffe00000) == MOVZ && (inst[1] & 0xffe00000) == (MOVK | (1 << 21)));
150           inst[0] = MOVZ | dst | ((new_imm & 0xffff) << 5);
151           inst[1] = MOVK | dst | (((new_imm >> 16) & 0xffff) << 5) | (1 << 21);
152           inst[2] = MOVK | dst | (((new_imm >> 32) & 0xffff) << 5) | (2 << 21);
153           inst[3] = MOVK | dst | ((new_imm >> 48) << 5) | (3 << 21);
154 }
155 
/* Selects the shortest usable form for a jump whose last instruction word has
   just been copied to code_ptr[0], and records the choice in jump->flags
   (PATCH_COND, PATCH_B, PATCH_ABS48, PATCH_ABS64, or none of them for a 32 bit
   absolute address).

   Returns the number of instruction words the caller may drop; the caller
   (sljit_generate_code) subtracts the result from its code_ptr.

   NOTE(review): the negative indexes used below suggest that the emitted
   sequence is a four word constant load ending right before code_ptr[0],
   preceded at code_ptr[-5] by a conditional branch for IS_COND jumps. The
   code emitting that sequence is not visible here - confirm. */
static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
{
	sljit_sw diff;
	sljit_uw target_addr;

	/* Rewritable jumps always keep the full length form. */
	if (jump->flags & SLJIT_REWRITABLE_JUMP) {
		jump->flags |= PATCH_ABS64;
		return 0;
	}

	if (jump->flags & JUMP_ADDR)
		target_addr = jump->u.target;
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		/* label->size is used as a word offset from code here. */
		target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
	}

	/* Byte distance between the target and code_ptr + 4 words. */
	diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4) - executable_offset;

	if (jump->flags & IS_COND) {
		diff += sizeof(sljit_ins);
		if (diff <= 0xfffff && diff >= -0x100000) {
			/* The conditional instruction itself can reach the target:
			   toggle bit 24 (IS_CBZ) or bit 0 (otherwise) of it and drop
			   the five words which follow it. The offset is patched later
			   (PATCH_COND). */
			code_ptr[-5] ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1;
			jump->addr -= sizeof(sljit_ins);
			jump->flags |= PATCH_COND;
			return 5;
		}
		diff -= sizeof(sljit_ins);
	}

	/* The four preceding words are replaced by a single word, which is
	   written as B or BL when the jump is patched (PATCH_B). */
	if (diff <= 0x7ffffff && diff >= -0x8000000) {
		jump->flags |= PATCH_B;
		return 4;
	}

	/* Absolute address which fits into 32 bits: two words are dropped. */
	if (target_addr <= 0xffffffffl) {
		if (jump->flags & IS_COND)
			/* Decrease the field at bit 5 of the conditional instruction by
			   the two removed words. */
			code_ptr[-5] -= (2 << 5);
		/* Move the last instruction down to close the gap. */
		code_ptr[-2] = code_ptr[0];
		return 2;
	}
	/* Absolute address which fits into 48 bits: one word is dropped. */
	if (target_addr <= 0xffffffffffffl) {
		if (jump->flags & IS_COND)
			code_ptr[-5] -= (1 << 5);
		jump->flags |= PATCH_ABS48;
		code_ptr[-1] = code_ptr[0];
		return 1;
	}

	/* Full 64 bit address, nothing can be removed. */
	jump->flags |= PATCH_ABS64;
	return 0;
}
208 
/* Produces the final machine code from the instruction words collected in the
   compiler's buffer list.

   The first pass copies the words into a block obtained from
   SLJIT_MALLOC_EXEC, resolves labels and constants to addresses, and lets
   detect_jump_type() shorten jump sequences. The second pass patches the
   jump instructions with their final targets.

   On success compiler->error is set to SLJIT_ERR_COMPILED,
   compiler->executable_offset and compiler->executable_size are filled in,
   and the address of the code (with the executable offset applied) is
   returned. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ins *code;
	sljit_ins *code_ptr;
	sljit_ins *buf_ptr;
	sljit_ins *buf_end;
	sljit_uw word_count;
	sljit_sw executable_offset;
	sljit_uw addr;
	sljit_s32 dst;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* compiler->size counts instruction words (see push_inst). */
	code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	word_count = 0;
	executable_offset = SLJIT_EXEC_OFFSET(code);

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;

	/* First pass: copy every buffered word. word_count is the index of the
	   word in the original stream, code_ptr is its position in the output;
	   the two diverge when jumps are shortened. */
	do {
		buf_ptr = (sljit_ins*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 2);
		do {
			*code_ptr = *buf_ptr++;
			/* These structures are ordered by their address. */
			SLJIT_ASSERT(!label || label->size >= word_count);
			SLJIT_ASSERT(!jump || jump->addr >= word_count);
			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
			if (label && label->size == word_count) {
				label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
				/* From now on size is the word offset in the output. */
				label->size = code_ptr - code;
				label = label->next;
			}
			if (jump && jump->addr == word_count) {
				/* The jump is patched four words before its last word;
				   detect_jump_type may adjust jump->addr further and
				   returns the number of words to drop. */
				jump->addr = (sljit_uw)(code_ptr - 4);
				code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
				jump = jump->next;
			}
			if (const_ && const_->addr == word_count) {
				const_->addr = (sljit_uw)code_ptr;
				const_ = const_->next;
			}
			code_ptr ++;
			word_count ++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	/* A label may be placed right after the last instruction. */
	if (label && label->size == word_count) {
		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
		label->size = code_ptr - code;
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);

	/* Second pass: write the final target into each jump. The do/while (0)
	   only exists so that the short forms can leave with break. */
	jump = compiler->jumps;
	while (jump) {
		do {
			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
			buf_ptr = (sljit_ins *)jump->addr;

			if (jump->flags & PATCH_B) {
				/* Relative word offset stored in the low 26 bits of B / BL. */
				addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
				SLJIT_ASSERT((sljit_sw)addr <= 0x1ffffff && (sljit_sw)addr >= -0x2000000);
				buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (addr & 0x3ffffff);
				if (jump->flags & IS_COND)
					/* Four words were dropped by detect_jump_type, so the
					   field at bit 5 of the preceding conditional
					   instruction is decreased by four. */
					buf_ptr[-1] -= (4 << 5);
				break;
			}
			if (jump->flags & PATCH_COND) {
				/* Relative word offset stored in the 19 bit field at bit 5. */
				addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
				SLJIT_ASSERT((sljit_sw)addr <= 0x3ffff && (sljit_sw)addr >= -0x40000);
				buf_ptr[0] = (buf_ptr[0] & ~0xffffe0) | ((addr & 0x7ffff) << 5);
				break;
			}

			SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || addr <= 0xffffffffl);
			SLJIT_ASSERT((jump->flags & PATCH_ABS64) || addr <= 0xffffffffffffl);

			/* Absolute address: regenerate the MOVZ / MOVK sequence with as
			   many 16 bit chunks as the selected form keeps. */
			dst = buf_ptr[0] & 0x1f;
			buf_ptr[0] = MOVZ | dst | ((addr & 0xffff) << 5);
			buf_ptr[1] = MOVK | dst | (((addr >> 16) & 0xffff) << 5) | (1 << 21);
			if (jump->flags & (PATCH_ABS48 | PATCH_ABS64))
				buf_ptr[2] = MOVK | dst | (((addr >> 32) & 0xffff) << 5) | (2 << 21);
			if (jump->flags & PATCH_ABS64)
				buf_ptr[3] = MOVK | dst | (((addr >> 48) & 0xffff) << 5) | (3 << 21);
		} while (0);
		jump = jump->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);

	code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

	SLJIT_CACHE_FLUSH(code, code_ptr);
	return code;
}
327 
328 /* --------------------------------------------------------------------- */
329 /*  Core code generator functions.                                       */
330 /* --------------------------------------------------------------------- */
331 
/* Stores the number of trailing zero bits of value in result, and shifts
   value right by that amount. Intended for 64 bit operands (the first step
   shifts by 32). A zero value yields 63.
   Both arguments must be modifiable lvalues and are evaluated several times.
   The body is wrapped in do { } while (0) so that the macro behaves as a
   single statement, e.g. inside an unbraced if / else. */
#define COUNT_TRAILING_ZERO(value, result) \
	do { \
		(result) = 0; \
		if (!((value) & 0xffffffff)) { \
			(result) += 32; \
			(value) >>= 32; \
		} \
		if (!((value) & 0xffff)) { \
			(result) += 16; \
			(value) >>= 16; \
		} \
		if (!((value) & 0xff)) { \
			(result) += 8; \
			(value) >>= 8; \
		} \
		if (!((value) & 0xf)) { \
			(result) += 4; \
			(value) >>= 4; \
		} \
		if (!((value) & 0x3)) { \
			(result) += 2; \
			(value) >>= 2; \
		} \
		if (!((value) & 0x1)) { \
			(result) += 1; \
			(value) >>= 1; \
		} \
	} while (0)
358 
359 #define LOGICAL_IMM_CHECK 0x100
360 
logical_imm(sljit_sw imm,sljit_s32 len)361 static sljit_ins logical_imm(sljit_sw imm, sljit_s32 len)
362 {
363           sljit_s32 negated, ones, right;
364           sljit_uw mask, uimm;
365           sljit_ins ins;
366 
367           if (len & LOGICAL_IMM_CHECK) {
368                     len &= ~LOGICAL_IMM_CHECK;
369                     if (len == 32 && (imm == 0 || imm == -1))
370                               return 0;
371                     if (len == 16 && ((sljit_s32)imm == 0 || (sljit_s32)imm == -1))
372                               return 0;
373           }
374 
375           SLJIT_ASSERT((len == 32 && imm != 0 && imm != -1)
376                     || (len == 16 && (sljit_s32)imm != 0 && (sljit_s32)imm != -1));
377           uimm = (sljit_uw)imm;
378           while (1) {
379                     if (len <= 0) {
380                               SLJIT_UNREACHABLE();
381                               return 0;
382                     }
383                     mask = ((sljit_uw)1 << len) - 1;
384                     if ((uimm & mask) != ((uimm >> len) & mask))
385                               break;
386                     len >>= 1;
387           }
388 
389           len <<= 1;
390 
391           negated = 0;
392           if (uimm & 0x1) {
393                     negated = 1;
394                     uimm = ~uimm;
395           }
396 
397           if (len < 64)
398                     uimm &= ((sljit_uw)1 << len) - 1;
399 
400           /* Unsigned right shift. */
401           COUNT_TRAILING_ZERO(uimm, right);
402 
403           /* Signed shift. We also know that the highest bit is set. */
404           imm = (sljit_sw)~uimm;
405           SLJIT_ASSERT(imm < 0);
406 
407           COUNT_TRAILING_ZERO(imm, ones);
408 
409           if (~imm)
410                     return 0;
411 
412           if (len == 64)
413                     ins = 1 << 22;
414           else
415                     ins = (0x3f - ((len << 1) - 1)) << 10;
416 
417           if (negated)
418                     return ins | ((len - ones - 1) << 10) | ((len - ones - right) << 16);
419 
420           return ins | ((ones - 1) << 10) | ((len - right) << 16);
421 }
422 
423 #undef COUNT_TRAILING_ZERO
424 
/* Emits the instructions which load the constant simm into dst.

   Shorter forms are tried first: a single MOVZ / MOVN, a 32 bit MOVN, an ORR
   with a logical immediate, then two instruction MOVZ / MOVN + MOVK pairs.
   Everything else is built from up to four MOVZ / MOVN / MOVK instructions,
   one per 16 bit chunk which needs to be set.

   Returns SLJIT_SUCCESS, or leaves through FAIL_IF when an instruction cannot
   be pushed. */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw simm)
{
	sljit_uw uval = (sljit_uw)simm;
	sljit_s32 idx, zero_chunks, one_chunks, is_first;
	sljit_sw chunk;
	sljit_ins bitmask;

	if (uval <= 0xffff)
		return push_inst(compiler, MOVZ | RD(dst) | (uval << 5));

	if (simm >= -0x10000 && simm < 0)
		return push_inst(compiler, MOVN | RD(dst) | ((~uval & 0xffff) << 5));

	if (uval <= 0xffffffffl) {
		/* The value fits into 32 bits: the 32 bit instruction forms are usable. */
		if ((uval & 0xffff0000l) == 0xffff0000)
			return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~uval & 0xffff) << 5));
		if ((uval & 0xffff) == 0xffff)
			return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~uval & 0xffff0000l) >> (16 - 5)) | (1 << 21));

		bitmask = logical_imm(simm, 16);
		if (bitmask != 0)
			return push_inst(compiler, (ORRI ^ W_OP) | RD(dst) | RN(TMP_ZERO) | bitmask);

		FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((uval & 0xffff) << 5)));
		return push_inst(compiler, MOVK | RD(dst) | ((uval & 0xffff0000l) >> (16 - 5)) | (1 << 21));
	}

	bitmask = logical_imm(simm, 32);
	if (bitmask != 0)
		return push_inst(compiler, ORRI | RD(dst) | RN(TMP_ZERO) | bitmask);

	if (simm >= -0x100000000l && simm < 0) {
		FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((~uval & 0xffff) << 5)));
		return push_inst(compiler, MOVK | RD(dst) | ((uval & 0xffff0000l) >> (16 - 5)) | (1 << 21));
	}

	/* A large number of constants could be constructed from ORR and MOVx
	   combinations, but finding them is costly, so no such search is done.
	   The generic chunk by chunk sequence below is used instead. */

	/* Count the 16 bit chunks which are all zero / all one. */
	zero_chunks = 0;
	one_chunks = 0;
	for (idx = 0; idx < 4; idx++) {
		chunk = simm & 0xffff;
		if (chunk == 0)
			zero_chunks++;
		if (chunk == 0xffff)
			one_chunks++;
		simm >>= 16;
	}

	simm = (sljit_sw)uval;
	is_first = 1;

	if (one_chunks > zero_chunks) {
		/* Mostly one bits: start with MOVN. The loop works on the inverted
		   value, so the chunks which are skipped are the all one chunks of
		   the original. */
		simm = ~simm;
		for (idx = 0; idx < 4; idx++, simm >>= 16) {
			if (!(simm & 0xffff))
				continue;

			if (is_first) {
				is_first = 0;
				FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((simm & 0xffff) << 5) | (idx << 21)));
			}
			else {
				FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((~simm & 0xffff) << 5) | (idx << 21)));
			}
		}
		return SLJIT_SUCCESS;
	}

	/* Otherwise start with MOVZ and skip the all zero chunks. */
	for (idx = 0; idx < 4; idx++, simm >>= 16) {
		if (!(simm & 0xffff))
			continue;

		if (is_first) {
			is_first = 0;
			FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((simm & 0xffff) << 5) | (idx << 21)));
		}
		else {
			FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((simm & 0xffff) << 5) | (idx << 21)));
		}
	}
	return SLJIT_SUCCESS;
}
510 
/* Bits of the flags argument of emit_op_imm. They are placed above the low
   16 bits, which emit_op_imm reads as the opcode (flags & 0xffff). */
#define ARG1_IMM	0x0010000
#define ARG2_IMM	0x0020000
#define INT_OP		0x0040000
#define SET_FLAGS	0x0080000
#define UNUSED_RETURN	0x0100000
#define SLOW_DEST	0x0200000
#define SLOW_SRC1	0x0400000
#define SLOW_SRC2	0x0800000

/* Helper for emit_op_imm; it relies on the local variables flags, inv_bits
   and dst of the caller. When SET_FLAGS is requested, flag_bits are added to
   inv_bits, and the destination is replaced by TMP_ZERO if the result is
   unused (UNUSED_RETURN).
   The body is wrapped in do { } while (0) and the argument is parenthesized,
   so the macro expands to exactly one statement and is safe inside an
   unbraced if / else. */
#define CHECK_FLAGS(flag_bits) \
	do { \
		if (flags & SET_FLAGS) { \
			inv_bits |= (flag_bits); \
			if (flags & UNUSED_RETURN) \
				dst = TMP_ZERO; \
		} \
	} while (0)
526 
emit_op_imm(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 dst,sljit_sw arg1,sljit_sw arg2)527 static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_sw arg1, sljit_sw arg2)
528 {
529           /* dst must be register, TMP_REG1
530              arg1 must be register, TMP_REG1, imm
531              arg2 must be register, TMP_REG2, imm */
532           sljit_ins inv_bits = (flags & INT_OP) ? (1 << 31) : 0;
533           sljit_ins inst_bits;
534           sljit_s32 op = (flags & 0xffff);
535           sljit_s32 reg;
536           sljit_sw imm, nimm;
537 
538           if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
539                     /* Both are immediates. */
540                     flags &= ~ARG1_IMM;
541                     if (arg1 == 0 && op != SLJIT_ADD && op != SLJIT_SUB)
542                               arg1 = TMP_ZERO;
543                     else {
544                               FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
545                               arg1 = TMP_REG1;
546                     }
547           }
548 
549           if (flags & (ARG1_IMM | ARG2_IMM)) {
550                     reg = (flags & ARG2_IMM) ? arg1 : arg2;
551                     imm = (flags & ARG2_IMM) ? arg2 : arg1;
552 
553                     switch (op) {
554                     case SLJIT_MUL:
555                     case SLJIT_NEG:
556                     case SLJIT_CLZ:
557                     case SLJIT_ADDC:
558                     case SLJIT_SUBC:
559                               /* No form with immediate operand (except imm 0, which
560                               is represented by a ZERO register). */
561                               break;
562                     case SLJIT_MOV:
563                               SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1);
564                               return load_immediate(compiler, dst, imm);
565                     case SLJIT_NOT:
566                               SLJIT_ASSERT(flags & ARG2_IMM);
567                               FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? (~imm & 0xffffffff) : ~imm));
568                               goto set_flags;
569                     case SLJIT_SUB:
570                               if (flags & ARG1_IMM)
571                                         break;
572                               imm = -imm;
573                               /* Fall through. */
574                     case SLJIT_ADD:
575                               if (imm == 0) {
576                                         CHECK_FLAGS(1 << 29);
577                                         return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg));
578                               }
579                               if (imm > 0 && imm <= 0xfff) {
580                                         CHECK_FLAGS(1 << 29);
581                                         return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (imm << 10));
582                               }
583                               nimm = -imm;
584                               if (nimm > 0 && nimm <= 0xfff) {
585                                         CHECK_FLAGS(1 << 29);
586                                         return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (nimm << 10));
587                               }
588                               if (imm > 0 && imm <= 0xffffff && !(imm & 0xfff)) {
589                                         CHECK_FLAGS(1 << 29);
590                                         return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22));
591                               }
592                               if (nimm > 0 && nimm <= 0xffffff && !(nimm & 0xfff)) {
593                                         CHECK_FLAGS(1 << 29);
594                                         return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22));
595                               }
596                               if (imm > 0 && imm <= 0xffffff && !(flags & SET_FLAGS)) {
597                                         FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22)));
598                                         return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | ((imm & 0xfff) << 10));
599                               }
600                               if (nimm > 0 && nimm <= 0xffffff && !(flags & SET_FLAGS)) {
601                                         FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22)));
602                                         return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | ((nimm & 0xfff) << 10));
603                               }
604                               break;
605                     case SLJIT_AND:
606                               inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
607                               if (!inst_bits)
608                                         break;
609                               CHECK_FLAGS(3 << 29);
610                               return push_inst(compiler, (ANDI ^ inv_bits) | RD(dst) | RN(reg) | inst_bits);
611                     case SLJIT_OR:
612                     case SLJIT_XOR:
613                               inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
614                               if (!inst_bits)
615                                         break;
616                               if (op == SLJIT_OR)
617                                         inst_bits |= ORRI;
618                               else
619                                         inst_bits |= EORI;
620                               FAIL_IF(push_inst(compiler, (inst_bits ^ inv_bits) | RD(dst) | RN(reg)));
621                               goto set_flags;
622                     case SLJIT_SHL:
623                               if (flags & ARG1_IMM)
624                                         break;
625                               if (flags & INT_OP) {
626                                         imm &= 0x1f;
627                                         FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | ((-imm & 0x1f) << 16) | ((31 - imm) << 10)));
628                               }
629                               else {
630                                         imm &= 0x3f;
631                                         FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | ((-imm & 0x3f) << 16) | ((63 - imm) << 10)));
632                               }
633                               goto set_flags;
634                     case SLJIT_LSHR:
635                     case SLJIT_ASHR:
636                               if (flags & ARG1_IMM)
637                                         break;
638                               if (op == SLJIT_ASHR)
639                                         inv_bits |= 1 << 30;
640                               if (flags & INT_OP) {
641                                         imm &= 0x1f;
642                                         FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (imm << 16) | (31 << 10)));
643                               }
644                               else {
645                                         imm &= 0x3f;
646                                         FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | (imm << 16) | (63 << 10)));
647                               }
648                               goto set_flags;
649                     default:
650                               SLJIT_UNREACHABLE();
651                               break;
652                     }
653 
654                     if (flags & ARG2_IMM) {
655                               if (arg2 == 0)
656                                         arg2 = TMP_ZERO;
657                               else {
658                                         FAIL_IF(load_immediate(compiler, TMP_REG2, arg2));
659                                         arg2 = TMP_REG2;
660                               }
661                     }
662                     else {
663                               if (arg1 == 0)
664                                         arg1 = TMP_ZERO;
665                               else {
666                                         FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
667                                         arg1 = TMP_REG1;
668                               }
669                     }
670           }
671 
672           /* Both arguments are registers. */
673           switch (op) {
674           case SLJIT_MOV:
675           case SLJIT_MOV_P:
676           case SLJIT_MOVU:
677           case SLJIT_MOVU_P:
678                     SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
679                     if (dst == arg2)
680                               return SLJIT_SUCCESS;
681                     return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(arg2));
682           case SLJIT_MOV_U8:
683           case SLJIT_MOVU_U8:
684                     SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
685                     return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (7 << 10));
686           case SLJIT_MOV_S8:
687           case SLJIT_MOVU_S8:
688                     SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
689                     if (!(flags & INT_OP))
690                               inv_bits |= 1 << 22;
691                     return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10));
692           case SLJIT_MOV_U16:
693           case SLJIT_MOVU_U16:
694                     SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
695                     return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (15 << 10));
696           case SLJIT_MOV_S16:
697           case SLJIT_MOVU_S16:
698                     SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
699                     if (!(flags & INT_OP))
700                               inv_bits |= 1 << 22;
701                     return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10));
702           case SLJIT_MOV_U32:
703           case SLJIT_MOVU_U32:
704                     SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
705                     if ((flags & INT_OP) && dst == arg2)
706                               return SLJIT_SUCCESS;
707                     return push_inst(compiler, (ORR ^ (1 << 31)) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
708           case SLJIT_MOV_S32:
709           case SLJIT_MOVU_S32:
710                     SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
711                     if ((flags & INT_OP) && dst == arg2)
712                               return SLJIT_SUCCESS;
713                     return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10));
714           case SLJIT_NOT:
715                     SLJIT_ASSERT(arg1 == TMP_REG1);
716                     FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)));
717                     goto set_flags;
718           case SLJIT_NEG:
719                     SLJIT_ASSERT(arg1 == TMP_REG1);
720                     if (flags & SET_FLAGS)
721                               inv_bits |= 1 << 29;
722                     return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
723           case SLJIT_CLZ:
724                     SLJIT_ASSERT(arg1 == TMP_REG1);
725                     FAIL_IF(push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)));
726                     goto set_flags;
727           case SLJIT_ADD:
728                     CHECK_FLAGS(1 << 29);
729                     return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
730           case SLJIT_ADDC:
731                     CHECK_FLAGS(1 << 29);
732                     return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
733           case SLJIT_SUB:
734                     CHECK_FLAGS(1 << 29);
735                     return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
736           case SLJIT_SUBC:
737                     CHECK_FLAGS(1 << 29);
738                     return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
739           case SLJIT_MUL:
740                     if (!(flags & SET_FLAGS))
741                               return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO));
742                     if (flags & INT_OP) {
743                               FAIL_IF(push_inst(compiler, SMADDL | RD(dst) | RN(arg1) | RM(arg2) | (31 << 10)));
744                               FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(TMP_ZERO) | RM(dst) | (2 << 22) | (31 << 10)));
745                               return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10));
746                     }
747                     FAIL_IF(push_inst(compiler, SMULH | RD(TMP_LR) | RN(arg1) | RM(arg2)));
748                     FAIL_IF(push_inst(compiler, MADD | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO)));
749                     return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10));
750           case SLJIT_AND:
751                     CHECK_FLAGS(3 << 29);
752                     return push_inst(compiler, (AND ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
753           case SLJIT_OR:
754                     FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
755                     goto set_flags;
756           case SLJIT_XOR:
757                     FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
758                     goto set_flags;
759           case SLJIT_SHL:
760                     FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
761                     goto set_flags;
762           case SLJIT_LSHR:
763                     FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
764                     goto set_flags;
765           case SLJIT_ASHR:
766                     FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
767                     goto set_flags;
768           }
769 
770           SLJIT_UNREACHABLE();
771           return SLJIT_SUCCESS;
772 
773 set_flags:
774           if (flags & SET_FLAGS)
775                     return push_inst(compiler, (SUBS ^ inv_bits) | RD(TMP_ZERO) | RN(dst) | RM(TMP_ZERO));
776           return SLJIT_SUCCESS;
777 }
778 
/* Flag bits describing a memory access; they are passed as the "flags"
   argument of getput_arg_fast() and getput_arg(). */

/* The two lowest bits (flags & 0x3) are used as the index into the
   sljit_mem_* opcode tables. */
#define STORE                 0x01
#define SIGNED                0x02

/* UPDATE requests an access that also writes the computed address back
   to the base register. With ARG_TEST set, getput_arg_fast() only reports
   whether the operand is encodable and emits nothing. */
#define UPDATE                0x04
#define ARG_TEST    0x08

/* Access size, stored in bits 8-9 of the flags. */
#define BYTE_SIZE   0x000
#define HALF_SIZE   0x100
#define INT_SIZE    0x200
#define WORD_SIZE   0x300

/* Extracts the size field: 0 (byte), 1 (half), 2 (int) or 3 (word).
   NOTE(review): assumes no flag bit above 0x300 is ever set - confirm. */
#define MEM_SIZE_SHIFT(flags) ((flags) >> 8)
791 
/* Byte-sized load/store opcode templates, one table per addressing form.
   Each table is indexed by (flags & 0x3), i.e. by the SIGNED and STORE bits:
     "u l" unsigned load, "u s" unsigned store,
     "s l" signed load,   "s s" signed store.
   The signed store entries repeat the unsigned store opcode. Users of the
   tables OR in (MEM_SIZE_SHIFT(flags) << 30) to select the access size. */

/* Base register + immediate offset (offset field placed at bit 10 by the users). */
static const sljit_ins sljit_mem_imm[4] = {
/* u l */ 0x39400000 /* ldrb [reg,imm] */,
/* u s */ 0x39000000 /* strb [reg,imm] */,
/* s l */ 0x39800000 /* ldrsb [reg,imm] */,
/* s s */ 0x39000000 /* strb [reg,imm] */,
};

/* Base register + 9 bit signed immediate (offset field placed at bit 12 by the users). */
static const sljit_ins sljit_mem_simm[4] = {
/* u l */ 0x38400000 /* ldurb [reg,imm] */,
/* u s */ 0x38000000 /* sturb [reg,imm] */,
/* s l */ 0x38800000 /* ldursb [reg,imm] */,
/* s s */ 0x38000000 /* sturb [reg,imm] */,
};

/* Base register + 9 bit signed immediate, with write back to the base register. */
static const sljit_ins sljit_mem_pre_simm[4] = {
/* u l */ 0x38400c00 /* ldrb [reg,imm]! */,
/* u s */ 0x38000c00 /* strb [reg,imm]! */,
/* s l */ 0x38800c00 /* ldrsb [reg,imm]! */,
/* s s */ 0x38000c00 /* strb [reg,imm]! */,
};

/* Base register + offset register. */
static const sljit_ins sljit_mem_reg[4] = {
/* u l */ 0x38606800 /* ldrb [reg,reg] */,
/* u s */ 0x38206800 /* strb [reg,reg] */,
/* s l */ 0x38a06800 /* ldrsb [reg,reg] */,
/* s s */ 0x38206800 /* strb [reg,reg] */,
};
819 
820 /* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */
emit_set_delta(struct sljit_compiler * compiler,sljit_s32 dst,sljit_s32 reg,sljit_sw value)821 static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value)
822 {
823           if (value >= 0) {
824                     if (value <= 0xfff)
825                               return push_inst(compiler, ADDI | RD(dst) | RN(reg) | (value << 10));
826                     if (value <= 0xffffff && !(value & 0xfff))
827                               return push_inst(compiler, ADDI | (1 << 22) | RD(dst) | RN(reg) | (value >> 2));
828           }
829           else {
830                     value = -value;
831                     if (value <= 0xfff)
832                               return push_inst(compiler, SUBI | RD(dst) | RN(reg) | (value << 10));
833                     if (value <= 0xffffff && !(value & 0xfff))
834                               return push_inst(compiler, SUBI | (1 << 22) | RD(dst) | RN(reg) | (value >> 2));
835           }
836           return SLJIT_ERR_UNSUPPORTED;
837 }
838 
839 /* Can perform an operation using at most 1 instruction. */
getput_arg_fast(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 reg,sljit_s32 arg,sljit_sw argw)840 static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
841 {
842           sljit_u32 shift = MEM_SIZE_SHIFT(flags);
843 
844           SLJIT_ASSERT(arg & SLJIT_MEM);
845 
846           if (SLJIT_UNLIKELY(flags & UPDATE)) {
847                     if ((arg & REG_MASK) && !(arg & OFFS_REG_MASK) && argw <= 255 && argw >= -256) {
848                               if (SLJIT_UNLIKELY(flags & ARG_TEST))
849                                         return 1;
850 
851                               arg &= REG_MASK;
852                               argw &= 0x1ff;
853                               FAIL_IF(push_inst(compiler, sljit_mem_pre_simm[flags & 0x3]
854                                         | (shift << 30) | RT(reg) | RN(arg) | (argw << 12)));
855                               return -1;
856                     }
857                     return 0;
858           }
859 
860           if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
861                     argw &= 0x3;
862                     if (argw && argw != shift)
863                               return 0;
864 
865                     if (SLJIT_UNLIKELY(flags & ARG_TEST))
866                               return 1;
867 
868                     FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg)
869                               | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0)));
870                     return -1;
871           }
872 
873           arg &= REG_MASK;
874           if (argw >= 0 && (argw >> shift) <= 0xfff && (argw & ((1 << shift) - 1)) == 0) {
875                     if (SLJIT_UNLIKELY(flags & ARG_TEST))
876                               return 1;
877 
878                     FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30)
879                               | RT(reg) | RN(arg) | (argw << (10 - shift))));
880                     return -1;
881           }
882 
883           if (argw > 255 || argw < -256)
884                     return 0;
885 
886           if (SLJIT_UNLIKELY(flags & ARG_TEST))
887                     return 1;
888 
889           FAIL_IF(push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30)
890                     | RT(reg) | RN(arg) | ((argw & 0x1ff) << 12)));
891           return -1;
892 }
893 
894 /* see getput_arg below.
895    Note: can_cache is called only for binary operators. Those
896    operators always uses word arguments without write back. */
can_cache(sljit_s32 arg,sljit_sw argw,sljit_s32 next_arg,sljit_sw next_argw)897 static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
898 {
899           sljit_sw diff;
900           if ((arg & OFFS_REG_MASK) || !(next_arg & SLJIT_MEM))
901                     return 0;
902 
903           if (!(arg & REG_MASK)) {
904                     diff = argw - next_argw;
905                     if (diff <= 0xfff && diff >= -0xfff)
906                               return 1;
907                     return 0;
908           }
909 
910           if (argw == next_argw)
911                     return 1;
912 
913           diff = argw - next_argw;
914           if (arg == next_arg && diff <= 0xfff && diff >= -0xfff)
915                     return 1;
916 
917           return 0;
918 }
919 
920 /* Emit the necessary instructions. See can_cache above. */
getput_arg(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 reg,sljit_s32 arg,sljit_sw argw,sljit_s32 next_arg,sljit_sw next_argw)921 static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
922           sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
923 {
924           sljit_u32 shift = MEM_SIZE_SHIFT(flags);
925           sljit_s32 tmp_r, other_r;
926           sljit_sw diff;
927 
928           SLJIT_ASSERT(arg & SLJIT_MEM);
929           if (!(next_arg & SLJIT_MEM)) {
930                     next_arg = 0;
931                     next_argw = 0;
932           }
933 
934           tmp_r = (flags & STORE) ? TMP_REG3 : reg;
935 
936           if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) {
937                     /* Update only applies if a base register exists. */
938                     other_r = OFFS_REG(arg);
939                     if (!other_r) {
940                               other_r = arg & REG_MASK;
941                               SLJIT_ASSERT(other_r != reg);
942 
943                               if (argw >= 0 && argw <= 0xffffff) {
944                                         if ((argw & 0xfff) != 0)
945                                                   FAIL_IF(push_inst(compiler, ADDI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10)));
946                                         if (argw >> 12)
947                                                   FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10)));
948                                         return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r));
949                               }
950                               else if (argw < 0 && argw >= -0xffffff) {
951                                         argw = -argw;
952                                         if ((argw & 0xfff) != 0)
953                                                   FAIL_IF(push_inst(compiler, SUBI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10)));
954                                         if (argw >> 12)
955                                                   FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10)));
956                                         return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r));
957                               }
958 
959                               if (compiler->cache_arg == SLJIT_MEM) {
960                                         if (argw == compiler->cache_argw) {
961                                                   other_r = TMP_REG3;
962                                                   argw = 0;
963                                         }
964                                         else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
965                                                   FAIL_IF(compiler->error);
966                                                   compiler->cache_argw = argw;
967                                                   other_r = TMP_REG3;
968                                                   argw = 0;
969                                         }
970                               }
971 
972                               if (argw) {
973                                         FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
974                                         compiler->cache_arg = SLJIT_MEM;
975                                         compiler->cache_argw = argw;
976                                         other_r = TMP_REG3;
977                                         argw = 0;
978                               }
979                     }
980 
981                     /* No caching here. */
982                     arg &= REG_MASK;
983                     FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(other_r)));
984                     return push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r));
985           }
986 
987           if (arg & OFFS_REG_MASK) {
988                     other_r = OFFS_REG(arg);
989                     arg &= REG_MASK;
990                     FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RN(arg) | RM(other_r) | ((argw & 0x3) << 10)));
991                     return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(tmp_r));
992           }
993 
994           if (compiler->cache_arg == arg) {
995                     diff = argw - compiler->cache_argw;
996                     if (diff <= 255 && diff >= -256)
997                               return push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30)
998                                         | RT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12));
999                     if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, diff) != SLJIT_ERR_UNSUPPORTED) {
1000                               FAIL_IF(compiler->error);
1001                               return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg));
1002                     }
1003           }
1004 
1005           if (argw >= 0 && argw <= 0xffffff && (argw & ((1 << shift) - 1)) == 0) {
1006                     FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_r) | RN(arg & REG_MASK) | ((argw >> 12) << 10)));
1007                     return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30)
1008                               | RT(reg) | RN(tmp_r) | ((argw & 0xfff) << (10 - shift)));
1009           }
1010 
1011           diff = argw - next_argw;
1012           next_arg = (arg & REG_MASK) && (arg == next_arg) && diff <= 0xfff && diff >= -0xfff && diff != 0;
1013           arg &= REG_MASK;
1014 
1015           if (arg && compiler->cache_arg == SLJIT_MEM) {
1016                     if (compiler->cache_argw == argw)
1017                               return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
1018                     if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
1019                               FAIL_IF(compiler->error);
1020                               compiler->cache_argw = argw;
1021                               return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
1022                     }
1023           }
1024 
1025           compiler->cache_argw = argw;
1026           if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) {
1027                     FAIL_IF(compiler->error);
1028                     compiler->cache_arg = SLJIT_MEM | arg;
1029                     arg = 0;
1030           }
1031           else {
1032                     FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1033                     compiler->cache_arg = SLJIT_MEM;
1034 
1035                     if (next_arg) {
1036                               FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RN(TMP_REG3) | RM(arg)));
1037                               compiler->cache_arg = SLJIT_MEM | arg;
1038                               arg = 0;
1039                     }
1040           }
1041 
1042           if (arg)
1043                     return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
1044           return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_REG3));
1045 }
1046 
emit_op_mem(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 reg,sljit_s32 arg,sljit_sw argw)1047 static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
1048 {
1049           if (getput_arg_fast(compiler, flags, reg, arg, argw))
1050                     return compiler->error;
1051           compiler->cache_arg = 0;
1052           compiler->cache_argw = 0;
1053           return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
1054 }
1055 
emit_op_mem2(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 reg,sljit_s32 arg1,sljit_sw arg1w,sljit_s32 arg2,sljit_sw arg2w)1056 static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
1057 {
1058           if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1059                     return compiler->error;
1060           return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1061 }
1062 
1063 /* --------------------------------------------------------------------- */
1064 /*  Entry, exit                                                          */
1065 /* --------------------------------------------------------------------- */
1066 
/* Emits the function prologue: allocates the stack frame, stores register 29
   together with TMP_LR, saves the preserved registers and copies the
   incoming arguments (R0..R2) into S0..S2.

   The final frame size (locals + saved registers + SLJIT_LOCALS_OFFSET,
   rounded up to 16 bytes) is recorded in compiler->local_size. Two layouts
   are used depending on that size; sljit_emit_return() mirrors both.

   Returns SLJIT_SUCCESS, or compiler->error through FAIL_IF if an
   instruction cannot be emitted. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
          sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
          sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
          sljit_s32 i, tmp, offs, prev, saved_regs_size;

          CHECK_ERROR();
          CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
          set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

          saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0);
          local_size += saved_regs_size + SLJIT_LOCALS_OFFSET;
          /* Round the frame size up to a multiple of 16 bytes. */
          local_size = (local_size + 15) & ~0xf;
          compiler->local_size = local_size;

          /* offs is kept pre-shifted by 15 in units of 8 bytes, so it can be ORed
             directly into the STP instructions below; the single register STRI
             form takes the same value as offs >> 5. */
          SLJIT_ASSERT(local_size >= 0);
          if ((size_t)local_size <= (63 * sizeof(sljit_sw))) {
                    /* Small frame: a single pre-indexed STP allocates the whole frame
                       and stores register 29 and TMP_LR at its bottom. The negated
                       size (in 8 byte units) is masked to 7 bits.
                       NOTE(review): the 63 word limit presumably comes from the range
                       of that 7 bit offset field - confirm. */
                    FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
                              | RN(TMP_SP) | ((-(local_size >> 3) & 0x7f) << 15)));
                    /* SLJIT_SP = TMP_SP + 0 */
                    FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
                    /* The saved registers occupy the top of the frame. */
                    offs = (local_size - saved_regs_size) << (15 - 3);
          } else {
                    /* Large frame: first allocate only the saved register area. An
                       odd number of saved words is padded by one word, and the first
                       register is then stored at slot 1. */
                    offs = 0 << 15;
                    if (saved_regs_size & 0x8) {
                              offs = 1 << 15;
                              saved_regs_size += sizeof(sljit_sw);
                    }
                    /* local_size now covers only the part below the saved registers,
                       excluding SLJIT_LOCALS_OFFSET. */
                    local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
                    if (saved_regs_size > 0)
                              FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
          }

          /* Save the saved registers from SLJIT_S0 downwards. Registers are stored
             in pairs (STP) whenever the current slot is even; a single store
             (STRI) is used when the slot is odd. prev holds the first register of
             a pending pair, or -1. */
          tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
          prev = -1;
          for (i = SLJIT_S0; i >= tmp; i--) {
                    if (prev == -1) {
                              if (!(offs & (1 << 15))) {
                                        prev = i;
                                        continue;
                              }
                              FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
                              offs += 1 << 15;
                              continue;
                    }
                    FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
                    offs += 2 << 15;
                    prev = -1;
          }

          /* Same scheme for the scratch registers numbered SLJIT_FIRST_SAVED_REG
             and above; a pair started in the previous loop may be completed here. */
          for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
                    if (prev == -1) {
                              if (!(offs & (1 << 15))) {
                                        prev = i;
                                        continue;
                              }
                              FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
                              offs += 1 << 15;
                              continue;
                    }
                    FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
                    offs += 2 << 15;
                    prev = -1;
          }

          /* Every register must have been stored. */
          SLJIT_ASSERT(prev == -1);

          SLJIT_ASSERT(compiler->local_size >= 0);
          if ((size_t)compiler->local_size > (63 * sizeof(sljit_sw))) {
                    /* The local_size is already adjusted by the saved registers. */
                    /* Allocate the rest of the frame: bits 12 and above first (shifted
                       immediate form), then the low 12 bits. */
                    if (local_size > 0xfff) {
                              FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
                              local_size &= 0xfff;
                    }
                    if (local_size)
                              FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
                    /* Push register 29 and TMP_LR (16 bytes) below the locals. */
                    FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
                              | RN(TMP_SP) | ((-(16 >> 3) & 0x7f) << 15)));
                    /* SLJIT_SP = TMP_SP + 0 */
                    FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
          }

          /* Copy the incoming arguments into the saved registers
             (ORR with the zero register is a register move). */
          if (args >= 1)
                    FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0) | RN(TMP_ZERO) | RM(SLJIT_R0)));
          if (args >= 2)
                    FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S1) | RN(TMP_ZERO) | RM(SLJIT_R1)));
          if (args >= 3)
                    FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2)));

          return SLJIT_SUCCESS;
}
1156 
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 args,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1157 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1158           sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
1159           sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1160 {
1161           CHECK_ERROR();
1162           CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
1163           set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
1164 
1165           local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET;
1166           local_size = (local_size + 15) & ~0xf;
1167           compiler->local_size = local_size;
1168           return SLJIT_SUCCESS;
1169 }
1170 
/* Emits the function epilogue: moves the return value into place (via
   emit_mov_before_return), restores the registers saved by
   sljit_emit_enter(), releases the stack frame and returns through TMP_LR.

   The frame layout is selected by compiler->local_size with the same
   threshold as in sljit_emit_enter().

   Returns SLJIT_SUCCESS, or compiler->error through FAIL_IF if an
   instruction cannot be emitted. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
          sljit_s32 local_size;
          sljit_s32 i, tmp, offs, prev, saved_regs_size;

          CHECK_ERROR();
          CHECK(check_sljit_emit_return(compiler, op, src, srcw));

          FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

          local_size = compiler->local_size;

          saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 0);
          SLJIT_ASSERT(local_size >= 0);
          if ((size_t)local_size <= (63 * sizeof(sljit_sw)))
                    /* Small frame: the saved registers are at the top of the frame;
                       offs is pre-shifted for the LDP offset field (8 byte units at bit 15). */
                    offs = (local_size - saved_regs_size) << (15 - 3);
          else {
                    /* Large frame: pop register 29 and TMP_LR (16 bytes) first. */
                    FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
                              | RN(TMP_SP) | (((16 >> 3) & 0x7f) << 15)));
                    /* Same padding rule as in the prologue for an odd number of
                       saved words. */
                    offs = 0 << 15;
                    if (saved_regs_size & 0x8) {
                              offs = 1 << 15;
                              saved_regs_size += sizeof(sljit_sw);
                    }
                    /* Release the area below the saved registers: bits 12 and above
                       first (shifted immediate form), then the low 12 bits. */
                    local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
                    if (local_size > 0xfff) {
                              FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
                              local_size &= 0xfff;
                    }
                    if (local_size)
                              FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
          }

          /* Reload the saved registers in the order they were stored: pairs (LDP)
             on even slots, a single load (LDRI) on an odd slot. prev holds the
             first register of a pending pair, or -1. */
          tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
          prev = -1;
          for (i = SLJIT_S0; i >= tmp; i--) {
                    if (prev == -1) {
                              if (!(offs & (1 << 15))) {
                                        prev = i;
                                        continue;
                              }
                              FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
                              offs += 1 << 15;
                              continue;
                    }
                    FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
                    offs += 2 << 15;
                    prev = -1;
          }

          /* Same scheme for the scratch registers numbered SLJIT_FIRST_SAVED_REG
             and above. */
          for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
                    if (prev == -1) {
                              if (!(offs & (1 << 15))) {
                                        prev = i;
                                        continue;
                              }
                              FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
                              offs += 1 << 15;
                              continue;
                    }
                    FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
                    offs += 2 << 15;
                    prev = -1;
          }

          /* Every register must have been reloaded. */
          SLJIT_ASSERT(prev == -1);

          SLJIT_ASSERT(compiler->local_size >= 0);
          if ((size_t)compiler->local_size <= (63 * sizeof(sljit_sw))) {
                    /* Small frame: one post-indexed LDP restores register 29 and
                       TMP_LR and releases the whole frame. */
                    FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
                              | RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15)));
          } else if (saved_regs_size > 0) {
                    /* Large frame: only the saved register area is left to release. */
                    FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
          }

          FAIL_IF(push_inst(compiler, RET | RN(TMP_LR)));
          return SLJIT_SUCCESS;
}
1249 
1250 /* --------------------------------------------------------------------- */
1251 /*  Operators                                                            */
1252 /* --------------------------------------------------------------------- */
1253 
sljit_emit_op0(struct sljit_compiler * compiler,sljit_s32 op)1254 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1255 {
1256           sljit_ins inv_bits = (op & SLJIT_I32_OP) ? (1 << 31) : 0;
1257 
1258           CHECK_ERROR();
1259           CHECK(check_sljit_emit_op0(compiler, op));
1260 
1261           op = GET_OPCODE(op);
1262           switch (op) {
1263           case SLJIT_BREAKPOINT:
1264                     return push_inst(compiler, BRK);
1265           case SLJIT_NOP:
1266                     return push_inst(compiler, NOP);
1267           case SLJIT_LMUL_UW:
1268           case SLJIT_LMUL_SW:
1269                     FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
1270                     FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
1271                     return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
1272           case SLJIT_DIVMOD_UW:
1273           case SLJIT_DIVMOD_SW:
1274                     FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
1275                     FAIL_IF(push_inst(compiler, ((op == SLJIT_DIVMOD_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)));
1276                     FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
1277                     return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
1278           case SLJIT_DIV_UW:
1279           case SLJIT_DIV_SW:
1280                     return push_inst(compiler, ((op == SLJIT_DIV_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1));
1281           }
1282 
1283           return SLJIT_SUCCESS;
1284 }
1285 
sljit_emit_op1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1286 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1287           sljit_s32 dst, sljit_sw dstw,
1288           sljit_s32 src, sljit_sw srcw)
1289 {
1290           sljit_s32 dst_r, flags, mem_flags;
1291           sljit_s32 op_flags = GET_ALL_FLAGS(op);
1292 
1293           CHECK_ERROR();
1294           CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1295           ADJUST_LOCAL_OFFSET(dst, dstw);
1296           ADJUST_LOCAL_OFFSET(src, srcw);
1297 
1298           compiler->cache_arg = 0;
1299           compiler->cache_argw = 0;
1300 
1301           dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
1302 
1303           op = GET_OPCODE(op);
1304           if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
1305                     switch (op) {
1306                     case SLJIT_MOV:
1307                     case SLJIT_MOV_P:
1308                               flags = WORD_SIZE;
1309                               break;
1310                     case SLJIT_MOV_U8:
1311                               flags = BYTE_SIZE;
1312                               if (src & SLJIT_IMM)
1313                                         srcw = (sljit_u8)srcw;
1314                               break;
1315                     case SLJIT_MOV_S8:
1316                               flags = BYTE_SIZE | SIGNED;
1317                               if (src & SLJIT_IMM)
1318                                         srcw = (sljit_s8)srcw;
1319                               break;
1320                     case SLJIT_MOV_U16:
1321                               flags = HALF_SIZE;
1322                               if (src & SLJIT_IMM)
1323                                         srcw = (sljit_u16)srcw;
1324                               break;
1325                     case SLJIT_MOV_S16:
1326                               flags = HALF_SIZE | SIGNED;
1327                               if (src & SLJIT_IMM)
1328                                         srcw = (sljit_s16)srcw;
1329                               break;
1330                     case SLJIT_MOV_U32:
1331                               flags = INT_SIZE;
1332                               if (src & SLJIT_IMM)
1333                                         srcw = (sljit_u32)srcw;
1334                               break;
1335                     case SLJIT_MOV_S32:
1336                               flags = INT_SIZE | SIGNED;
1337                               if (src & SLJIT_IMM)
1338                                         srcw = (sljit_s32)srcw;
1339                               break;
1340                     case SLJIT_MOVU:
1341                     case SLJIT_MOVU_P:
1342                               flags = WORD_SIZE | UPDATE;
1343                               break;
1344                     case SLJIT_MOVU_U8:
1345                               flags = BYTE_SIZE | UPDATE;
1346                               if (src & SLJIT_IMM)
1347                                         srcw = (sljit_u8)srcw;
1348                               break;
1349                     case SLJIT_MOVU_S8:
1350                               flags = BYTE_SIZE | SIGNED | UPDATE;
1351                               if (src & SLJIT_IMM)
1352                                         srcw = (sljit_s8)srcw;
1353                               break;
1354                     case SLJIT_MOVU_U16:
1355                               flags = HALF_SIZE | UPDATE;
1356                               if (src & SLJIT_IMM)
1357                                         srcw = (sljit_u16)srcw;
1358                               break;
1359                     case SLJIT_MOVU_S16:
1360                               flags = HALF_SIZE | SIGNED | UPDATE;
1361                               if (src & SLJIT_IMM)
1362                                         srcw = (sljit_s16)srcw;
1363                               break;
1364                     case SLJIT_MOVU_U32:
1365                               flags = INT_SIZE | UPDATE;
1366                               if (src & SLJIT_IMM)
1367                                         srcw = (sljit_u32)srcw;
1368                               break;
1369                     case SLJIT_MOVU_S32:
1370                               flags = INT_SIZE | SIGNED | UPDATE;
1371                               if (src & SLJIT_IMM)
1372                                         srcw = (sljit_s32)srcw;
1373                               break;
1374                     default:
1375                               SLJIT_UNREACHABLE();
1376                               flags = 0;
1377                               break;
1378                     }
1379 
1380                     if (src & SLJIT_IMM)
1381                               FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
1382                     else if (src & SLJIT_MEM) {
1383                               if (getput_arg_fast(compiler, flags, dst_r, src, srcw))
1384                                         FAIL_IF(compiler->error);
1385                               else
1386                                         FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw));
1387                     } else {
1388                               if (dst_r != TMP_REG1)
1389                                         return emit_op_imm(compiler, op | ((op_flags & SLJIT_I32_OP) ? INT_OP : 0), dst_r, TMP_REG1, src);
1390                               dst_r = src;
1391                     }
1392 
1393                     if (dst & SLJIT_MEM) {
1394                               if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
1395                                         return compiler->error;
1396                               else
1397                                         return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
1398                     }
1399                     return SLJIT_SUCCESS;
1400           }
1401 
1402           flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0;
1403           mem_flags = WORD_SIZE;
1404           if (op_flags & SLJIT_I32_OP) {
1405                     flags |= INT_OP;
1406                     mem_flags = INT_SIZE;
1407           }
1408 
1409           if (dst == SLJIT_UNUSED)
1410                     flags |= UNUSED_RETURN;
1411 
1412           if (src & SLJIT_MEM) {
1413                     if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src, srcw))
1414                               FAIL_IF(compiler->error);
1415                     else
1416                               FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src, srcw, dst, dstw));
1417                     src = TMP_REG2;
1418           }
1419 
1420           if (src & SLJIT_IMM) {
1421                     flags |= ARG2_IMM;
1422                     if (op_flags & SLJIT_I32_OP)
1423                               srcw = (sljit_s32)srcw;
1424           } else
1425                     srcw = src;
1426 
1427           emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw);
1428 
1429           if (dst & SLJIT_MEM) {
1430                     if (getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw))
1431                               return compiler->error;
1432                     else
1433                               return getput_arg(compiler, mem_flags | STORE, dst_r, dst, dstw, 0, 0);
1434           }
1435           return SLJIT_SUCCESS;
1436 }
1437 
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)1438 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
1439           sljit_s32 dst, sljit_sw dstw,
1440           sljit_s32 src1, sljit_sw src1w,
1441           sljit_s32 src2, sljit_sw src2w)
1442 {
1443           sljit_s32 dst_r, flags, mem_flags;
1444 
1445           CHECK_ERROR();
1446           CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
1447           ADJUST_LOCAL_OFFSET(dst, dstw);
1448           ADJUST_LOCAL_OFFSET(src1, src1w);
1449           ADJUST_LOCAL_OFFSET(src2, src2w);
1450 
1451           compiler->cache_arg = 0;
1452           compiler->cache_argw = 0;
1453 
1454           dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
1455           flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
1456           mem_flags = WORD_SIZE;
1457           if (op & SLJIT_I32_OP) {
1458                     flags |= INT_OP;
1459                     mem_flags = INT_SIZE;
1460           }
1461 
1462           if (dst == SLJIT_UNUSED)
1463                     flags |= UNUSED_RETURN;
1464 
1465           if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, mem_flags | STORE | ARG_TEST, TMP_REG1, dst, dstw))
1466                     flags |= SLOW_DEST;
1467 
1468           if (src1 & SLJIT_MEM) {
1469                     if (getput_arg_fast(compiler, mem_flags, TMP_REG1, src1, src1w))
1470                               FAIL_IF(compiler->error);
1471                     else
1472                               flags |= SLOW_SRC1;
1473           }
1474           if (src2 & SLJIT_MEM) {
1475                     if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src2, src2w))
1476                               FAIL_IF(compiler->error);
1477                     else
1478                               flags |= SLOW_SRC2;
1479           }
1480 
1481           if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
1482                     if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
1483                               FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, src1, src1w));
1484                               FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw));
1485                     }
1486                     else {
1487                               FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, src2, src2w));
1488                               FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw));
1489                     }
1490           }
1491           else if (flags & SLOW_SRC1)
1492                     FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw));
1493           else if (flags & SLOW_SRC2)
1494                     FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw));
1495 
1496           if (src1 & SLJIT_MEM)
1497                     src1 = TMP_REG1;
1498           if (src2 & SLJIT_MEM)
1499                     src2 = TMP_REG2;
1500 
1501           if (src1 & SLJIT_IMM)
1502                     flags |= ARG1_IMM;
1503           else
1504                     src1w = src1;
1505           if (src2 & SLJIT_IMM)
1506                     flags |= ARG2_IMM;
1507           else
1508                     src2w = src2;
1509 
1510           emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w);
1511 
1512           if (dst & SLJIT_MEM) {
1513                     if (!(flags & SLOW_DEST)) {
1514                               getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw);
1515                               return compiler->error;
1516                     }
1517                     return getput_arg(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0);
1518           }
1519 
1520           return SLJIT_SUCCESS;
1521 }
1522 
/*
 * Returns the reg_map entry that belongs to the given SLJIT register.
 * The argument is passed through CHECK_REG_INDEX before the lookup.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
	return reg_map[reg];
}
1528 
/*
 * Returns the index of the given floating point register. No mapping
 * table is involved: the value of reg is returned unchanged after it
 * has been passed through CHECK_REG_INDEX.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	return reg;
}
1534 
sljit_emit_op_custom(struct sljit_compiler * compiler,void * instruction,sljit_s32 size)1535 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
1536           void *instruction, sljit_s32 size)
1537 {
1538           CHECK_ERROR();
1539           CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
1540 
1541           return push_inst(compiler, *(sljit_ins*)instruction);
1542 }
1543 
1544 /* --------------------------------------------------------------------- */
1545 /*  Floating point operators                                             */
1546 /* --------------------------------------------------------------------- */
1547 
/*
 * Reports whether floating point operations may be used.
 * A build time SLJIT_IS_FPU_AVAILABLE definition overrides the result;
 * without it the function always returns 1.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
{
#ifdef SLJIT_IS_FPU_AVAILABLE
	return SLJIT_IS_FPU_AVAILABLE;
#else
	/* Available by default. */
	return 1;
#endif
}
1557 
/*
 * Emits a floating point load or store between the float register reg and
 * the memory operand described by arg / argw.
 *
 * flags supplies the access size (through MEM_SIZE_SHIFT) and the STORE
 * bit; when STORE is absent, bit 22 is set in the instruction word.
 * The function tries the addressing forms in this order:
 *   1. base register + (optionally shifted) offset register,
 *   2. base register + scaled non-negative immediate (STR_FI),
 *   3. base register + immediate in the range [-256, 255] (STUR_FI),
 *   4. the offset materialised in TMP_REG3, which is tracked through
 *      compiler->cache_arg / compiler->cache_argw.
 *
 * Returns the result of the final push_inst call, or compiler->error if
 * an earlier step failed.
 */
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_u32 shift = MEM_SIZE_SHIFT(flags);
	sljit_ins ins_bits = (shift << 30);
	sljit_s32 other_r;
	sljit_sw diff;

	SLJIT_ASSERT(arg & SLJIT_MEM);

	/* Every access that is not a store gets bit 22 set. */
	if (!(flags & STORE))
		ins_bits |= 1 << 22;

	if (arg & OFFS_REG_MASK) {
		/* Only the low two bits of argw are used as shift amount. */
		argw &= 3;
		/* A shift of zero, or one equal to the access size shift, fits
		   into a single instruction (bit 12 is set for the shifted form). */
		if (!argw || argw == shift)
			return push_inst(compiler, STR_FR | ins_bits | VT(reg)
				| RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0));
		/* Otherwise compute the address into TMP_REG1 and continue with
		   it as a plain base register with zero offset. */
		other_r = OFFS_REG(arg);
		arg &= REG_MASK;
		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | RM(other_r) | (argw << 10)));
		arg = TMP_REG1;
		argw = 0;
	}

	arg &= REG_MASK;
	/* Base register + non-negative offset that is a multiple of the
	   access size and does not exceed 0xfff once scaled. */
	if (arg && argw >= 0 && ((argw >> shift) <= 0xfff) && (argw & ((1 << shift) - 1)) == 0)
		return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(arg) | (argw << (10 - shift)));

	/* Base register + small signed offset. */
	if (arg && argw <= 255 && argw >= -256)
		return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(arg) | ((argw & 0x1ff) << 12));

	/* Slow cases */
	if (compiler->cache_arg == SLJIT_MEM && argw != compiler->cache_argw) {
		/* TMP_REG3 is tracked as holding cache_argw: try to reach argw
		   relative to it before reloading the whole constant. */
		diff = argw - compiler->cache_argw;
		if (!arg && diff <= 255 && diff >= -256)
			return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12));
		/* If emit_set_delta supports the difference, TMP_REG3 is adjusted
		   in place and the cache entry is moved to argw. */
		if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			compiler->cache_argw = argw;
		}
	}

	/* No usable cache entry: load argw into TMP_REG3 and record it. */
	if (compiler->cache_arg != SLJIT_MEM || argw != compiler->cache_argw) {
		compiler->cache_arg = SLJIT_MEM;
		compiler->cache_argw = argw;
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
	}

	/* With a base register TMP_REG3 is used as the offset register,
	   otherwise TMP_REG3 itself is the base. */
	if (arg & REG_MASK)
		return push_inst(compiler, STR_FR | ins_bits | VT(reg) | RN(arg) | RM(TMP_REG3));
	return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(TMP_REG3));
}
1610 
sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1611 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
1612           sljit_s32 dst, sljit_sw dstw,
1613           sljit_s32 src, sljit_sw srcw)
1614 {
1615           sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
1616           sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
1617 
1618           if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64)
1619                     inv_bits |= (1 << 31);
1620 
1621           if (src & SLJIT_MEM) {
1622                     emit_fop_mem(compiler, (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw);
1623                     src = TMP_FREG1;
1624           }
1625 
1626           FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src)));
1627 
1628           if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
1629                     return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw);
1630           return SLJIT_SUCCESS;
1631 }
1632 
sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1633 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
1634           sljit_s32 dst, sljit_sw dstw,
1635           sljit_s32 src, sljit_sw srcw)
1636 {
1637           sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1638           sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
1639 
1640           if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
1641                     inv_bits |= (1 << 31);
1642 
1643           if (src & SLJIT_MEM) {
1644                     emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw);
1645                     src = TMP_REG1;
1646           } else if (src & SLJIT_IMM) {
1647 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1648                     if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
1649                               srcw = (sljit_s32)srcw;
1650 #endif
1651                     FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
1652                     src = TMP_REG1;
1653           }
1654 
1655           FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src)));
1656 
1657           if (dst & SLJIT_MEM)
1658                     return emit_fop_mem(compiler, ((op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw);
1659           return SLJIT_SUCCESS;
1660 }
1661 
sljit_emit_fop1_cmp(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)1662 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
1663           sljit_s32 src1, sljit_sw src1w,
1664           sljit_s32 src2, sljit_sw src2w)
1665 {
1666           sljit_s32 mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE;
1667           sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
1668 
1669           if (src1 & SLJIT_MEM) {
1670                     emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
1671                     src1 = TMP_FREG1;
1672           }
1673 
1674           if (src2 & SLJIT_MEM) {
1675                     emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w);
1676                     src2 = TMP_FREG2;
1677           }
1678 
1679           return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2));
1680 }
1681 
sljit_emit_fop1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1682 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
1683           sljit_s32 dst, sljit_sw dstw,
1684           sljit_s32 src, sljit_sw srcw)
1685 {
1686           sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE;
1687           sljit_ins inv_bits;
1688 
1689           CHECK_ERROR();
1690           compiler->cache_arg = 0;
1691           compiler->cache_argw = 0;
1692 
1693           SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x100) == WORD_SIZE, must_be_one_bit_difference);
1694           SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
1695 
1696           inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
1697           dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1698 
1699           if (src & SLJIT_MEM) {
1700                     emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x100) : mem_flags, dst_r, src, srcw);
1701                     src = dst_r;
1702           }
1703 
1704           switch (GET_OPCODE(op)) {
1705           case SLJIT_MOV_F64:
1706                     if (src != dst_r) {
1707                               if (dst_r != TMP_FREG1)
1708                                         FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src)));
1709                               else
1710                                         dst_r = src;
1711                     }
1712                     break;
1713           case SLJIT_NEG_F64:
1714                     FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src)));
1715                     break;
1716           case SLJIT_ABS_F64:
1717                     FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src)));
1718                     break;
1719           case SLJIT_CONV_F64_FROM_F32:
1720                     FAIL_IF(push_inst(compiler, FCVT | ((op & SLJIT_F32_OP) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src)));
1721                     break;
1722           }
1723 
1724           if (dst & SLJIT_MEM)
1725                     return emit_fop_mem(compiler, mem_flags | STORE, dst_r, dst, dstw);
1726           return SLJIT_SUCCESS;
1727 }
1728 
sljit_emit_fop2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)1729 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
1730           sljit_s32 dst, sljit_sw dstw,
1731           sljit_s32 src1, sljit_sw src1w,
1732           sljit_s32 src2, sljit_sw src2w)
1733 {
1734           sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE;
1735           sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
1736 
1737           CHECK_ERROR();
1738           CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
1739           ADJUST_LOCAL_OFFSET(dst, dstw);
1740           ADJUST_LOCAL_OFFSET(src1, src1w);
1741           ADJUST_LOCAL_OFFSET(src2, src2w);
1742 
1743           compiler->cache_arg = 0;
1744           compiler->cache_argw = 0;
1745 
1746           dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1747           if (src1 & SLJIT_MEM) {
1748                     emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
1749                     src1 = TMP_FREG1;
1750           }
1751           if (src2 & SLJIT_MEM) {
1752                     emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w);
1753                     src2 = TMP_FREG2;
1754           }
1755 
1756           switch (GET_OPCODE(op)) {
1757           case SLJIT_ADD_F64:
1758                     FAIL_IF(push_inst(compiler, (FADD ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
1759                     break;
1760           case SLJIT_SUB_F64:
1761                     FAIL_IF(push_inst(compiler, (FSUB ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
1762                     break;
1763           case SLJIT_MUL_F64:
1764                     FAIL_IF(push_inst(compiler, (FMUL ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
1765                     break;
1766           case SLJIT_DIV_F64:
1767                     FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
1768                     break;
1769           }
1770 
1771           if (!(dst & SLJIT_MEM))
1772                     return SLJIT_SUCCESS;
1773           return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw);
1774 }
1775 
1776 /* --------------------------------------------------------------------- */
1777 /*  Other instructions                                                   */
1778 /* --------------------------------------------------------------------- */
1779 
sljit_emit_fast_enter(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)1780 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
1781 {
1782           CHECK_ERROR();
1783           CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
1784           ADJUST_LOCAL_OFFSET(dst, dstw);
1785 
1786           /* For UNUSED dst. Uncommon, but possible. */
1787           if (dst == SLJIT_UNUSED)
1788                     return SLJIT_SUCCESS;
1789 
1790           if (FAST_IS_REG(dst))
1791                     return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR));
1792 
1793           /* Memory. */
1794           return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_LR, dst, dstw);
1795 }
1796 
sljit_emit_fast_return(struct sljit_compiler * compiler,sljit_s32 src,sljit_sw srcw)1797 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
1798 {
1799           CHECK_ERROR();
1800           CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
1801           ADJUST_LOCAL_OFFSET(src, srcw);
1802 
1803           if (FAST_IS_REG(src))
1804                     FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src)));
1805           else if (src & SLJIT_MEM)
1806                     FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw));
1807           else if (src & SLJIT_IMM)
1808                     FAIL_IF(load_immediate(compiler, TMP_LR, srcw));
1809 
1810           return push_inst(compiler, RET | RN(TMP_LR));
1811 }
1812 
1813 /* --------------------------------------------------------------------- */
1814 /*  Conditional instructions                                             */
1815 /* --------------------------------------------------------------------- */
1816 
get_cc(sljit_s32 type)1817 static sljit_uw get_cc(sljit_s32 type)
1818 {
1819           switch (type) {
1820           case SLJIT_EQUAL:
1821           case SLJIT_MUL_NOT_OVERFLOW:
1822           case SLJIT_EQUAL_F64:
1823                     return 0x1;
1824 
1825           case SLJIT_NOT_EQUAL:
1826           case SLJIT_MUL_OVERFLOW:
1827           case SLJIT_NOT_EQUAL_F64:
1828                     return 0x0;
1829 
1830           case SLJIT_LESS:
1831           case SLJIT_LESS_F64:
1832                     return 0x2;
1833 
1834           case SLJIT_GREATER_EQUAL:
1835           case SLJIT_GREATER_EQUAL_F64:
1836                     return 0x3;
1837 
1838           case SLJIT_GREATER:
1839           case SLJIT_GREATER_F64:
1840                     return 0x9;
1841 
1842           case SLJIT_LESS_EQUAL:
1843           case SLJIT_LESS_EQUAL_F64:
1844                     return 0x8;
1845 
1846           case SLJIT_SIG_LESS:
1847                     return 0xa;
1848 
1849           case SLJIT_SIG_GREATER_EQUAL:
1850                     return 0xb;
1851 
1852           case SLJIT_SIG_GREATER:
1853                     return 0xd;
1854 
1855           case SLJIT_SIG_LESS_EQUAL:
1856                     return 0xc;
1857 
1858           case SLJIT_OVERFLOW:
1859           case SLJIT_UNORDERED_F64:
1860                     return 0x7;
1861 
1862           case SLJIT_NOT_OVERFLOW:
1863           case SLJIT_ORDERED_F64:
1864                     return 0x6;
1865 
1866           default:
1867                     SLJIT_UNREACHABLE();
1868                     return 0xe;
1869           }
1870 }
1871 
sljit_emit_label(struct sljit_compiler * compiler)1872 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
1873 {
1874           struct sljit_label *label;
1875 
1876           CHECK_ERROR_PTR();
1877           CHECK_PTR(check_sljit_emit_label(compiler));
1878 
1879           if (compiler->last_label && compiler->last_label->size == compiler->size)
1880                     return compiler->last_label;
1881 
1882           label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
1883           PTR_FAIL_IF(!label);
1884           set_label(label, compiler);
1885           return label;
1886 }
1887 
sljit_emit_jump(struct sljit_compiler * compiler,sljit_s32 type)1888 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
1889 {
1890           struct sljit_jump *jump;
1891 
1892           CHECK_ERROR_PTR();
1893           CHECK_PTR(check_sljit_emit_jump(compiler, type));
1894 
1895           jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1896           PTR_FAIL_IF(!jump);
1897           set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
1898           type &= 0xff;
1899 
1900           if (type < SLJIT_JUMP) {
1901                     jump->flags |= IS_COND;
1902                     PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(type)));
1903           }
1904           else if (type >= SLJIT_FAST_CALL)
1905                     jump->flags |= IS_BL;
1906 
1907           PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
1908           jump->addr = compiler->size;
1909           PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1)));
1910 
1911           return jump;
1912 }
1913 
emit_cmp_to0(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src,sljit_sw srcw)1914 static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compiler, sljit_s32 type,
1915           sljit_s32 src, sljit_sw srcw)
1916 {
1917           struct sljit_jump *jump;
1918           sljit_ins inv_bits = (type & SLJIT_I32_OP) ? (1 << 31) : 0;
1919 
1920           SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL);
1921           ADJUST_LOCAL_OFFSET(src, srcw);
1922 
1923           jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1924           PTR_FAIL_IF(!jump);
1925           set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
1926           jump->flags |= IS_CBZ | IS_COND;
1927 
1928           if (src & SLJIT_MEM) {
1929                     PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw));
1930                     src = TMP_REG1;
1931           }
1932           else if (src & SLJIT_IMM) {
1933                     PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
1934                     src = TMP_REG1;
1935           }
1936           SLJIT_ASSERT(FAST_IS_REG(src));
1937 
1938           if ((type & 0xff) == SLJIT_EQUAL)
1939                     inv_bits |= 1 << 24;
1940 
1941           PTR_FAIL_IF(push_inst(compiler, (CBZ ^ inv_bits) | (6 << 5) | RT(src)));
1942           PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
1943           jump->addr = compiler->size;
1944           PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG1)));
1945           return jump;
1946 }
1947 
sljit_emit_ijump(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src,sljit_sw srcw)1948 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
1949 {
1950           struct sljit_jump *jump;
1951 
1952           CHECK_ERROR();
1953           CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
1954           ADJUST_LOCAL_OFFSET(src, srcw);
1955 
1956           /* In ARM, we don't need to touch the arguments. */
1957           if (!(src & SLJIT_IMM)) {
1958                     if (src & SLJIT_MEM) {
1959                               FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw));
1960                               src = TMP_REG1;
1961                     }
1962                     return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(src));
1963           }
1964 
1965           jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1966           FAIL_IF(!jump);
1967           set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
1968           jump->u.target = srcw;
1969 
1970           FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
1971           jump->addr = compiler->size;
1972           return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1));
1973 }
1974 
sljit_emit_op_flags(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw,sljit_s32 type)1975 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
1976           sljit_s32 dst, sljit_sw dstw,
1977           sljit_s32 src, sljit_sw srcw,
1978           sljit_s32 type)
1979 {
1980           sljit_s32 dst_r, flags, mem_flags;
1981           sljit_ins cc;
1982 
1983           CHECK_ERROR();
1984           CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
1985           ADJUST_LOCAL_OFFSET(dst, dstw);
1986           ADJUST_LOCAL_OFFSET(src, srcw);
1987 
1988           if (dst == SLJIT_UNUSED)
1989                     return SLJIT_SUCCESS;
1990 
1991           cc = get_cc(type & 0xff);
1992           dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1993 
1994           if (GET_OPCODE(op) < SLJIT_ADD) {
1995                     FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO)));
1996                     if (dst_r != TMP_REG1)
1997                               return SLJIT_SUCCESS;
1998                     return emit_op_mem(compiler, (GET_OPCODE(op) == SLJIT_MOV ? WORD_SIZE : INT_SIZE) | STORE, TMP_REG1, dst, dstw);
1999           }
2000 
2001           compiler->cache_arg = 0;
2002           compiler->cache_argw = 0;
2003           flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
2004           mem_flags = WORD_SIZE;
2005           if (op & SLJIT_I32_OP) {
2006                     flags |= INT_OP;
2007                     mem_flags = INT_SIZE;
2008           }
2009 
2010           if (src & SLJIT_MEM) {
2011                     FAIL_IF(emit_op_mem2(compiler, mem_flags, TMP_REG1, src, srcw, dst, dstw));
2012                     src = TMP_REG1;
2013                     srcw = 0;
2014           } else if (src & SLJIT_IMM)
2015                     flags |= ARG1_IMM;
2016 
2017           FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO)));
2018           emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src, TMP_REG2);
2019 
2020           if (dst_r != TMP_REG1)
2021                     return SLJIT_SUCCESS;
2022           return emit_op_mem2(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0);
2023 }
2024 
sljit_emit_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw,sljit_sw init_value)2025 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
2026 {
2027           struct sljit_const *const_;
2028           sljit_s32 dst_r;
2029 
2030           CHECK_ERROR_PTR();
2031           CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2032           ADJUST_LOCAL_OFFSET(dst, dstw);
2033 
2034           const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2035           PTR_FAIL_IF(!const_);
2036           set_const(const_, compiler);
2037 
2038           dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2039           PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, init_value));
2040 
2041           if (dst & SLJIT_MEM)
2042                     PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw));
2043           return const_;
2044 }
2045 
sljit_set_jump_addr(sljit_uw addr,sljit_uw new_target,sljit_sw executable_offset)2046 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
2047 {
2048           sljit_ins* inst = (sljit_ins*)addr;
2049           modify_imm64_const(inst, new_target);
2050           inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
2051           SLJIT_CACHE_FLUSH(inst, inst + 4);
2052 }
2053 
sljit_set_const(sljit_uw addr,sljit_sw new_constant,sljit_sw executable_offset)2054 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
2055 {
2056           sljit_ins* inst = (sljit_ins*)addr;
2057           modify_imm64_const(inst, new_constant);
2058           inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
2059           SLJIT_CACHE_FLUSH(inst, inst + 4);
2060 }
2061