1;; ARM NEON coprocessor Machine Description
2;; Copyright (C) 2006-2022 Free Software Foundation, Inc.
3;; Written by CodeSourcery.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 3, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful, but
13;; WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15;; General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3.  If not see
19;; <http://www.gnu.org/licenses/>.
20
21
22;; Attribute used to permit string comparisons against <VQH_mnem> in
23;; type attribute definitions.
/* Declare the "vqh_mnem" attribute.  Its permitted values are vadd, vmin and
   vmax; the default is "vadd".  */
24(define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
/* Store the V8QI register in operand 1 (constraint "w") to the memory in
   operand 0 (constraint "Un"), wrapped in UNSPEC_UNALIGNED_STORE.  Enabled
   under TARGET_NEON; the assembly text comes from output_move_neon.  */
26(define_insn "unaligned_storev8qi"
27  [(set (match_operand:V8QI 0 "memory_operand" "=Un")
28          (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")]
29                         UNSPEC_UNALIGNED_STORE))]
30  "TARGET_NEON"
31  "*
32  return output_move_neon (operands);
33  "
34  [(set_attr "type" "neon_store1_1reg")])
35
/* Move pattern for the VDXMOV modes, with ten alternatives.  The insn
   condition requires TARGET_NEON and that at least one of the two operands
   is a register.  Every attribute list at the end is positional: entry N
   applies to alternative N of the constraint strings.  */
36(define_insn "*neon_mov<mode>"
37  [(set (match_operand:VDXMOV 0 "nonimmediate_operand"
38            "=w,Un,w, w, w,  ?r,?w,?r, ?Us,*r")
39          (match_operand:VDXMOV 1 "general_operand"
40            " w,w, Dm,Dn,Uni, w, r, Usi,r,*r"))]
41  "TARGET_NEON
42   && (register_operand (operands[0], <MODE>mode)
43       || register_operand (operands[1], <MODE>mode))"
44{
  /* Alternatives 2 and 3 (source constraints Dm and Dn) are printed as an
     immediate vmov built here.  */
45  if (which_alternative == 2 || which_alternative == 3)
46    {
47      int width, is_valid;
      /* Static because the buffer is returned to the caller below.  */
48      static char templ[40];
49
      /* NOTE(review): presumably this rewrites operands[1] to the value to
         print and stores the element width in WIDTH -- confirm against the
         definition of simd_immediate_valid_for_move.  */
50      is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
51        &operands[1], &width);
52
53      gcc_assert (is_valid != 0);
54
      /* A width of 0 selects the fixed vmov.f32 template; any other width is
         printed into the vmov.i<width> mnemonic.  */
55      if (width == 0)
56        return "vmov.f32\t%P0, %1  @ <mode>";
57      else
58        sprintf (templ, "vmov.i%d\t%%P0, %%x1  @ <mode>", width);
59
60      return templ;
61    }
62
  /* Alternatives 2 and 3 returned above, hence gcc_unreachable for them.
     Alternative 9 returns "#"; alternatives 7 and 8 take the default and go
     through output_move_double.  */
63  switch (which_alternative)
64    {
65    case 0: return "vmov\t%P0, %P1  @ <mode>";
66    case 1: case 4: return output_move_neon (operands);
67    case 2: case 3: gcc_unreachable ();
68    case 5: return "vmov\t%Q0, %R0, %P1  @ <mode>";
69    case 6: return "vmov\t%P0, %Q1, %R1  @ <mode>";
70    case 9: return "#";
71    default: return output_move_double (operands, true, NULL);
72    }
73}
74 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
75                    neon_move<q>,neon_load1_1reg, neon_to_gp<q>,\
76                    neon_from_gp<q>,neon_load1_2reg, neon_store1_2reg,\
77                        multiple")
78  (set_attr "length" "4,4,4,4,4,4,4,8,8,8")
79  (set_attr "arm_pool_range"     "*,*,*,*,1020,*,*,1020,*,*")
80  (set_attr "thumb2_pool_range"     "*,*,*,*,1018,*,*,1018,*,*")
81  (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,1004,*,*")])
82
/* Move pattern for the VQXMOV modes, with ten alternatives.  The insn
   condition requires TARGET_NEON and that at least one of the two operands
   is a register.  Every attribute list at the end is positional: entry N
   applies to alternative N of the constraint strings.  */
83(define_insn "*neon_mov<mode>"
84  [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
85            "=w,Un,w, w, w,  ?r,?w,?r,?r,  ?Us")
86          (match_operand:VQXMOV 1 "general_operand"
87            " w,w, Dm,DN,Uni, w, r, r, Usi, r"))]
88  "TARGET_NEON
89   && (register_operand (operands[0], <MODE>mode)
90       || register_operand (operands[1], <MODE>mode))"
91{
  /* Alternatives 2 and 3 (source constraints Dm and DN) are printed as an
     immediate vmov built here.  */
92  if (which_alternative == 2 || which_alternative == 3)
93    {
94      int width, is_valid;
      /* Static because the buffer is returned to the caller below.  */
95      static char templ[40];
96
      /* NOTE(review): presumably this rewrites operands[1] to the value to
         print and stores the element width in WIDTH -- confirm against the
         definition of simd_immediate_valid_for_move.  */
97      is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
98        &operands[1], &width);
99
100      gcc_assert (is_valid != 0);
101
      /* A width of 0 selects the fixed vmov.f32 template; any other width is
         printed into the vmov.i<width> mnemonic.  */
102      if (width == 0)
103        return "vmov.f32\t%q0, %1  @ <mode>";
104      else
105        sprintf (templ, "vmov.i%d\t%%q0, %%1  @ <mode>", width);
106
107      return templ;
108    }
109
  /* Alternatives 2 and 3 returned above, hence gcc_unreachable for them.
     Alternatives 5 and 6 each print two vmov instructions separated by
     "\;".  Alternatives 7, 8 and 9 take the default and go through
     output_move_quad.  */
110  switch (which_alternative)
111    {
112    case 0: return "vmov\t%q0, %q1  @ <mode>";
113    case 1: case 4: return output_move_neon (operands);
114    case 2: case 3: gcc_unreachable ();
115    case 5: return "vmov\t%Q0, %R0, %e1  @ <mode>\;vmov\t%J0, %K0, %f1";
116    case 6: return "vmov\t%e0, %Q1, %R1  @ <mode>\;vmov\t%f0, %J1, %K1";
117    default: return output_move_quad (operands);
118    }
119}
120  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
121                     neon_move_q,neon_load2_2reg_q,neon_to_gp_q,\
122                     neon_from_gp_q,mov_reg,neon_load1_4reg,neon_store1_4reg")
123   (set_attr "length" "4,8,4,4,8,8,8,16,8,16")
124   (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*")
125   (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*")
126   (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")])
127
128/* We define these mov expanders to match the standard mov$a optab to prevent
129   the mid-end from trying to do a subreg for these modes which is the most
130   inefficient way to expand the move.  Also, big-endian subregs aren't
131   allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS.
132   Without these RTL generation patterns the mid-end would attempt to take a
133   sub-reg and may ICE if it can't.  */
134
/* Expander for TImode moves, enabled under TARGET_NEON.  It asserts with
   gcc_checking_assert that both operands satisfy aligned_operand.  While
   new pseudos may still be created, a source that feeds a destination which
   is not a REG is first forced into a register.  The preparation code does
   not emit anything itself.  */
135(define_expand "movti"
136  [(set (match_operand:TI 0 "nonimmediate_operand")
137          (match_operand:TI 1 "general_operand"))]
138  "TARGET_NEON"
139{
140  gcc_checking_assert (aligned_operand (operands[0], TImode));
141  gcc_checking_assert (aligned_operand (operands[1], TImode));
142  if (can_create_pseudo_p ())
143    {
144      if (!REG_P (operands[0]))
145          operands[1] = force_reg (TImode, operands[1]);
146    }
147})
148
/* Expander for moves in the VSTRUCT modes, enabled under TARGET_NEON or
   TARGET_HAVE_MVE.  It asserts with gcc_checking_assert that both operands
   satisfy aligned_operand.  While new pseudos may still be created, a source
   that feeds a destination which is not a REG is first forced into a
   register.  */
149(define_expand "mov<mode>"
150  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
151          (match_operand:VSTRUCT 1 "general_operand"))]
152  "TARGET_NEON || TARGET_HAVE_MVE"
153{
154  gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
155  gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));
156  if (can_create_pseudo_p ())
157    {
158      if (!REG_P (operands[0]))
159          operands[1] = force_reg (<MODE>mode, operands[1]);
160    }
161})
162
163;; The mov<mode> patterns for the modes v8hf, v4hf, v4bf and v8bf are split into
164;; two groups.  The pattern movv8hf is common for MVE and NEON, so it is moved
165;; into vec-common.md file.  Remaining mov expand patterns with half float and
166;; bfloats are implemented below.
/* Expander for moves in the VHFBF_split modes, enabled under TARGET_NEON.
   Both operands use the s_register_operand predicate.  It asserts with
   gcc_checking_assert that both operands satisfy aligned_operand.  While
   new pseudos may still be created, a source that feeds a destination which
   is not a REG is first forced into a register.  */
167(define_expand "mov<mode>"
168  [(set (match_operand:VHFBF_split 0 "s_register_operand")
169          (match_operand:VHFBF_split 1 "s_register_operand"))]
170  "TARGET_NEON"
171{
172  gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
173  gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));
174  if (can_create_pseudo_p ())
175    {
176      if (!REG_P (operands[0]))
177          operands[1] = force_reg (<MODE>mode, operands[1]);
178    }
179})
180
/* Move pattern for the VSTRUCT modes with three alternatives: register to
   register, register to "Ut" memory, and "Ut" memory to register.  The insn
   condition requires TARGET_NEON or TARGET_HAVE_MVE and that at least one
   operand is a register.  The "length" attribute is not a constant list; it
   is computed per insn by arm_attr_length_move_neon.  */
181(define_insn "*neon_mov<mode>"
182  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand"     "=w,Ut,w")
183          (match_operand:VSTRUCT 1 "general_operand"        " w,w, Ut"))]
184  "(TARGET_NEON || TARGET_HAVE_MVE)
185   && (register_operand (operands[0], <MODE>mode)
186       || register_operand (operands[1], <MODE>mode))"
187{
  /* The register-to-register alternative returns "#"; the two memory
     alternatives are printed by output_move_neon.  */
188  switch (which_alternative)
189    {
190    case 0: return "#";
191    case 1: case 2: return output_move_neon (operands);
192    default: gcc_unreachable ();
193    }
194}
195  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
196   (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
197
/* After reload, under TARGET_NEON, split an EI register-to-register move
   into two sets.  The pieces are a TImode register at the original register
   number and a DImode register at register number + 4.  */
198(define_split
199  [(set (match_operand:EI 0 "s_register_operand" "")
200          (match_operand:EI 1 "s_register_operand" ""))]
201  "TARGET_NEON && reload_completed"
202  [(set (match_dup 0) (match_dup 1))
203   (set (match_dup 2) (match_dup 3))]
204{
205  int rdest = REGNO (operands[0]);
206  int rsrc = REGNO (operands[1]);
207  rtx dest[2], src[2];
208
209  dest[0] = gen_rtx_REG (TImode, rdest);
210  src[0] = gen_rtx_REG (TImode, rsrc);
211  dest[1] = gen_rtx_REG (DImode, rdest + 4);
212  src[1] = gen_rtx_REG (DImode, rsrc + 4);
213
  /* NOTE(review): presumably this fills operands[0..3] with the pieces in an
     order that is safe when source and destination overlap -- confirm
     against the definition of neon_disambiguate_copy.  */
214  neon_disambiguate_copy (operands, dest, src, 2);
215})
216
/* After reload, under TARGET_NEON or TARGET_HAVE_MVE, split an OI
   register-to-register move into two sets.  The pieces are TImode registers
   at the original register number and at register number + 4.  */
217(define_split
218  [(set (match_operand:OI 0 "s_register_operand" "")
219          (match_operand:OI 1 "s_register_operand" ""))]
220  "(TARGET_NEON || TARGET_HAVE_MVE)&& reload_completed"
221  [(set (match_dup 0) (match_dup 1))
222   (set (match_dup 2) (match_dup 3))]
223{
224  int rdest = REGNO (operands[0]);
225  int rsrc = REGNO (operands[1]);
226  rtx dest[2], src[2];
227
228  dest[0] = gen_rtx_REG (TImode, rdest);
229  src[0] = gen_rtx_REG (TImode, rsrc);
230  dest[1] = gen_rtx_REG (TImode, rdest + 4);
231  src[1] = gen_rtx_REG (TImode, rsrc + 4);
232
  /* NOTE(review): presumably this fills operands[0..3] with the pieces in an
     order that is safe when source and destination overlap -- confirm
     against the definition of neon_disambiguate_copy.  */
233  neon_disambiguate_copy (operands, dest, src, 2);
234})
235
/* After reload, under TARGET_NEON, split a CI register-to-register move
   into three sets.  The pieces are TImode registers at the original register
   number and at register numbers + 4 and + 8.  */
236(define_split
237  [(set (match_operand:CI 0 "s_register_operand" "")
238          (match_operand:CI 1 "s_register_operand" ""))]
239  "TARGET_NEON && reload_completed"
240  [(set (match_dup 0) (match_dup 1))
241   (set (match_dup 2) (match_dup 3))
242   (set (match_dup 4) (match_dup 5))]
243{
244  int rdest = REGNO (operands[0]);
245  int rsrc = REGNO (operands[1]);
246  rtx dest[3], src[3];
247
248  dest[0] = gen_rtx_REG (TImode, rdest);
249  src[0] = gen_rtx_REG (TImode, rsrc);
250  dest[1] = gen_rtx_REG (TImode, rdest + 4);
251  src[1] = gen_rtx_REG (TImode, rsrc + 4);
252  dest[2] = gen_rtx_REG (TImode, rdest + 8);
253  src[2] = gen_rtx_REG (TImode, rsrc + 8);
254
  /* NOTE(review): presumably this fills operands[0..5] with the pieces in an
     order that is safe when source and destination overlap -- confirm
     against the definition of neon_disambiguate_copy.  */
255  neon_disambiguate_copy (operands, dest, src, 3);
256})
257
/* After reload, under TARGET_NEON or TARGET_HAVE_MVE, split an XI
   register-to-register move into four sets.  The pieces are TImode
   registers at the original register number and at register numbers + 4,
   + 8 and + 12.  */
258(define_split
259  [(set (match_operand:XI 0 "s_register_operand" "")
260          (match_operand:XI 1 "s_register_operand" ""))]
261  "(TARGET_NEON || TARGET_HAVE_MVE) && reload_completed"
262  [(set (match_dup 0) (match_dup 1))
263   (set (match_dup 2) (match_dup 3))
264   (set (match_dup 4) (match_dup 5))
265   (set (match_dup 6) (match_dup 7))]
266{
267  int rdest = REGNO (operands[0]);
268  int rsrc = REGNO (operands[1]);
269  rtx dest[4], src[4];
270
271  dest[0] = gen_rtx_REG (TImode, rdest);
272  src[0] = gen_rtx_REG (TImode, rsrc);
273  dest[1] = gen_rtx_REG (TImode, rdest + 4);
274  src[1] = gen_rtx_REG (TImode, rsrc + 4);
275  dest[2] = gen_rtx_REG (TImode, rdest + 8);
276  src[2] = gen_rtx_REG (TImode, rsrc + 8);
277  dest[3] = gen_rtx_REG (TImode, rdest + 12);
278  src[3] = gen_rtx_REG (TImode, rsrc + 12);
279
  /* NOTE(review): presumably this fills operands[0..7] with the pieces in an
     order that is safe when source and destination overlap -- confirm
     against the definition of neon_disambiguate_copy.  */
280  neon_disambiguate_copy (operands, dest, src, 4);
281})
282
/* Misaligned store for the VDX modes: register operand 1 is written to the
   "Um" memory in operand 0 with a single vst1, wrapped in
   UNSPEC_MISALIGNED_ACCESS.  Requires TARGET_NEON, a little-endian target
   (!BYTES_BIG_ENDIAN) and unaligned_access.  */
283(define_insn "*movmisalign<mode>_neon_store"
284  [(set (match_operand:VDX 0 "neon_permissive_struct_operand"         "=Um")
285          (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
286                        UNSPEC_MISALIGNED_ACCESS))]
287  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
288  "vst1.<V_sz_elem>\t{%P1}, %A0"
289  [(set_attr "type" "neon_store1_1reg<q>")])
290
/* Misaligned load for the VDX modes: the "Um" memory in operand 1 is read
   into register operand 0 with a single vld1, wrapped in
   UNSPEC_MISALIGNED_ACCESS.  Requires TARGET_NEON, a little-endian target
   (!BYTES_BIG_ENDIAN) and unaligned_access.  */
291(define_insn "*movmisalign<mode>_neon_load"
292  [(set (match_operand:VDX 0 "s_register_operand"                     "=w")
293          (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
294                                                                                          " Um")]
295                        UNSPEC_MISALIGNED_ACCESS))]
296  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
297  "vld1.<V_sz_elem>\t{%P0}, %A1"
298  [(set_attr "type" "neon_load1_1reg<q>")])
299
/* Misaligned store for the VQX modes: register operand 1 is written to the
   "Um" memory in operand 0 with a single vst1, wrapped in
   UNSPEC_MISALIGNED_ACCESS.  Requires TARGET_NEON, a little-endian target
   (!BYTES_BIG_ENDIAN) and unaligned_access.  */
300(define_insn "*movmisalign<mode>_neon_store"
301  [(set (match_operand:VQX 0 "neon_permissive_struct_operand"  "=Um")
302          (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
303                        UNSPEC_MISALIGNED_ACCESS))]
304  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
305  "vst1.<V_sz_elem>\t{%q1}, %A0"
306  [(set_attr "type" "neon_store1_1reg<q>")])
307
/* Misaligned load for the VQX modes: the "Um" memory in operand 1 is read
   into register operand 0 with a single vld1, wrapped in
   UNSPEC_MISALIGNED_ACCESS.  Requires TARGET_NEON, a little-endian target
   (!BYTES_BIG_ENDIAN) and unaligned_access.  */
308(define_insn "*movmisalign<mode>_neon_load"
309  [(set (match_operand:VQX 0 "s_register_operand"                     "=w")
310          (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
311                                                                                          " Um")]
312                        UNSPEC_MISALIGNED_ACCESS))]
313  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
314  "vld1.<V_sz_elem>\t{%q0}, %A1"
315  [(set_attr "type" "neon_load1_1reg<q>")])
316
/* Set one lane of a VD_LANE vector.  Operand 1 is the scalar, taken from
   "Um" memory (alternative 0) or a core register (alternative 1).  Operand 3
   is the vector being modified and is tied to the destination by the "0"
   constraint.  Operand 2 is the vec_merge mask.  */
317(define_insn "@vec_set<mode>_internal"
318  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
319        (vec_merge:VD_LANE
320          (vec_duplicate:VD_LANE
321            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
322          (match_operand:VD_LANE 3 "s_register_operand" "0,0")
323          (match_operand:SI 2 "immediate_operand" "i,i")))]
324  "TARGET_NEON"
325{
  /* Turn the mask into a lane number: the index of its lowest set bit.  */
326  int elt = ffs ((int) INTVAL (operands[2])) - 1;
  /* On big-endian the lane number is mirrored within the vector.  */
327  if (BYTES_BIG_ENDIAN)
328    elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
  /* Operand 2 now holds the lane number that the templates print.  */
329  operands[2] = GEN_INT (elt);
330
331  if (which_alternative == 0)
332    return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
333  else
334    return "vmov.<V_sz_elem>\t%P0[%c2], %1";
335}
336  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
337
/* Set one lane of a VQ2 vector.  Operand 1 is the scalar, taken from "Um"
   memory (alternative 0) or a core register (alternative 1).  Operand 3 is
   the vector being modified and is tied to the destination by the "0"
   constraint.  Operand 2 is the vec_merge mask.  The instruction is printed
   against a <V_HALF>mode register covering one half of the destination.  */
338(define_insn "@vec_set<mode>_internal"
339  [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
340        (vec_merge:VQ2
341          (vec_duplicate:VQ2
342            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
343          (match_operand:VQ2 3 "s_register_operand" "0,0")
344          (match_operand:SI 2 "immediate_operand" "i,i")))]
345  "TARGET_NEON"
346{
  /* Turn the mask into a lane number: the index of its lowest set bit.  */
347  HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
348  int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
  /* ELT is the lane within one half; HI is twice the index of the half and
     is added to the destination register number below.  */
349  int elt = elem % half_elts;
350  int hi = (elem / half_elts) * 2;
351  int regno = REGNO (operands[0]);
352
  /* On big-endian the lane number is mirrored within the half.  */
353  if (BYTES_BIG_ENDIAN)
354    elt = half_elts - 1 - elt;
355
  /* Replace operand 0 by the selected half and operand 2 by the lane number
     so that the templates print them directly.  */
356  operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
357  operands[2] = GEN_INT (elt);
358
359  if (which_alternative == 0)
360    return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
361  else
362    return "vmov.<V_sz_elem>\t%P0[%c2], %1";
363}
364  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
365)
366
/* Set one DImode element of a V2DI_ONLY vector.  Operand 1 is the scalar,
   taken from "Um" memory (alternative 0) or core registers (alternative 1).
   Operand 3 is the vector being modified and is tied to the destination by
   the "0" constraint.  Operand 2 is the vec_merge mask.  */
367(define_insn "@vec_set<mode>_internal"
368  [(set (match_operand:V2DI_ONLY 0 "s_register_operand" "=w,w")
369        (vec_merge:V2DI_ONLY
370          (vec_duplicate:V2DI_ONLY
371            (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
372          (match_operand:V2DI_ONLY 3 "s_register_operand" "0,0")
373          (match_operand:SI 2 "immediate_operand" "i,i")))]
374  "TARGET_NEON"
375{
  /* Turn the mask into an element number: the index of its lowest set
     bit.  */
376  HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
  /* The element is addressed as a DImode register located 2 * ELEM register
     numbers after the start of the destination.  */
377  int regno = REGNO (operands[0]) + 2 * elem;
378
379  operands[0] = gen_rtx_REG (DImode, regno);
380
381  if (which_alternative == 0)
382    return "vld1.64\t%P0, %A1";
383  else
384    return "vmov\t%P0, %Q1, %R1";
385}
386  [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
387)
388
/* Extract one lane of the VD_LANE vector in operand 1 into operand 0, which
   is "Um" memory (alternative 0, printed as a vst1 of one lane) or a core
   register (alternative 1, printed as a vmov).  Operand 2 is the lane
   number selected by the vec_select.  */
389(define_insn "vec_extract<mode><V_elem_l>"
390  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
391        (vec_select:<V_elem>
392          (match_operand:VD_LANE 1 "s_register_operand" "w,w")
393          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
394  "TARGET_NEON"
395{
  /* On big-endian the lane number is mirrored within the vector before it
     is printed.  */
396  if (BYTES_BIG_ENDIAN)
397    {
398      int elt = INTVAL (operands[2]);
399      elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
400      operands[2] = GEN_INT (elt);
401    }
402
403  if (which_alternative == 0)
404    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
405  else
406    return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
407}
408  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
409)
410
411;; This pattern is renamed from "vec_extract<mode><V_elem_l>" to
412;; "neon_vec_extract<mode><V_elem_l>" and this pattern is called
413;; by define_expand in vec-common.md file.
/* Extract one lane of the VQ2 vector in operand 1 into operand 0, which is
   "Um" memory (alternative 0, printed as a vst1 of one lane) or a core
   register (alternative 1, printed as a vmov).  Operand 2 is the lane
   number selected by the vec_select.  The instruction is printed against a
   <V_HALF>mode register covering one half of the source.  */
414(define_insn "neon_vec_extract<mode><V_elem_l>"
415  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
416          (vec_select:<V_elem>
417          (match_operand:VQ2 1 "s_register_operand" "w,w")
418          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
419  "TARGET_NEON"
420{
421  int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
  /* ELT is the lane within one half; HI is twice the index of the half and
     is added to the source register number below.  */
422  int elt = INTVAL (operands[2]) % half_elts;
423  int hi = (INTVAL (operands[2]) / half_elts) * 2;
424  int regno = REGNO (operands[1]);
425
  /* On big-endian the lane number is mirrored within the half.  */
426  if (BYTES_BIG_ENDIAN)
427    elt = half_elts - 1 - elt;
428
  /* Replace operand 1 by the selected half and operand 2 by the lane number
     so that the templates print them directly.  */
429  operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
430  operands[2] = GEN_INT (elt);
431
432  if (which_alternative == 0)
433    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
434  else
435    return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
436}
437  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
438)
439
440;; This pattern is renamed from "vec_extractv2didi" to "neon_vec_extractv2didi"
441;; and this pattern is called by define_expand in vec-common.md file.
/* Extract one DImode element of the V2DI vector in operand 1 into operand 0,
   which is "Um" memory (alternative 0, printed as vst1.64) or core registers
   (alternative 1, printed as vmov).  Operand 2 is the element number.  */
442(define_insn "neon_vec_extractv2didi"
443  [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
444          (vec_select:DI
445          (match_operand:V2DI 1 "s_register_operand" "w,w")
446          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
447  "TARGET_NEON"
448{
  /* The element is addressed as a DImode register located 2 * (element
     number) register numbers after the start of the source.  */
449  int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
450
451  operands[1] = gen_rtx_REG (DImode, regno);
452
453  if (which_alternative == 0)
454    return "vst1.64\t{%P1}, %A0  @ v2di";
455  else
456    return "vmov\t%Q0, %R0, %P1  @ v2di";
457}
458  [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
459)
460
461;; Doubleword and quadword arithmetic.
462
463;; NOTE: some other instructions also support 64-bit integer
464;; element size, which we could potentially use for "long long" operations.
465
/* Vector addition for the VDQ modes: operand 0 = operand 1 + operand 2,
   printed as vadd.<V_if_elem>.  Enabled by ARM_HAVE_NEON_<MODE>_ARITH.  The
   "type" attribute is the floating-point add/sub type when <Is_float_mode>
   tests true and the integer add type otherwise.  */
466(define_insn "*add<mode>3_neon"
467  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
468        (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
469                      (match_operand:VDQ 2 "s_register_operand" "w")))]
470  "ARM_HAVE_NEON_<MODE>_ARITH"
471  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
472  [(set (attr "type")
473      (if_then_else (match_test "<Is_float_mode>")
474                    (const_string "neon_fp_addsub_s<q>")
475                    (const_string "neon_add<q>")))]
476)
477
/* Vector subtraction for the VDQ modes: operand 0 = operand 1 - operand 2,
   printed as vsub.<V_if_elem>.  Enabled by ARM_HAVE_NEON_<MODE>_ARITH.  The
   "type" attribute is the floating-point add/sub type when <Is_float_mode>
   tests true and the integer sub type otherwise.  */
478(define_insn "*sub<mode>3_neon"
479  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
480        (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
481                   (match_operand:VDQ 2 "s_register_operand" "w")))]
482  "ARM_HAVE_NEON_<MODE>_ARITH"
483  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
484  [(set (attr "type")
485      (if_then_else (match_test "<Is_float_mode>")
486                    (const_string "neon_fp_addsub_s<q>")
487                    (const_string "neon_sub<q>")))]
488)
489
/* Vector multiplication for the VDQW modes: operand 0 = operand 1 *
   operand 2, printed as vmul.<V_if_elem>.  Enabled by
   ARM_HAVE_NEON_<MODE>_ARITH.  The "type" attribute is the floating-point
   multiply type when <Is_float_mode> tests true and the integer multiply
   type otherwise.  */
490(define_insn "*mul<mode>3_neon"
491  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
492        (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
493                   (match_operand:VDQW 2 "s_register_operand" "w")))]
494  "ARM_HAVE_NEON_<MODE>_ARITH"
495  "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
496  [(set (attr "type")
497      (if_then_else (match_test "<Is_float_mode>")
498                        (const_string "neon_fp_mul_s<q>")
499                    (const_string "neon_mul_<V_elem_ch><q>")))]
500)
501
502/* Perform division using multiply-by-reciprocal.
503   Reciprocal is calculated using Newton-Raphson method.
504   Enabled with -funsafe-math-optimizations -freciprocal-math
505   and disabled for -Os since it increases code size.  */
506
/* Expander for operand 0 = operand 1 / operand 2 in the VCVTF modes.  It is
   enabled only when ARM_HAVE_NEON_<MODE>_ARITH holds, optimize_size is off
   and flag_reciprocal_math is set.  All instructions are emitted explicitly
   by the preparation code, which then finishes with DONE.  */
507(define_expand "div<VCVTF:mode>3"
508  [(set (match_operand:VCVTF 0 "s_register_operand")
509        (div:VCVTF (match_operand:VCVTF 1 "s_register_operand")
510                      (match_operand:VCVTF 2 "s_register_operand")))]
511  "ARM_HAVE_NEON_<MODE>_ARITH && !optimize_size
512   && flag_reciprocal_math"
513  {
    /* REC accumulates the reciprocal of operand 2; VRECPS_TEMP holds the
       vrecps result used to refine REC in each iteration.  */
514    rtx rec = gen_reg_rtx (<MODE>mode);
515    rtx vrecps_temp = gen_reg_rtx (<MODE>mode);
516
517    /* Reciprocal estimate.  */
518    emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));
519
520    /* Perform 2 iterations of newton-raphson method.  */
521    for (int i = 0; i < 2; i++)
522      {
          /* vrecps_temp = vrecps (rec, operands[2]); rec = rec * vrecps_temp.  */
523          emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
524          emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
525      }
526
527    /* We now have reciprocal in rec, perform operands[0] = operands[1] * rec.  */
528    emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec));
529    DONE;
530  }
531)
532
533
/* Multiply-accumulate for the VDQW modes: operand 0 = operand 2 * operand 3
   + operand 1.  Operand 1 is tied to the destination by the "0" constraint,
   so the vmla template prints only operands 0, 2 and 3.  Enabled by
   ARM_HAVE_NEON_<MODE>_ARITH.  */
534(define_insn "mul<mode>3add<mode>_neon"
535  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
536        (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
537                            (match_operand:VDQW 3 "s_register_operand" "w"))
538                      (match_operand:VDQW 1 "s_register_operand" "0")))]
539  "ARM_HAVE_NEON_<MODE>_ARITH"
540  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
541  [(set (attr "type")
542      (if_then_else (match_test "<Is_float_mode>")
543                        (const_string "neon_fp_mla_s<q>")
544                        (const_string "neon_mla_<V_elem_ch><q>")))]
545)
546
/* Multiply-accumulate for the VH modes: operand 0 = operand 2 * operand 3
   + operand 1.  Operand 1 is tied to the destination by the "0" constraint,
   so the vmla.f16 template prints only operands 0, 2 and 3.  Enabled by
   ARM_HAVE_NEON_<MODE>_ARITH.  */
547(define_insn "mul<mode>3add<mode>_neon"
548  [(set (match_operand:VH 0 "s_register_operand" "=w")
549          (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
550                                (match_operand:VH 3 "s_register_operand" "w"))
551                      (match_operand:VH 1 "s_register_operand" "0")))]
552  "ARM_HAVE_NEON_<MODE>_ARITH"
553  "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
554  [(set_attr "type" "neon_fp_mla_s<q>")]
555)
556
/* Multiply-subtract for the VDQW modes: operand 0 = operand 1 - operand 2 *
   operand 3.  Operand 1 is tied to the destination by the "0" constraint,
   so the vmls template prints only operands 0, 2 and 3.  Enabled by
   ARM_HAVE_NEON_<MODE>_ARITH.  */
557(define_insn "mul<mode>3neg<mode>add<mode>_neon"
558  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
559        (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
560                    (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
561                               (match_operand:VDQW 3 "s_register_operand" "w"))))]
562  "ARM_HAVE_NEON_<MODE>_ARITH"
563  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
564  [(set (attr "type")
565      (if_then_else (match_test "<Is_float_mode>")
566                        (const_string "neon_fp_mla_s<q>")
567                        (const_string "neon_mla_<V_elem_ch><q>")))]
568)
569
570;; Fused multiply-accumulate
571;; We define each insn twice here:
572;;    1: with flag_unsafe_math_optimizations for the widening multiply phase
573;;       to be able to use when converting to FMA.
574;;    2: without flag_unsafe_math_optimizations for the intrinsics to use.
/* Fused multiply-add for the VCVTF modes: operand 0 = operand 1 * operand 2
   + operand 3.  Operand 3 is tied to the destination by the "0" constraint,
   so the vfma template prints only operands 0, 1 and 2.  Enabled when both
   ARM_HAVE_NEON_<MODE>_ARITH and TARGET_FMA hold.  */
575(define_insn "fma<VCVTF:mode>4"
576  [(set (match_operand:VCVTF 0 "register_operand" "=w")
577        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
578                     (match_operand:VCVTF 2 "register_operand" "w")
579                     (match_operand:VCVTF 3 "register_operand" "0")))]
580  "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
581  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
582  [(set_attr "type" "neon_fp_mla_s<q>")]
583)
584
/* Same RTL and output template as fma<VCVTF:mode>4, but the condition is
   TARGET_NEON && TARGET_FMA instead of depending on
   ARM_HAVE_NEON_<MODE>_ARITH.  */
585(define_insn "fma<VCVTF:mode>4_intrinsic"
586  [(set (match_operand:VCVTF 0 "register_operand" "=w")
587        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
588                     (match_operand:VCVTF 2 "register_operand" "w")
589                     (match_operand:VCVTF 3 "register_operand" "0")))]
590  "TARGET_NEON && TARGET_FMA"
591  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
592  [(set_attr "type" "neon_fp_mla_s<q>")]
593)
594
/* Fused multiply-add for the VH modes: operand 0 = operand 1 * operand 2
   + operand 3.  Operand 3 is tied to the destination by the "0" constraint,
   so the vfma template prints only operands 0, 1 and 2.  Enabled by
   ARM_HAVE_NEON_<MODE>_ARITH.  */
595(define_insn "fma<VH:mode>4"
596 [(set (match_operand:VH 0 "register_operand" "=w")
597   (fma:VH
598    (match_operand:VH 1 "register_operand" "w")
599    (match_operand:VH 2 "register_operand" "w")
600    (match_operand:VH 3 "register_operand" "0")))]
601 "ARM_HAVE_NEON_<MODE>_ARITH"
602 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
603 [(set_attr "type" "neon_fp_mla_s<q>")]
604)
605
/* Fused multiply-subtract for the VCVTF modes: operand 0 = (-operand 1) *
   operand 2 + operand 3.  Operand 3 is tied to the destination by the "0"
   constraint, so the vfms template prints only operands 0, 1 and 2.
   Enabled when both ARM_HAVE_NEON_<MODE>_ARITH and TARGET_FMA hold.  */
606(define_insn "*fmsub<VCVTF:mode>4"
607  [(set (match_operand:VCVTF 0 "register_operand" "=w")
608        (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
609                       (match_operand:VCVTF 2 "register_operand" "w")
610                       (match_operand:VCVTF 3 "register_operand" "0")))]
611  "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
612  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
613  [(set_attr "type" "neon_fp_mla_s<q>")]
614)
615
/* Same RTL and output template as *fmsub<VCVTF:mode>4, but the condition is
   TARGET_NEON && TARGET_FMA instead of depending on
   ARM_HAVE_NEON_<MODE>_ARITH.  */
616(define_insn "fmsub<VCVTF:mode>4_intrinsic"
617 [(set (match_operand:VCVTF 0 "register_operand" "=w")
618   (fma:VCVTF
619    (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
620    (match_operand:VCVTF 2 "register_operand" "w")
621    (match_operand:VCVTF 3 "register_operand" "0")))]
622 "TARGET_NEON && TARGET_FMA"
623 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
624 [(set_attr "type" "neon_fp_mla_s<q>")]
625)
626
/* Fused multiply-subtract for the VH modes: operand 0 = (-operand 1) *
   operand 2 + operand 3.  Operand 3 is tied to the destination by the "0"
   constraint, so the vfms template prints only operands 0, 1 and 2.
   Enabled by TARGET_NEON_FP16INST.  */
627(define_insn "fmsub<VH:mode>4_intrinsic"
628 [(set (match_operand:VH 0 "register_operand" "=w")
629   (fma:VH
630    (neg:VH (match_operand:VH 1 "register_operand" "w"))
631    (match_operand:VH 2 "register_operand" "w")
632    (match_operand:VH 3 "register_operand" "0")))]
633 "TARGET_NEON_FP16INST"
634 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
635 [(set_attr "type" "neon_fp_mla_s<q>")]
636)
637
/* One pattern per NEON_VRINT unspec for the VCVTF modes, printed as
   vrint<nvrint_variant>.f32 from operand 1 into operand 0.  Requires
   TARGET_NEON and TARGET_VFP5.  */
638(define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
639  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
640        (unspec:VCVTF [(match_operand:VCVTF 1
641                             "s_register_operand" "w")]
642                    NEON_VRINT))]
643  "TARGET_NEON && TARGET_VFP5"
644  "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
645  [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
646)
647
/* Conversion of the VCVTF vector in operand 1 to the <V_cmp_result> mode:
   a FIXUORS code applied to a NEON_VCVT unspec of the input.  Printed as
   vcvt<nvrint_variant>.<su>32.f32.  Requires TARGET_NEON and TARGET_VFP5;
   the "predicable" attribute is set to "no".  */
648(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
649  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
650          (FIXUORS:<V_cmp_result> (unspec:VCVTF
651                                     [(match_operand:VCVTF 1 "register_operand" "w")]
652                                     NEON_VCVT)))]
653  "TARGET_NEON && TARGET_VFP5"
654  "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
655  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
656   (set_attr "predicable" "no")]
657)
658
/* Bitwise OR for the VDQ modes.  Alternative 0 takes two registers and is
   printed as a three-operand vorr.  Alternative 1 takes a "Dl" constant for
   operand 2 with operand 1 tied to the destination; its text is produced by
   neon_output_logic_immediate, called with mnemonic "vorr" and a fourth
   argument of 0.  */
659(define_insn "ior<mode>3_neon"
660  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
661          (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
662                     (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
663  "TARGET_NEON"
664{
665  switch (which_alternative)
666    {
667    case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
668    case 1: return neon_output_logic_immediate ("vorr", &operands[2],
669                         <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
670    default: gcc_unreachable ();
671    }
672}
673  [(set_attr "type" "neon_logic<q>")]
674)
675
676;; The concrete forms of the Neon immediate-logic instructions are vbic and
677;; vorr. We support the pseudo-instruction vand instead, because that
678;; corresponds to the canonical form the middle-end expects to use for
679;; immediate bitwise-ANDs.
680
/* Bitwise AND for the VDQ modes.  Alternative 0 takes two registers and is
   printed as a three-operand vand.  Alternative 1 takes a "DL" constant for
   operand 2 with operand 1 tied to the destination; its text is produced by
   neon_output_logic_immediate, called with mnemonic "vand" and a fourth
   argument of 1.  */
681(define_insn "and<mode>3_neon"
682  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
683          (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
684                     (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
685  "TARGET_NEON"
686{
687  switch (which_alternative)
688    {
689    case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
690    case 1: return neon_output_logic_immediate ("vand", &operands[2],
691                         <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
692    default: gcc_unreachable ();
693    }
694}
695  [(set_attr "type" "neon_logic<q>")]
696)
697
/* OR-NOT for the VDQ modes: operand 0 = (~operand 2) | operand 1.  The
   inverted input is operand 2, so the vorn template prints the operands in
   the order 0, 1, 2.  */
698(define_insn "orn<mode>3_neon"
699  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
700          (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
701                     (match_operand:VDQ 1 "s_register_operand" "w")))]
702  "TARGET_NEON"
703  "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
704  [(set_attr "type" "neon_logic<q>")]
705)
706
/* Bit clear for the VDQ modes: operand 0 = (~operand 2) & operand 1.  The
   inverted input is operand 2, so the vbic template prints the operands in
   the order 0, 1, 2.  */
707(define_insn "bic<mode>3_neon"
708  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
709          (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
710                     (match_operand:VDQ 1 "s_register_operand" "w")))]
711  "TARGET_NEON"
712  "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
713  [(set_attr "type" "neon_logic<q>")]
714)
715
/* Bitwise exclusive OR for the VDQ modes: operand 0 = operand 1 ^
   operand 2, printed as veor.  */
716(define_insn "xor<mode>3_neon"
717  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
718          (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
719                     (match_operand:VDQ 2 "s_register_operand" "w")))]
720  "TARGET_NEON"
721  "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
722  [(set_attr "type" "neon_logic<q>")]
723)
724
/* Bitwise complement for the VDQ modes: operand 0 = ~operand 1, printed as
   vmvn.  The "type" attribute is the neon_move type.  */
725(define_insn "one_cmpl<mode>2_neon"
726  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
727        (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
728  "TARGET_NEON"
729  "vmvn\t%<V_reg>0, %<V_reg>1"
730  [(set_attr "type" "neon_move<q>")]
731)
732
/* Absolute value for the VDQW modes, printed as vabs.<V_s_elem>.  The
   "type" attribute is the floating-point abs type when <Is_float_mode>
   tests true and the integer abs type otherwise.  */
733(define_insn "neon_abs<mode>2"
734  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
735          (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
736  "TARGET_NEON"
737  "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
738  [(set (attr "type")
739      (if_then_else (match_test "<Is_float_mode>")
740                    (const_string "neon_fp_abs_s<q>")
741                    (const_string "neon_abs<q>")))]
742)
743
/* Negation for the VDQW modes, printed as vneg.<V_s_elem>.  The "type"
   attribute is the floating-point neg type when <Is_float_mode> tests true
   and the integer neg type otherwise.  */
744(define_insn "neon_neg<mode>2"
745  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
746          (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
747  "TARGET_NEON"
748  "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
749  [(set (attr "type")
750      (if_then_else (match_test "<Is_float_mode>")
751                    (const_string "neon_fp_neg_s<q>")
752                    (const_string "neon_neg<q>")))]
753)
754
/* One pattern per ABSNEG code for the VH modes, printed as
   v<absneg_str>.<V_s_elem>.  Enabled by TARGET_NEON_FP16INST.  Both codes
   use the "neon_abs<q>" type.  */
755(define_insn "neon_<absneg_str><mode>2"
756  [(set (match_operand:VH 0 "s_register_operand" "=w")
757    (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
758 "TARGET_NEON_FP16INST"
759 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
760 [(set_attr "type" "neon_abs<q>")]
761)
762
/* Expander named neon_v<absneg_str><mode> for the VH modes.  It forwards
   its two operands unchanged to gen_neon_<absneg_str><mode>2 and finishes
   with DONE.  Enabled by TARGET_NEON_FP16INST.  */
763(define_expand "neon_v<absneg_str><mode>"
764 [(set
765   (match_operand:VH 0 "s_register_operand")
766   (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
767 "TARGET_NEON_FP16INST"
768{
769  emit_insn (gen_neon_<absneg_str><mode>2 (operands[0], operands[1]));
770  DONE;
771})
772
/* One pattern per FP16_RND unspec for the VH modes, printed with the
   mnemonic <fp16_rnd_insn> and the suffix <V_s_elem>.  Enabled by
   TARGET_NEON_FP16INST.  */
773(define_insn "neon_v<fp16_rnd_str><mode>"
774  [(set (match_operand:VH 0 "s_register_operand" "=w")
775    (unspec:VH
776     [(match_operand:VH 1 "s_register_operand" "w")]
777     FP16_RND))]
778 "TARGET_NEON_FP16INST"
779 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
780 [(set_attr "type" "neon_fp_round_s<q>")]
781)
782
/* UNSPEC_VRSQRTE of operand 1 for the VH modes, printed as vrsqrte.f16.
   Enabled by TARGET_NEON_FP16INST.  */
783(define_insn "neon_vrsqrte<mode>"
784  [(set (match_operand:VH 0 "s_register_operand" "=w")
785    (unspec:VH
786     [(match_operand:VH 1 "s_register_operand" "w")]
787     UNSPEC_VRSQRTE))]
788  "TARGET_NEON_FP16INST"
789  "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
790 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
791)
792
/* Unsigned minimum for the VDQIW modes, printed as vmin with the
   <V_u_elem> suffix.  */
793(define_insn "*umin<mode>3_neon"
794  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
795          (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
796                        (match_operand:VDQIW 2 "s_register_operand" "w")))]
797  "TARGET_NEON"
798  "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
799  [(set_attr "type" "neon_minmax<q>")]
800)
801
/* Unsigned maximum for the VDQIW modes, printed as vmax with the
   <V_u_elem> suffix.  */
802(define_insn "*umax<mode>3_neon"
803  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
804          (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
805                        (match_operand:VDQIW 2 "s_register_operand" "w")))]
806  "TARGET_NEON"
807  "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
808  [(set_attr "type" "neon_minmax<q>")]
809)
810
/* smin for the VDQW modes, printed as vmin with the <V_s_elem> suffix.  The
   "type" attribute is the floating-point min/max type when <Is_float_mode>
   tests true and the integer min/max type otherwise.  */
811(define_insn "*smin<mode>3_neon"
812  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
813          (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
814                       (match_operand:VDQW 2 "s_register_operand" "w")))]
815  "TARGET_NEON"
816  "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
817  [(set (attr "type")
818      (if_then_else (match_test "<Is_float_mode>")
819                    (const_string "neon_fp_minmax_s<q>")
820                    (const_string "neon_minmax<q>")))]
821)
822
;; Signed element-wise maximum over the VDQW modes.  Emits vmax with the
;; <V_s_elem> suffix.  The "type" attribute is neon_fp_minmax_s<q> when
;; <Is_float_mode> holds and neon_minmax<q> otherwise.
(define_insn "*smax<mode>3_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
          (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                       (match_operand:VDQW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_minmax_s<q>")
                    (const_string "neon_minmax<q>")))]
)
834
; TODO: V2DI shifts are currently disabled because there are bugs in the
; generic vectorizer code.  It ends up creating a V2DI constructor with
; SImode elements.
838
;; Arithmetic shift right of a VDQIW vector by an immediate.  The assembly
;; text is produced by neon_output_shift_immediate with the "vshr"
;; mnemonic and the 's' sign character.
(define_insn "vashr<mode>3_imm"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (ashiftrt:VDQIW
          (match_operand:VDQIW 1 "s_register_operand" "w")
          (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
  "TARGET_NEON"
{
  /* Non-zero when <MODE>mode is a quad-register mode.  */
  int is_quad = VALID_NEON_QREG_MODE (<MODE>mode);

  return neon_output_shift_immediate ("vshr", 's', &operands[2], <MODE>mode,
                                      is_quad, false);
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
851
;; Logical shift right of a VDQIW vector by an immediate.  The assembly
;; text is produced by neon_output_shift_immediate with the "vshr"
;; mnemonic and the 'u' sign character.
(define_insn "vlshr<mode>3_imm"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (lshiftrt:VDQIW
          (match_operand:VDQIW 1 "s_register_operand" "w")
          (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
  "TARGET_NEON"
{
  /* Non-zero when <MODE>mode is a quad-register mode.  */
  int is_quad = VALID_NEON_QREG_MODE (<MODE>mode);

  return neon_output_shift_immediate ("vshr", 'u', &operands[2], <MODE>mode,
                                      is_quad, false);
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
864
; Used for implementing arithmetic shift-right, which is a left-shift by a
; negative amount, with signed operands. This is essentially the same as
; ashl<mode>3 above, but using an unspec in case GCC tries anything tricky
; with negative shift amounts.
869
;; Register-amount vshl on VDQI modes using the signed element suffix
;; <V_s_elem>, wrapped in UNSPEC_ASHIFT_SIGNED.
(define_insn "ashl<mode>3_signed"
  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
          (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
                          (match_operand:VDQI 2 "s_register_operand" "w")]
                         UNSPEC_ASHIFT_SIGNED))]
  "TARGET_NEON"
  "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_reg<q>")]
)
879
880; Used for implementing logical shift-right, which is a left-shift by a negative
881; amount, with unsigned operands.
882
;; Register-amount vshl on VDQI modes using the unsigned element suffix
;; <V_u_elem>, wrapped in UNSPEC_ASHIFT_UNSIGNED.
(define_insn "ashl<mode>3_unsigned"
  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
          (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
                          (match_operand:VDQI 2 "s_register_operand" "w")]
                         UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_NEON"
  "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_reg<q>")]
)
892
893;; 64-bit shifts
894
;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
;; leaving the upper half uninitialized.  This is OK since the shift
;; instruction only looks at the low 8 bits anyway.  To avoid confusing
;; data flow analysis however, we pretend the full register is set
;; using an unspec.
;; Alternative 0 takes the count from memory ("Um") via vld1.32;
;; alternative 1 takes it from a core register ("r") via vmov.32.
(define_insn "neon_load_count"
  [(set (match_operand:DI 0 "s_register_operand" "=w,w")
        (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
                   UNSPEC_LOAD_COUNT))]
  "TARGET_NEON"
  "@
   vld1.32\t{%P0[0]}, %A1
   vmov.32\t%P0[0], %1"
  [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
)
910
911;; Widening operations
912
;; Signed widening sum for quad-word integer vectors (VQI).  Operand 2 is
;; first copied into operand 0 if they differ; then the two halves of
;; operand 1, selected by the parallels from arm_simd_vect_par_cnst_half,
;; are accumulated into operand 0 one after the other.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand")
        (plus:<V_double_width>
          (sign_extend:<V_double_width>
            (match_operand:VQI 1 "s_register_operand"))
          (match_operand:<V_double_width> 2 "s_register_operand")))]
  "TARGET_NEON"
{
  rtx acc = operands[0];
  rtx src = operands[1];
  machine_mode src_mode = GET_MODE (src);
  rtx first_sel = arm_simd_vect_par_cnst_half (src_mode, false);
  rtx second_sel = arm_simd_vect_par_cnst_half (src_mode, true);

  /* Seed the accumulator with the incoming partial sum.  */
  if (acc != operands[2])
    emit_move_insn (acc, operands[2]);

  emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (acc, src,
                                                       first_sel, acc));
  emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (acc, src,
                                                       second_sel, acc));
  DONE;
})
941
;; Sign-extend the half of operand 1 selected by a vect_par_constant_low
;; parallel and add it to the accumulator (operand 3, tied to operand 0).
;; The %e1/%f1 operand form is chosen according to BYTES_BIG_ENDIAN.
(define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
          (sign_extend:<V_double_width>
            (vec_select:<V_HALF>
              (match_operand:VQI 1 "s_register_operand" "%w")
              (match_operand:VQI 2 "vect_par_constant_low" "")))
          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    return "vaddw.<V_s_elem>\t%q0, %q3, %f1";

  return "vaddw.<V_s_elem>\t%q0, %q3, %e1";
}
  [(set_attr "type" "neon_add_widen")]
)
956
;; Sign-extend the half of operand 1 selected by a vect_par_constant_high
;; parallel and add it to the accumulator (operand 3, tied to operand 0).
;; The %e1/%f1 operand form is chosen according to BYTES_BIG_ENDIAN.
(define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
          (sign_extend:<V_double_width>
            (vec_select:<V_HALF>
              (match_operand:VQI 1 "s_register_operand" "%w")
              (match_operand:VQI 2 "vect_par_constant_high" "")))
          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    return "vaddw.<V_s_elem>\t%q0, %q3, %e1";

  return "vaddw.<V_s_elem>\t%q0, %q3, %f1";
}
  [(set_attr "type" "neon_add_widen")]
)
971
;; Signed widening sum for VW modes: operand 1 is sign-extended to
;; <V_widen> and added to operand 2.  Emits vaddw with the <V_s_elem>
;; suffix.
(define_insn "widen_ssum<mode>3"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
          (plus:<V_widen>
           (sign_extend:<V_widen>
            (match_operand:VW 1 "s_register_operand" "%w"))
           (match_operand:<V_widen> 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vaddw.<V_s_elem>\t%q0, %q2, %P1"
  [(set_attr "type" "neon_add_widen")]
)
982
;; Unsigned widening sum for quad-word integer vectors (VQI).  Operand 2
;; is first copied into operand 0 if they differ; then the two halves of
;; operand 1, selected by the parallels from arm_simd_vect_par_cnst_half,
;; are accumulated into operand 0 one after the other.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand")
        (plus:<V_double_width>
          (zero_extend:<V_double_width>
            (match_operand:VQI 1 "s_register_operand"))
          (match_operand:<V_double_width> 2 "s_register_operand")))]
  "TARGET_NEON"
{
  rtx acc = operands[0];
  rtx src = operands[1];
  machine_mode src_mode = GET_MODE (src);
  rtx first_sel = arm_simd_vect_par_cnst_half (src_mode, false);
  rtx second_sel = arm_simd_vect_par_cnst_half (src_mode, true);

  /* Seed the accumulator with the incoming partial sum.  */
  if (acc != operands[2])
    emit_move_insn (acc, operands[2]);

  emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (acc, src,
                                                       first_sel, acc));
  emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (acc, src,
                                                       second_sel, acc));
  DONE;
})
1011
;; Zero-extend the half of operand 1 selected by a vect_par_constant_low
;; parallel and add it to the accumulator (operand 3, tied to operand 0).
;; The %e1/%f1 operand form is chosen according to BYTES_BIG_ENDIAN.
(define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
          (zero_extend:<V_double_width>
            (vec_select:<V_HALF>
              (match_operand:VQI 1 "s_register_operand" "%w")
              (match_operand:VQI 2 "vect_par_constant_low" "")))
          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    return "vaddw.<V_u_elem>\t%q0, %q3, %f1";

  return "vaddw.<V_u_elem>\t%q0, %q3, %e1";
}
  [(set_attr "type" "neon_add_widen")]
)
1026
;; Zero-extend the half of operand 1 selected by a vect_par_constant_high
;; parallel and add it to the accumulator (operand 3, tied to operand 0).
;; The %e1/%f1 operand form is chosen according to BYTES_BIG_ENDIAN.
(define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
          (zero_extend:<V_double_width>
            (vec_select:<V_HALF>
              (match_operand:VQI 1 "s_register_operand" "%w")
              (match_operand:VQI 2 "vect_par_constant_high" "")))
          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    return "vaddw.<V_u_elem>\t%q0, %q3, %e1";

  return "vaddw.<V_u_elem>\t%q0, %q3, %f1";
}
  [(set_attr "type" "neon_add_widen")]
)
1041
;; Unsigned widening sum for VW modes: operand 1 is zero-extended to
;; <V_widen> and added to operand 2.  Emits vaddw with the <V_u_elem>
;; suffix.
(define_insn "widen_usum<mode>3"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
          (plus:<V_widen> (zero_extend:<V_widen>
                                (match_operand:VW 1 "s_register_operand" "%w"))
                            (match_operand:<V_widen> 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vaddw.<V_u_elem>\t%q0, %q2, %P1"
  [(set_attr "type" "neon_add_widen")]
)
1051
1052;; Helpers for quad-word reduction operations
1053
1054; Add (or smin, smax...) the low N/2 elements of the N-element vector
1055; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1056; N/2-element vector.
1057
;; V4SI variant: applies each VQH_OPS code to lanes {0,1} and lanes {2,3}
;; of operand 1, giving a V2SI result.  The template uses the %e1 and %f1
;; operand forms of the single quad-word input.
(define_insn "quad_halves_<code>v4si"
  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
        (VQH_OPS:V2SI
          (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)]))
          (vec_select:V2SI (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)
1070
;; V4SF variant: applies each VQHS_OPS code to lanes {0,1} and lanes
;; {2,3} of operand 1, giving a V2SF result.  Enabled only under
;; ARM_HAVE_NEON_V4SF_ARITH.
(define_insn "quad_halves_<code>v4sf"
  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
        (VQHS_OPS:V2SF
          (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)]))
          (vec_select:V2SF (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)]))))]
  "ARM_HAVE_NEON_V4SF_ARITH"
  "<VQH_mnem>.f32\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
)
1083
;; V8HI variant: applies each VQH_OPS code to lanes {0..3} and lanes
;; {4..7} of operand 1, giving a V4HI result.
;; Operand 0 is write-only: the template reads only %e1 and %f1, so the
;; constraint is "=w" (as in the V4SI/V4SF variants) rather than "+w",
;; which would tell the register allocator the destination is also an
;; input.
(define_insn "quad_halves_<code>v8hi"
  [(set (match_operand:V4HI 0 "s_register_operand" "=w")
        (VQH_OPS:V4HI
          (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)
                                              (const_int 2) (const_int 3)]))
          (vec_select:V4HI (match_dup 1)
                           (parallel [(const_int 4) (const_int 5)
                                              (const_int 6) (const_int 7)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)
1098
;; V16QI variant: applies each VQH_OPS code to lanes {0..7} and lanes
;; {8..15} of operand 1, giving a V8QI result.
;; Operand 0 is write-only: the template reads only %e1 and %f1, so the
;; constraint is "=w" (as in the V4SI/V4SF variants) rather than "+w",
;; which would tell the register allocator the destination is also an
;; input.
(define_insn "quad_halves_<code>v16qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (VQH_OPS:V8QI
          (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)
                                              (const_int 2) (const_int 3)
                                              (const_int 4) (const_int 5)
                                              (const_int 6) (const_int 7)]))
          (vec_select:V8QI (match_dup 1)
                           (parallel [(const_int 8) (const_int 9)
                                              (const_int 10) (const_int 11)
                                              (const_int 12) (const_int 13)
                                              (const_int 14) (const_int 15)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)
1117
;; Store operand 1 (a <V_HALF> value) into the <V_HALF>-mode subreg of
;; operand 0 located at byte offset GET_MODE_SIZE (<V_HALF>mode).
(define_expand "move_hi_quad_<mode>"
  [(match_operand:ANY128 0 "s_register_operand")
   (match_operand:<V_HALF> 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx upper = simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
                                   GET_MODE_SIZE (<V_HALF>mode));

  emit_move_insn (upper, operands[1]);
  DONE;
})
1128
;; Store operand 1 (a <V_HALF> value) into the <V_HALF>-mode subreg of
;; operand 0 located at byte offset 0.
(define_expand "move_lo_quad_<mode>"
  [(match_operand:ANY128 0 "s_register_operand")
   (match_operand:<V_HALF> 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lower = simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode, 0);

  emit_move_insn (lower, operands[1]);
  DONE;
})
1139
1140;; Reduction operations
1141
;; Sum reduction of a double-word vector (VD) to a scalar: reduce with
;; neon_pairwise_reduce using the vpadd pattern, then extract lane 0.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VD 1 "s_register_operand")]
  "ARM_HAVE_NEON_<MODE>_ARITH"
{
  rtx reduced = gen_reg_rtx (<MODE>mode);

  neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
                        &gen_neon_vpadd_internal<mode>);

  /* Every element holds the same result, so lane 0 is as good as any.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], reduced,
                                              const0_rtx));
  DONE;
})
1154
;; Sum reduction of a V2DI vector to a DI scalar (little-endian only):
;; emit arm_reduc_plus_internal_v2di into a fresh register, then extract
;; lane 0.
(define_expand "reduc_plus_scal_v2di"
  [(match_operand:DI 0 "nonimmediate_operand")
   (match_operand:V2DI 1 "s_register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx sum = gen_reg_rtx (V2DImode);

  emit_insn (gen_arm_reduc_plus_internal_v2di (sum, operands[1]));
  emit_insn (gen_vec_extractv2didi (operands[0], sum, const0_rtx));
  DONE;
})
1167
;; Helper for reduc_plus_scal_v2di, modelled as UNSPEC_VPADD on V2DI.
;; Emits a single vadd.i64 that combines %e1 and %f1 into %e0.  Only
;; enabled when !BYTES_BIG_ENDIAN.
(define_insn "arm_reduc_plus_internal_v2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
          (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
                         UNSPEC_VPADD))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vadd.i64\t%e0, %e1, %f1"
  [(set_attr "type" "neon_add_q")]
)
1176
;; Signed-minimum reduction of a double-word vector (VD) to a scalar:
;; reduce with neon_pairwise_reduce using the vpsmin pattern, then
;; extract lane 0.
(define_expand "reduc_smin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VD 1 "s_register_operand")]
  "ARM_HAVE_NEON_<MODE>_ARITH"
{
  rtx reduced = gen_reg_rtx (<MODE>mode);

  neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
                        &gen_neon_vpsmin<mode>);

  /* All elements of the vector hold the result; read lane 0.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], reduced,
                                              const0_rtx));
  DONE;
})
1190
;; Signed-minimum reduction of a quad-word vector (VQ), little-endian
;; only: fold the two halves with quad_halves_smin, then hand the
;; half-width vector to the <V_half> reduction.
(define_expand "reduc_smin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VQ 1 "s_register_operand")]
  "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
{
  rtx folded = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_smin<mode> (folded, operands[1]));
  emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], folded));
  DONE;
})
1203
;; Signed-maximum reduction of a double-word vector (VD) to a scalar:
;; reduce with neon_pairwise_reduce using the vpsmax pattern, then
;; extract lane 0.
(define_expand "reduc_smax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VD 1 "s_register_operand")]
  "ARM_HAVE_NEON_<MODE>_ARITH"
{
  rtx reduced = gen_reg_rtx (<MODE>mode);

  neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
                        &gen_neon_vpsmax<mode>);

  /* All elements of the vector hold the result; read lane 0.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], reduced,
                                              const0_rtx));
  DONE;
})
1216
;; Signed-maximum reduction of a quad-word vector (VQ), little-endian
;; only: fold the two halves with quad_halves_smax, then hand the
;; half-width vector to the <V_half> reduction.
(define_expand "reduc_smax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VQ 1 "s_register_operand")]
  "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
{
  rtx folded = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_smax<mode> (folded, operands[1]));
  emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], folded));
  DONE;
})
1229
;; Unsigned-minimum reduction of a double-word integer vector (VDI) to a
;; scalar: reduce with neon_pairwise_reduce using the vpumin pattern,
;; then extract lane 0.
(define_expand "reduc_umin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VDI 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx reduced = gen_reg_rtx (<MODE>mode);

  neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
                        &gen_neon_vpumin<mode>);

  /* All elements of the vector hold the result; read lane 0.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], reduced,
                                              const0_rtx));
  DONE;
})
1242
;; Unsigned-minimum reduction of a quad-word integer vector (VQI),
;; little-endian only: fold the two halves with quad_halves_umin, then
;; hand the half-width vector to the <V_half> reduction.
(define_expand "reduc_umin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VQI 1 "s_register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx folded = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_umin<mode> (folded, operands[1]));
  emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], folded));
  DONE;
})
1255
;; Unsigned-maximum reduction of a double-word integer vector (VDI) to a
;; scalar: reduce with neon_pairwise_reduce using the vpumax pattern,
;; then extract lane 0.
(define_expand "reduc_umax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VDI 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx reduced = gen_reg_rtx (<MODE>mode);

  neon_pairwise_reduce (reduced, operands[1], <MODE>mode,
                        &gen_neon_vpumax<mode>);

  /* All elements of the vector hold the result; read lane 0.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], reduced,
                                              const0_rtx));
  DONE;
})
1268
;; Unsigned-maximum reduction of a quad-word integer vector (VQI),
;; little-endian only: fold the two halves with quad_halves_umax, then
;; hand the half-width vector to the <V_half> reduction.
(define_expand "reduc_umax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VQI 1 "s_register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx folded = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_umax<mode> (folded, operands[1]));
  emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], folded));
  DONE;
})
1281
;; vpadd on VD modes, modelled as UNSPEC_VPADD.  The "type" attribute is
;; neon_fp_reduc_add_s<q> when <Is_float_mode> holds and
;; neon_reduc_add<q> otherwise.
(define_insn "neon_vpadd_internal<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
          (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
                        (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPADD))]
  "TARGET_NEON"
  "vpadd.<V_if_elem>\t%P0, %P1, %P2"
  ;; Assume this schedules like vadd.
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_reduc_add_s<q>")
                    (const_string "neon_reduc_add<q>")))]
)
1295
;; vpadd.f16 on V4HF, modelled as UNSPEC_VPADD; requires
;; TARGET_NEON_FP16INST.
;; NOTE(review): the type is the integer "neon_reduc_add", whereas
;; neon_vpadd_internal<mode> uses neon_fp_reduc_add_s<q> for float modes
;; -- confirm whether this is intentional.
(define_insn "neon_vpaddv4hf"
 [(set
   (match_operand:V4HF 0 "s_register_operand" "=w")
   (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
                     (match_operand:V4HF 2 "s_register_operand" "w")]
    UNSPEC_VPADD))]
 "TARGET_NEON_FP16INST"
 "vpadd.f16\t%P0, %P1, %P2"
 [(set_attr "type" "neon_reduc_add")]
)
1306
;; vpmin with the signed/float suffix <V_s_elem> on VD modes, modelled as
;; UNSPEC_VPSMIN.  The "type" attribute depends on <Is_float_mode>.
(define_insn "neon_vpsmin<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
          (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
                        (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPSMIN))]
  "TARGET_NEON"
  "vpmin.<V_s_elem>\t%P0, %P1, %P2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_reduc_minmax_s<q>")
                    (const_string "neon_reduc_minmax<q>")))]
)
1319
;; vpmax with the signed/float suffix <V_s_elem> on VD modes, modelled as
;; UNSPEC_VPSMAX.  The "type" attribute depends on <Is_float_mode>.
(define_insn "neon_vpsmax<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
          (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
                        (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPSMAX))]
  "TARGET_NEON"
  "vpmax.<V_s_elem>\t%P0, %P1, %P2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_reduc_minmax_s<q>")
                    (const_string "neon_reduc_minmax<q>")))]
)
1332
;; vpmin with the unsigned suffix <V_u_elem> on VDI modes, modelled as
;; UNSPEC_VPUMIN.
(define_insn "neon_vpumin<mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
          (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
                         (match_operand:VDI 2 "s_register_operand" "w")]
                   UNSPEC_VPUMIN))]
  "TARGET_NEON"
  "vpmin.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
1342
;; vpmax with the unsigned suffix <V_u_elem> on VDI modes, modelled as
;; UNSPEC_VPUMAX.
(define_insn "neon_vpumax<mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
          (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
                         (match_operand:VDI 2 "s_register_operand" "w")]
                   UNSPEC_VPUMAX))]
  "TARGET_NEON"
  "vpmax.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
1352
1353;; Saturating arithmetic
1354
1355; NOTE: Neon supports many more saturating variants of instructions than the
1356; following, but these are all GCC currently understands.
1357; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1358; yet either, although these patterns may be used by intrinsics when they're
1359; added.
1360
;; Signed saturating add (ss_plus) on VD modes.  Emits vqadd with the
;; <V_s_elem> suffix.
(define_insn "*ss_add<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
       (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
                   (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqadd.<V_s_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qadd<q>")]
)
1369
;; Unsigned saturating add (us_plus) on VD modes.  Emits vqadd with the
;; <V_u_elem> suffix.
(define_insn "*us_add<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
       (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
                   (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqadd.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qadd<q>")]
)
1378
;; Signed saturating subtract (ss_minus) on VD modes.  Emits vqsub with
;; the <V_s_elem> suffix.
(define_insn "*ss_sub<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
       (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqsub.<V_s_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qsub<q>")]
)
1387
;; Unsigned saturating subtract (us_minus) on VD modes.  Emits vqsub with
;; the <V_u_elem> suffix.
(define_insn "*us_sub<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
       (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqsub.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qsub<q>")]
)
1396
;; Vector comparison on VDQWH modes producing a <V_cmp_result> mask.
;; Float modes are enabled only with flag_unsafe_math_optimizations.
;; The work is delegated to arm_expand_vector_compare.
(define_expand "vec_cmp<mode><v_cmp_result>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
        (match_operator:<V_cmp_result> 1 "comparison_operator"
          [(match_operand:VDQWH 2 "s_register_operand")
           (match_operand:VDQWH 3 "reg_or_zero_operand")]))]
  "TARGET_NEON
   && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  enum rtx_code cmp_code = GET_CODE (operands[1]);

  arm_expand_vector_compare (operands[0], cmp_code, operands[2], operands[3],
                             false);
  DONE;
})
1409
;; Unsigned vector comparison on VDQIW modes; the result has the same
;; mode as the inputs.  The work is delegated to
;; arm_expand_vector_compare.
(define_expand "vec_cmpu<mode><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand")
        (match_operator:VDQIW 1 "comparison_operator"
          [(match_operand:VDQIW 2 "s_register_operand")
           (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
  "TARGET_NEON"
{
  enum rtx_code cmp_code = GET_CODE (operands[1]);

  arm_expand_vector_compare (operands[0], cmp_code, operands[2], operands[3],
                             false);
  DONE;
})
1421
;; Mask-driven select on VDQWH modes, implemented with neon_vbsl<mode>.
;; The mask (operand 3) is passed to vbsl ahead of the two value
;; operands.  Float modes require flag_unsafe_math_optimizations.
(define_expand "vcond_mask_<mode><v_cmp_result>"
  [(set (match_operand:VDQWH 0 "s_register_operand")
        (if_then_else:VDQWH
          (match_operand:<V_cmp_result> 3 "s_register_operand")
          (match_operand:VDQWH 1 "s_register_operand")
          (match_operand:VDQWH 2 "s_register_operand")))]
  "TARGET_NEON
   && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  rtx mask = operands[3];

  emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
                                  operands[2]));
  DONE;
})
1435
1436;; Patterns for builtins.
1437
1438; good for plain vadd, vaddq.
1439
;; Builtin expander for float vector add (VCVTF modes).  Uses the
;; canonical add<mode>3 pattern when ARM_HAVE_NEON_<MODE>_ARITH holds and
;; the unspec form otherwise.
(define_expand "neon_vadd<mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx pat;

  if (ARM_HAVE_NEON_<MODE>_ARITH)
    pat = gen_add<mode>3 (operands[0], operands[1], operands[2]);
  else
    pat = gen_neon_vadd<mode>_unspec (operands[0], operands[1], operands[2]);

  emit_insn (pat);
  DONE;
})
1453
;; Builtin expander for half-precision vector add (VH modes); always
;; forwards to add<mode>3.
(define_expand "neon_vadd<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx pat = gen_add<mode>3 (operands[0], operands[1], operands[2]);

  emit_insn (pat);
  DONE;
})
1463
;; Builtin expander for half-precision vector subtract (VH modes); always
;; forwards to sub<mode>3.
(define_expand "neon_vsub<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx pat = gen_sub<mode>3 (operands[0], operands[1], operands[2]);

  emit_insn (pat);
  DONE;
})
1473
1474; Note that NEON operations don't support the full IEEE 754 standard: in
1475; particular, denormal values are flushed to zero.  This means that GCC cannot
1476; use those instructions for autovectorization, etc. unless
1477; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1478; behavior is permissible).  Intrinsic operations (provided by the arm_neon.h
1479; header) must work in either case: if -funsafe-math-optimizations is given,
1480; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1481; expand to unspecs (which may potentially limit the extent to which they might
1482; be optimized by generic code).
1483
1484; Used for intrinsics when flag_unsafe_math_optimizations is false.
1485
;; vadd on VCVTF modes expressed as UNSPEC_VADD rather than a canonical
;; plus.  Emits vadd with the <V_if_elem> suffix.
(define_insn "neon_vadd<mode>_unspec"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                          (match_operand:VCVTF 2 "s_register_operand" "w")]
                     UNSPEC_VADD))]
  "TARGET_NEON"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_addsub_s<q>")
                    (const_string "neon_add<q>")))]
)
1498
;; vaddl: two VDI inputs, <V_widen> result.  One pattern per VADDL unspec,
;; with <sup> supplying the signedness prefix of the element suffix.
(define_insn "neon_vaddl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
                               (match_operand:VDI 2 "s_register_operand" "w")]
                          VADDL))]
  "TARGET_NEON"
  "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_add_long")]
)
1508
;; vaddw: a <V_widen> input (operand 1) plus a VDI input (operand 2),
;; giving a <V_widen> result.  One pattern per VADDW unspec.
(define_insn "neon_vaddw<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
                               (match_operand:VDI 2 "s_register_operand" "w")]
                          VADDW))]
  "TARGET_NEON"
  "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_add_widen")]
)
1518
1519; vhadd and vrhadd.
1520
;; v<r>hadd on VDQIW modes, one pattern per VHADD unspec; <sup> supplies
;; the signedness prefix of the element suffix.
;; VDQIW covers both D- and Q-register modes, so the scheduling type uses
;; the <q> suffix (as the neighbouring patterns do) instead of always
;; claiming the "_q" form.
(define_insn "@neon_v<r>hadd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
                           (match_operand:VDQIW 2 "s_register_operand" "w")]
                          VHADD))]
  "TARGET_NEON"
  "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_add_halve<q>")]
)
1530
;; vqadd builtin pattern on VDQIX modes, one per VQADD unspec; <sup>
;; supplies the signedness prefix of the element suffix.
(define_insn "neon_vqadd<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                           (match_operand:VDQIX 2 "s_register_operand" "w")]
                     VQADD))]
  "TARGET_NEON"
  "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qadd<q>")]
)
1540
;; v<r>addhn: two VN inputs, <V_narrow> result.  One pattern per VADDHN
;; unspec.
(define_insn "neon_v<r>addhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                                (match_operand:VN 2 "s_register_operand" "w")]
                           VADDHN))]
  "TARGET_NEON"
  "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_add_halve_narrow_q")]
)
1550
;; Polynomial and Float multiplication.
;; Modelled as UNSPEC_VMUL on VPF modes; <pf> supplies the element-type
;; prefix of the suffix.  The "type" attribute is neon_fp_mul_s<q> when
;; <Is_float_mode> holds and neon_mul_<V_elem_ch><q> otherwise.
(define_insn "neon_vmul<pf><mode>"
  [(set (match_operand:VPF 0 "s_register_operand" "=w")
        (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
                          (match_operand:VPF 2 "s_register_operand" "w")]
                         UNSPEC_VMUL))]
  "TARGET_NEON"
  "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_mul_s<q>")
                    (const_string "neon_mul_<V_elem_ch><q>")))]
)
1564
;; Half-precision vector multiply (VH modes) expressed as a canonical
;; mult.  Emits vmul.f16; requires TARGET_NEON_FP16INST.
(define_insn "neon_vmulf<mode>"
 [(set
   (match_operand:VH 0 "s_register_operand" "=w")
   (mult:VH
    (match_operand:VH 1 "s_register_operand" "w")
    (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
)
1575
;; Builtin expander for vmla on VDQW modes.  Uses the
;; mul<mode>3add<mode>_neon pattern when ARM_HAVE_NEON_<MODE>_ARITH holds
;; and the unspec form otherwise.
(define_expand "neon_vmla<mode>"
  [(match_operand:VDQW 0 "s_register_operand")
   (match_operand:VDQW 1 "s_register_operand")
   (match_operand:VDQW 2 "s_register_operand")
   (match_operand:VDQW 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx pat;

  if (ARM_HAVE_NEON_<MODE>_ARITH)
    pat = gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
                                        operands[2], operands[3]);
  else
    pat = gen_neon_vmla<mode>_unspec (operands[0], operands[1],
                                      operands[2], operands[3]);

  emit_insn (pat);
  DONE;
})
1591
;; Expander for the vfma intrinsic on VCVTF modes (requires TARGET_FMA).
;; Note the operand reordering: gen_fma<mode>4_intrinsic receives
;; (operands[0], operands[2], operands[3], operands[1]), i.e. this pattern's
;; operand 1 is passed last.
;; NOTE(review): presumably operand 1 is the accumulator and 2/3 the
;; multiplicands -- confirm against fma<mode>4_intrinsic.
1592(define_expand "neon_vfma<VCVTF:mode>"
1593  [(match_operand:VCVTF 0 "s_register_operand")
1594   (match_operand:VCVTF 1 "s_register_operand")
1595   (match_operand:VCVTF 2 "s_register_operand")
1596   (match_operand:VCVTF 3 "s_register_operand")]
1597  "TARGET_NEON && TARGET_FMA"
1598{
1599  emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
1600                                               operands[1]));
1601  DONE;
1602})
1603
;; Expander for the vfma intrinsic on the VH modes, enabled under
;; TARGET_NEON_FP16INST.  It forwards to gen_fma<mode>4 (not an *_intrinsic
;; variant) with this pattern's operand 1 moved to the last argument position:
;; (operands[0], operands[2], operands[3], operands[1]).
1604(define_expand "neon_vfma<VH:mode>"
1605  [(match_operand:VH 0 "s_register_operand")
1606   (match_operand:VH 1 "s_register_operand")
1607   (match_operand:VH 2 "s_register_operand")
1608   (match_operand:VH 3 "s_register_operand")]
1609  "TARGET_NEON_FP16INST"
1610{
1611  emit_insn (gen_fma<mode>4 (operands[0], operands[2], operands[3],
1612                                   operands[1]));
1613  DONE;
1614})
1615
;; Expander for the vfms intrinsic on VCVTF modes (requires TARGET_FMA).
;; Forwards to gen_fmsub<mode>4_intrinsic with this pattern's operand 1 moved
;; to the last argument position: (operands[0], operands[2], operands[3],
;; operands[1]).
1616(define_expand "neon_vfms<VCVTF:mode>"
1617  [(match_operand:VCVTF 0 "s_register_operand")
1618   (match_operand:VCVTF 1 "s_register_operand")
1619   (match_operand:VCVTF 2 "s_register_operand")
1620   (match_operand:VCVTF 3 "s_register_operand")]
1621  "TARGET_NEON && TARGET_FMA"
1622{
1623  emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
1624                                                   operands[1]));
1625  DONE;
1626})
1627
;; Expander for the vfms intrinsic on the VH modes, enabled under
;; TARGET_NEON_FP16INST.  Forwards to gen_fmsub<mode>4_intrinsic with this
;; pattern's operand 1 moved to the last argument position:
;; (operands[0], operands[2], operands[3], operands[1]).
1628(define_expand "neon_vfms<VH:mode>"
1629  [(match_operand:VH 0 "s_register_operand")
1630   (match_operand:VH 1 "s_register_operand")
1631   (match_operand:VH 2 "s_register_operand")
1632   (match_operand:VH 3 "s_register_operand")]
1633  "TARGET_NEON_FP16INST"
1634{
1635  emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
1636                                                   operands[1]));
1637  DONE;
1638})
1639
1640;; The expand RTL structure here is not important.
1641;; We use the gen_* functions anyway.
1642;; We just need something to wrap the iterators around.
1643
;; Expander for the non-lane vfm<vfml_op>l_<vfml_half> intrinsics.
;; Operand 0 is the VCVTF destination, operand 1 a VCVTF input and operands
;; 2/3 the <VFML>-mode inputs.  The body builds one half-selection constant
;; for <VFML>mode with arm_simd_vect_par_cnst_half (<vfml_half_selector>
;; chooses which half) and passes it twice, as operands 4 and 5 of the
;; matching vfm<vfml_op>l_<vfml_half><mode>_intrinsic insn.
1644(define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
1645  [(set (match_operand:VCVTF 0 "s_register_operand")
1646     (unspec:VCVTF
1647          [(match_operand:VCVTF 1 "s_register_operand")
1648             (PLUSMINUS:<VFML>
1649               (match_operand:<VFML> 2 "s_register_operand")
1650               (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
1651  "TARGET_FP16FML"
1652{
1653  rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
1654  emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
1655                                                                           operands[1],
1656                                                                           operands[2],
1657                                                                           operands[3],
1658                                                                           half, half));
1659  DONE;
1660})
1661
;; vfmal.f16 on low halves.  RTL shape:
;;   op0 = fma (float_extend (vec_select op2 op4),
;;              float_extend (vec_select op3 op5), op1)
;; where operands 4 and 5 must satisfy vect_par_constant_low and operand 1
;; is tied to the destination by the "0" constraint.  Operands 2 and 3 are
;; printed with the <V_lo> modifier.
1662(define_insn "vfmal_low<mode>_intrinsic"
1663 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1664          (fma:VCVTF
1665           (float_extend:VCVTF
1666            (vec_select:<VFMLSEL>
1667             (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1668             (match_operand:<VFML> 4 "vect_par_constant_low" "")))
1669           (float_extend:VCVTF
1670            (vec_select:<VFMLSEL>
1671             (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1672             (match_operand:<VFML> 5 "vect_par_constant_low" "")))
1673           (match_operand:VCVTF 1 "s_register_operand" "0")))]
1674 "TARGET_FP16FML"
1675 "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
1676 [(set_attr "type" "neon_fp_mla_s<q>")]
1677)
1678
;; vfmsl.f16 on high halves.  Same shape as the vfmal high-half pattern except
;; that the half selected from operand 2 is negated (neg:<VFMLSEL>) before
;; the float_extend.  Operands 4 and 5 must satisfy vect_par_constant_high;
;; operand 1 is tied to the destination; operands 2 and 3 print with <V_hi>.
1679(define_insn "vfmsl_high<mode>_intrinsic"
1680 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1681          (fma:VCVTF
1682           (float_extend:VCVTF
1683            (neg:<VFMLSEL>
1684              (vec_select:<VFMLSEL>
1685                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1686                (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
1687           (float_extend:VCVTF
1688            (vec_select:<VFMLSEL>
1689             (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1690             (match_operand:<VFML> 5 "vect_par_constant_high" "")))
1691           (match_operand:VCVTF 1 "s_register_operand" "0")))]
1692 "TARGET_FP16FML"
1693 "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
1694 [(set_attr "type" "neon_fp_mla_s<q>")]
1695)
1696
;; vfmal.f16 on high halves.  RTL shape:
;;   op0 = fma (float_extend (vec_select op2 op4),
;;              float_extend (vec_select op3 op5), op1)
;; where operands 4 and 5 must satisfy vect_par_constant_high and operand 1
;; is tied to the destination by the "0" constraint.  Operands 2 and 3 are
;; printed with the <V_hi> modifier.
1697(define_insn "vfmal_high<mode>_intrinsic"
1698 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1699          (fma:VCVTF
1700           (float_extend:VCVTF
1701            (vec_select:<VFMLSEL>
1702             (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1703             (match_operand:<VFML> 4 "vect_par_constant_high" "")))
1704           (float_extend:VCVTF
1705            (vec_select:<VFMLSEL>
1706             (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1707             (match_operand:<VFML> 5 "vect_par_constant_high" "")))
1708           (match_operand:VCVTF 1 "s_register_operand" "0")))]
1709 "TARGET_FP16FML"
1710 "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
1711 [(set_attr "type" "neon_fp_mla_s<q>")]
1712)
1713
;; vfmsl.f16 on low halves.  Same shape as the vfmal low-half pattern except
;; that the half selected from operand 2 is negated (neg:<VFMLSEL>) before
;; the float_extend.  Operands 4 and 5 must satisfy vect_par_constant_low;
;; operand 1 is tied to the destination; operands 2 and 3 print with <V_lo>.
1714(define_insn "vfmsl_low<mode>_intrinsic"
1715 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1716          (fma:VCVTF
1717           (float_extend:VCVTF
1718            (neg:<VFMLSEL>
1719              (vec_select:<VFMLSEL>
1720                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1721                (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
1722           (float_extend:VCVTF
1723            (vec_select:<VFMLSEL>
1724             (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1725             (match_operand:<VFML> 5 "vect_par_constant_low" "")))
1726           (match_operand:VCVTF 1 "s_register_operand" "0")))]
1727 "TARGET_FP16FML"
1728 "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
1729 [(set_attr "type" "neon_fp_mla_s<q>")]
1730)
1731
;; Expander for the by-lane vfm<vfml_op>l intrinsics in which operands 2 and
;; 3 share <VFML>mode.  Operand 4 is a constant lane index.  The body remaps
;; it with NEON_ENDIAN_LANE_N (<VFML>mode, ...), builds the half selector
;; with arm_simd_vect_par_cnst_half, and emits the matching
;; vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic insn with the selector as
;; operand 4 and the remapped lane as operand 5.
;; NOTE(review): the *_intrinsic insns apply NEON_ENDIAN_LANE_N to operand 5
;; again when printing, so the lane is remapped twice -- confirm intended.
1732(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
1733  [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
1734     (unspec:VCVTF
1735          [(match_operand:VCVTF 1 "s_register_operand")
1736           (PLUSMINUS:<VFML>
1737             (match_operand:<VFML> 2 "s_register_operand")
1738             (match_operand:<VFML> 3 "s_register_operand"))
1739           (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
1740  "TARGET_FP16FML"
1741{
1742  rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
1743  rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
1744  emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
1745                                                         (operands[0], operands[1],
1746                                                            operands[2], operands[3],
1747                                                            half, lane));
1748  DONE;
1749})
1750
;; By-lane vfmal.f16 using the low half of operand 2.  The second multiplicand
;; is the HF element of operand 3 selected by constant operand 5, duplicated
;; across <VFMLSEL>.  Operand 1 is tied to the destination.
;; At output time the lane is remapped with NEON_ENDIAN_LANE_N.  If the
;; result is past the last element of <VFMLSEL>mode, the lane is rebased by
;; that mode's element count and operand 3 is printed with <V_hi>; otherwise
;; the lane is used as-is and operand 3 is printed with <V_lo>.
1751(define_insn "vfmal_lane_low<mode>_intrinsic"
1752 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1753          (fma:VCVTF
1754           (float_extend:VCVTF
1755            (vec_select:<VFMLSEL>
1756             (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1757             (match_operand:<VFML> 4 "vect_par_constant_low" "")))
1758           (float_extend:VCVTF
1759             (vec_duplicate:<VFMLSEL>
1760               (vec_select:HF
1761                 (match_operand:<VFML> 3 "s_register_operand" "x")
1762                 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1763           (match_operand:VCVTF 1 "s_register_operand" "0")))]
1764 "TARGET_FP16FML"
1765 {
1766    int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
1767    if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
1768      {
1769          operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
1770          return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
1771      }
1772    else
1773      {
1774          operands[5] = GEN_INT (lane);
1775          return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
1776      }
1777  }
1778 [(set_attr "type" "neon_fp_mla_s<q>")]
1779)
1780
;; Expander for the by-lane vfm<vfml_op>l intrinsics in which operand 3 has
;; mode <VFMLSEL2> while operand 2 has mode <VFML>.  The constant lane in
;; operand 4 is remapped with NEON_ENDIAN_LANE_N using <VFMLSEL2>mode (the
;; mode of the vector the lane is taken from); the half selector is still
;; built for <VFML>mode.  Both are passed to the matching *_intrinsic insn as
;; operands 4 (selector) and 5 (lane).
;; NOTE(review): the *_intrinsic insns apply NEON_ENDIAN_LANE_N to operand 5
;; again when printing, so the lane is remapped twice -- confirm intended.
1781(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
1782  [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
1783     (unspec:VCVTF
1784          [(match_operand:VCVTF 1 "s_register_operand")
1785           (PLUSMINUS:<VFML>
1786             (match_operand:<VFML> 2 "s_register_operand")
1787             (match_operand:<VFMLSEL2> 3 "s_register_operand"))
1788           (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
1789  "TARGET_FP16FML"
1790{
1791  rtx lane
1792    = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
1793  rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
1794  emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
1795                    (operands[0], operands[1], operands[2], operands[3],
1796                     half, lane));
1797  DONE;
1798})
1799
1800;; Used to implement the intrinsics:
1801;; float32x4_t vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
1802;; float32x2_t vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
1803;; Needs a bit of care to get the modes of the different sub-expressions right
1804;; due to 'a' and 'b' having different sizes and make sure we use the right
1805;; S or D subregister to select the appropriate lane from.
1806
;; Operand 2 (<VFML>) contributes its low half via operand 4; operand 3
;; (<VFMLSEL2>) contributes the single HF lane chosen by operand 5.
;; The output code remaps the lane with NEON_ENDIAN_LANE_N and splits it into
;;   regdiff  = lane / (elements in <VFMLSEL>mode)  -- register offset
;;   new_lane = lane % (elements in <VFMLSEL>mode)  -- lane within it
;; Operand 3 is then rebuilt as a <VFMLSEL>mode REG at REGNO + regdiff and
;; operand 2 as a <VFMLSEL>mode REG at its own REGNO, and both are printed
;; with <V_lane_reg>.
1807(define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
1808 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1809          (fma:VCVTF
1810           (float_extend:VCVTF
1811            (vec_select:<VFMLSEL>
1812             (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1813             (match_operand:<VFML> 4 "vect_par_constant_low" "")))
1814           (float_extend:VCVTF
1815             (vec_duplicate:<VFMLSEL>
1816               (vec_select:HF
1817                 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
1818                 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1819           (match_operand:VCVTF 1 "s_register_operand" "0")))]
1820 "TARGET_FP16FML"
1821 {
1822   int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
1823   int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
1824   int new_lane = lane % elts_per_reg;
1825   int regdiff = lane / elts_per_reg;
1826   operands[5] = GEN_INT (new_lane);
1827   /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
1828      because we want the print_operand code to print the appropriate
1829      S or D register prefix.  */
1830   operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
1831   operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
1832   return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
1833 }
1834 [(set_attr "type" "neon_fp_mla_s<q>")]
1835)
1836
1837;; Used to implement the intrinsics:
1838;; float32x4_t vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
1839;; float32x2_t vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
1840;; Needs a bit of care to get the modes of the different sub-expressions right
1841;; due to 'a' and 'b' having different sizes and make sure we use the right
1842;; S or D subregister to select the appropriate lane from.
1843
;; Operand 2 (<VFML>) contributes its high half via operand 4; operand 3
;; (<VFMLSEL2>) contributes the single HF lane chosen by operand 5.
;; The output code remaps the lane with NEON_ENDIAN_LANE_N and splits it into
;; a register offset (lane / elements in <VFMLSEL>mode) and a lane within
;; that register (lane % ...).  Only operand 3 is rebuilt, as a <VFMLSEL>mode
;; REG at REGNO + offset; operand 2 is left alone and printed with <V_hi>.
1844(define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
1845 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1846          (fma:VCVTF
1847           (float_extend:VCVTF
1848            (vec_select:<VFMLSEL>
1849             (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1850             (match_operand:<VFML> 4 "vect_par_constant_high" "")))
1851           (float_extend:VCVTF
1852             (vec_duplicate:<VFMLSEL>
1853               (vec_select:HF
1854                 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
1855                 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1856           (match_operand:VCVTF 1 "s_register_operand" "0")))]
1857 "TARGET_FP16FML"
1858 {
1859   int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
1860   int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
1861   int new_lane = lane % elts_per_reg;
1862   int regdiff = lane / elts_per_reg;
1863   operands[5] = GEN_INT (new_lane);
1864   /* We re-create operands[3] in the halved VFMLSEL mode
1865      because we've calculated the correct half-width subreg to extract
1866      the lane from and we want to print *that* subreg instead.  */
1867   operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
1868   return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
1869 }
1870 [(set_attr "type" "neon_fp_mla_s<q>")]
1871)
1872
;; By-lane vfmal.f16 using the high half of operand 2.  The second
;; multiplicand is the HF element of operand 3 selected by constant operand
;; 5, duplicated across <VFMLSEL>.  Operand 1 is tied to the destination.
;; At output time the lane is remapped with NEON_ENDIAN_LANE_N.  If the
;; result is past the last element of <VFMLSEL>mode, the lane is rebased by
;; that mode's element count and operand 3 is printed with <V_hi>; otherwise
;; operand 3 is printed with <V_lo>.  Operand 2 always prints with <V_hi>.
1873(define_insn "vfmal_lane_high<mode>_intrinsic"
1874 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1875          (fma:VCVTF
1876           (float_extend:VCVTF
1877            (vec_select:<VFMLSEL>
1878             (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1879             (match_operand:<VFML> 4 "vect_par_constant_high" "")))
1880           (float_extend:VCVTF
1881             (vec_duplicate:<VFMLSEL>
1882               (vec_select:HF
1883                 (match_operand:<VFML> 3 "s_register_operand" "x")
1884                 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1885           (match_operand:VCVTF 1 "s_register_operand" "0")))]
1886 "TARGET_FP16FML"
1887  {
1888    int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
1889    if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
1890      {
1891          operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
1892          return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
1893      }
1894    else
1895      {
1896          operands[5] = GEN_INT (lane);
1897          return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
1898      }
1899  }
1900 [(set_attr "type" "neon_fp_mla_s<q>")]
1901)
1902
;; By-lane vfmsl.f16 using the negated low half of operand 2 (neg:<VFMLSEL>
;; wraps the vec_select).  The second multiplicand is the HF element of
;; operand 3 selected by constant operand 5, duplicated across <VFMLSEL>.
;; At output time the lane is remapped with NEON_ENDIAN_LANE_N.  If the
;; result is past the last element of <VFMLSEL>mode, the lane is rebased by
;; that mode's element count and operand 3 is printed with <V_hi>; otherwise
;; operand 3 is printed with <V_lo>.  Operand 2 always prints with <V_lo>.
1903(define_insn "vfmsl_lane_low<mode>_intrinsic"
1904 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1905          (fma:VCVTF
1906           (float_extend:VCVTF
1907            (neg:<VFMLSEL>
1908              (vec_select:<VFMLSEL>
1909                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1910                (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
1911           (float_extend:VCVTF
1912             (vec_duplicate:<VFMLSEL>
1913               (vec_select:HF
1914                 (match_operand:<VFML> 3 "s_register_operand" "x")
1915                 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1916           (match_operand:VCVTF 1 "s_register_operand" "0")))]
1917 "TARGET_FP16FML"
1918 {
1919    int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
1920    if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
1921      {
1922          operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
1923          return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
1924      }
1925    else
1926      {
1927          operands[5] = GEN_INT (lane);
1928          return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
1929      }
1930  }
1931 [(set_attr "type" "neon_fp_mla_s<q>")]
1932)
1933
1934;; Used to implement the intrinsics:
1935;; float32x4_t vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
1936;; float32x2_t vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
1937;; Needs a bit of care to get the modes of the different sub-expressions right
1938;; due to 'a' and 'b' having different sizes and make sure we use the right
1939;; S or D subregister to select the appropriate lane from.
1940
;; Operand 2 (<VFML>) contributes its negated low half via operand 4;
;; operand 3 (<VFMLSEL2>) contributes the single HF lane chosen by operand 5.
;; The output code remaps the lane with NEON_ENDIAN_LANE_N and splits it into
;;   regdiff  = lane / (elements in <VFMLSEL>mode)  -- register offset
;;   new_lane = lane % (elements in <VFMLSEL>mode)  -- lane within it
;; Operand 3 is then rebuilt as a <VFMLSEL>mode REG at REGNO + regdiff and
;; operand 2 as a <VFMLSEL>mode REG at its own REGNO, and both are printed
;; with <V_lane_reg>.
1941(define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
1942 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1943          (fma:VCVTF
1944           (float_extend:VCVTF
1945            (neg:<VFMLSEL>
1946              (vec_select:<VFMLSEL>
1947                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1948                (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
1949           (float_extend:VCVTF
1950             (vec_duplicate:<VFMLSEL>
1951               (vec_select:HF
1952                 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
1953                 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1954           (match_operand:VCVTF 1 "s_register_operand" "0")))]
1955 "TARGET_FP16FML"
1956 {
1957   int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
1958   int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
1959   int new_lane = lane % elts_per_reg;
1960   int regdiff = lane / elts_per_reg;
1961   operands[5] = GEN_INT (new_lane);
1962   /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
1963      because we want the print_operand code to print the appropriate
1964      S or D register prefix.  */
1965   operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
1966   operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
1967   return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
1968 }
1969 [(set_attr "type" "neon_fp_mla_s<q>")]
1970)
1971
1972;; Used to implement the intrinsics:
1973;; float32x4_t vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
1974;; float32x2_t vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
1975;; Needs a bit of care to get the modes of the different sub-expressions right
1976;; due to 'a' and 'b' having different sizes and make sure we use the right
1977;; S or D subregister to select the appropriate lane from.
1978
;; Operand 2 (<VFML>) contributes its negated high half via operand 4;
;; operand 3 (<VFMLSEL2>) contributes the single HF lane chosen by operand 5.
;; The output code remaps the lane with NEON_ENDIAN_LANE_N and splits it into
;; a register offset (lane / elements in <VFMLSEL>mode) and a lane within
;; that register (lane % ...).  Only operand 3 is rebuilt, as a <VFMLSEL>mode
;; REG at REGNO + offset; operand 2 is left alone and printed with <V_hi>.
1979(define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
1980 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1981          (fma:VCVTF
1982           (float_extend:VCVTF
1983            (neg:<VFMLSEL>
1984              (vec_select:<VFMLSEL>
1985               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1986               (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
1987           (float_extend:VCVTF
1988             (vec_duplicate:<VFMLSEL>
1989               (vec_select:HF
1990                 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
1991                 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1992           (match_operand:VCVTF 1 "s_register_operand" "0")))]
1993 "TARGET_FP16FML"
1994 {
1995   int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
1996   int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
1997   int new_lane = lane % elts_per_reg;
1998   int regdiff = lane / elts_per_reg;
1999   operands[5] = GEN_INT (new_lane);
2000   /* We re-create operands[3] in the halved VFMLSEL mode
2001      because we've calculated the correct half-width subreg to extract
2002      the lane from and we want to print *that* subreg instead.  */
2003   operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2004   return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2005 }
2006 [(set_attr "type" "neon_fp_mla_s<q>")]
2007)
2008
;; By-lane vfmsl.f16 using the negated high half of operand 2 (neg:<VFMLSEL>
;; wraps the vec_select).  The second multiplicand is the HF element of
;; operand 3 selected by constant operand 5, duplicated across <VFMLSEL>.
;; At output time the lane is remapped with NEON_ENDIAN_LANE_N.  If the
;; result is past the last element of <VFMLSEL>mode, the lane is rebased by
;; that mode's element count and operand 3 is printed with <V_hi>; otherwise
;; operand 3 is printed with <V_lo>.  Operand 2 always prints with <V_hi>.
2009(define_insn "vfmsl_lane_high<mode>_intrinsic"
2010 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2011          (fma:VCVTF
2012           (float_extend:VCVTF
2013            (neg:<VFMLSEL>
2014              (vec_select:<VFMLSEL>
2015               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2016               (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2017           (float_extend:VCVTF
2018             (vec_duplicate:<VFMLSEL>
2019               (vec_select:HF
2020                 (match_operand:<VFML> 3 "s_register_operand" "x")
2021                 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2022           (match_operand:VCVTF 1 "s_register_operand" "0")))]
2023 "TARGET_FP16FML"
2024  {
2025    int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2026    if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2027      {
2028          operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2029          return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2030      }
2031    else
2032      {
2033          operands[5] = GEN_INT (lane);
2034          return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2035      }
2036  }
2037 [(set_attr "type" "neon_fp_mla_s<q>")]
2038)
2039
2040; Used for intrinsics when flag_unsafe_math_optimizations is false.
2041
;; UNSPEC_VMLA form of vmla for the VDQW modes.  Operand 1 is tied to the
;; destination by the "0" constraint, so only operands 0, 2 and 3 appear in
;; the assembly template.  The scheduling type is neon_fp_mla_s<q> when
;; <Is_float_mode> holds and neon_mla_<V_elem_ch><q> otherwise.
2042(define_insn "neon_vmla<mode>_unspec"
2043  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2044          (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2045                          (match_operand:VDQW 2 "s_register_operand" "w")
2046                          (match_operand:VDQW 3 "s_register_operand" "w")]
2047                        UNSPEC_VMLA))]
2048  "TARGET_NEON"
2049  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2050  [(set (attr "type")
2051      (if_then_else (match_test "<Is_float_mode>")
2052                    (const_string "neon_fp_mla_s<q>")
2053                    (const_string "neon_mla_<V_elem_ch><q>")))]
2054)
2055
;; vmlal: operands 2 and 3 are VW-mode sources (printed with %P); the
;; destination and the tied operand 1 have the widened mode <V_widen>
;; (printed with %q).  The unspec code comes from the VMLAL iterator and
;; <sup> supplies the type prefix of the element suffix.
2056(define_insn "neon_vmlal<sup><mode>"
2057  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2058        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2059                               (match_operand:VW 2 "s_register_operand" "w")
2060                               (match_operand:VW 3 "s_register_operand" "w")]
2061                          VMLAL))]
2062  "TARGET_NEON"
2063  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2064  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2065)
2066
;; Expander for the vmls intrinsic on VDQW modes.  Operands 0-3 are passed
;; through in order to one of two generators:
;; gen_mul<mode>3neg<mode>add<mode>_neon when ARM_HAVE_NEON_<MODE>_ARITH
;; holds, otherwise the unspec-based gen_neon_vmls<mode>_unspec.
2067(define_expand "neon_vmls<mode>"
2068  [(match_operand:VDQW 0 "s_register_operand")
2069   (match_operand:VDQW 1 "s_register_operand")
2070   (match_operand:VDQW 2 "s_register_operand")
2071   (match_operand:VDQW 3 "s_register_operand")]
2072  "TARGET_NEON"
2073{
2074  if (ARM_HAVE_NEON_<MODE>_ARITH)
2075    emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2076                     operands[1], operands[2], operands[3]));
2077  else
2078    emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2079                                                     operands[2], operands[3]));
2080  DONE;
2081})
2082
2083; Used for intrinsics when flag_unsafe_math_optimizations is false.
2084
;; UNSPEC_VMLS form of vmls for the VDQW modes.  Operand 1 is tied to the
;; destination by the "0" constraint, so only operands 0, 2 and 3 appear in
;; the assembly template.  The scheduling type is neon_fp_mla_s<q> when
;; <Is_float_mode> holds and neon_mla_<V_elem_ch><q> otherwise.
2085(define_insn "neon_vmls<mode>_unspec"
2086  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2087          (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2088                          (match_operand:VDQW 2 "s_register_operand" "w")
2089                          (match_operand:VDQW 3 "s_register_operand" "w")]
2090                        UNSPEC_VMLS))]
2091  "TARGET_NEON"
2092  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2093  [(set (attr "type")
2094      (if_then_else (match_test "<Is_float_mode>")
2095                    (const_string "neon_fp_mla_s<q>")
2096                    (const_string "neon_mla_<V_elem_ch><q>")))]
2097)
2098
;; vmlsl: operands 2 and 3 are VW-mode sources (printed with %P); the
;; destination and the tied operand 1 have the widened mode <V_widen>
;; (printed with %q).  The unspec code comes from the VMLSL iterator and
;; <sup> supplies the type prefix of the element suffix.
2099(define_insn "neon_vmlsl<sup><mode>"
2100  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2101        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2102                               (match_operand:VW 2 "s_register_operand" "w")
2103                               (match_operand:VW 3 "s_register_operand" "w")]
2104                          VMLSL))]
2105  "TARGET_NEON"
2106  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2107  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2108)
2109
2110;; vqdmulh, vqrdmulh
;; One pattern for both mnemonics: <r> is substituted into "vq<r>dmulh" and
;; the unspec code is drawn from the VQDMULH iterator.  All three operands
;; share the same VMDQI mode and are printed with <V_reg>.
2111(define_insn "neon_vq<r>dmulh<mode>"
2112  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2113        (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2114                           (match_operand:VMDQI 2 "s_register_operand" "w")]
2115                      VQDMULH))]
2116  "TARGET_NEON"
2117  "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2118  [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2119)
2120
2121;; vqrdmlah, vqrdmlsh
;; One pattern for both mnemonics: the neon_rdma_as attribute of the
;; VQRDMLH_AS iterator is substituted into "vqrdml...h".  Operand 1 is tied
;; to the destination ("0"), so the template names only operands 0, 2 and 3.
;; Enabled under TARGET_NEON_RDMA.
;; NOTE(review): the type attribute uses the "_long" class although every
;; operand here has the same VMDQI mode -- confirm this is intended.
2122(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2123  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2124          (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2125                           (match_operand:VMDQI 2 "s_register_operand" "w")
2126                           (match_operand:VMDQI 3 "s_register_operand" "w")]
2127                          VQRDMLH_AS))]
2128  "TARGET_NEON_RDMA"
2129  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2130  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2131)
2132
;; vqdmlal (UNSPEC_VQDMLAL): operands 2 and 3 are VMDI-mode sources printed
;; with %P; the destination and the tied operand 1 have mode <V_widen> and
;; are printed with %q.
2133(define_insn "neon_vqdmlal<mode>"
2134  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2135        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2136                               (match_operand:VMDI 2 "s_register_operand" "w")
2137                               (match_operand:VMDI 3 "s_register_operand" "w")]
2138                          UNSPEC_VQDMLAL))]
2139  "TARGET_NEON"
2140  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2141  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2142)
2143
;; vqdmlsl (UNSPEC_VQDMLSL): operands 2 and 3 are VMDI-mode sources printed
;; with %P; the destination and the tied operand 1 have mode <V_widen> and
;; are printed with %q.
2144(define_insn "neon_vqdmlsl<mode>"
2145  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2146        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2147                               (match_operand:VMDI 2 "s_register_operand" "w")
2148                               (match_operand:VMDI 3 "s_register_operand" "w")]
2149                          UNSPEC_VQDMLSL))]
2150  "TARGET_NEON"
2151  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2152  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2153)
2154
;; vmull: operands 1 and 2 are VW-mode sources printed with %P; the result
;; has mode <V_widen> and is printed with %q.  The unspec code comes from the
;; VMULL iterator and <sup> supplies the type prefix of the element suffix.
2155(define_insn "neon_vmull<sup><mode>"
2156  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2157        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2158                               (match_operand:VW 2 "s_register_operand" "w")]
2159                          VMULL))]
2160  "TARGET_NEON"
2161  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2162  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
2163)
2164
;; vqdmull (UNSPEC_VQDMULL): operands 1 and 2 are VMDI-mode sources printed
;; with %P; the result has mode <V_widen> and is printed with %q.
2165(define_insn "neon_vqdmull<mode>"
2166  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2167        (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2168                               (match_operand:VMDI 2 "s_register_operand" "w")]
2169                          UNSPEC_VQDMULL))]
2170  "TARGET_NEON"
2171  "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2172  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
2173)
2174
;; Expander for the vsub intrinsic on VCVTF modes.  Operands 0-2 are passed
;; through in order: to gen_sub<mode>3 when ARM_HAVE_NEON_<MODE>_ARITH holds,
;; otherwise to the unspec-based gen_neon_vsub<mode>_unspec.
2175(define_expand "neon_vsub<mode>"
2176  [(match_operand:VCVTF 0 "s_register_operand")
2177   (match_operand:VCVTF 1 "s_register_operand")
2178   (match_operand:VCVTF 2 "s_register_operand")]
2179  "TARGET_NEON"
2180{
2181  if (ARM_HAVE_NEON_<MODE>_ARITH)
2182    emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2183  else
2184    emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2185                                                     operands[2]));
2186  DONE;
2187})
2188
2189; Used for intrinsics when flag_unsafe_math_optimizations is false.
2190
;; UNSPEC_VSUB form of vsub for the VCVTF modes; operands 1 and 2 are the
;; sources.  The type attribute still tests <Is_float_mode>, selecting
;; neon_fp_addsub_s<q> when it holds and neon_sub<q> otherwise.
2191(define_insn "neon_vsub<mode>_unspec"
2192  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2193        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2194                          (match_operand:VCVTF 2 "s_register_operand" "w")]
2195                     UNSPEC_VSUB))]
2196  "TARGET_NEON"
2197  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2198  [(set (attr "type")
2199      (if_then_else (match_test "<Is_float_mode>")
2200                    (const_string "neon_fp_addsub_s<q>")
2201                    (const_string "neon_sub<q>")))]
2202)
2203
;; vsubl: both sources (operands 1 and 2) have a VDI mode and are printed
;; with %P; the result has mode <V_widen> and is printed with %q.  The unspec
;; code comes from the VSUBL iterator.
2204(define_insn "neon_vsubl<sup><mode>"
2205  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2206        (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2207                               (match_operand:VDI 2 "s_register_operand" "w")]
2208                          VSUBL))]
2209  "TARGET_NEON"
2210  "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2211  [(set_attr "type" "neon_sub_long")]
2212)
2213
;; vsubw: operand 1 already has the widened mode <V_widen> (printed with
;; %q, like the result), while operand 2 has a VDI mode (printed with %P).
;; The unspec code comes from the VSUBW iterator.
2214(define_insn "neon_vsubw<sup><mode>"
2215  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2216        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2217                               (match_operand:VDI 2 "s_register_operand" "w")]
2218                                VSUBW))]
2219  "TARGET_NEON"
2220  "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2221  [(set_attr "type" "neon_sub_widen")]
2222)
2223
;; vqsub for the VDQIX modes; all three operands share the mode and are
;; printed with <V_reg>.  The unspec code comes from the VQSUB iterator and
;; <sup> supplies the type prefix of the element suffix.
2224(define_insn "neon_vqsub<sup><mode>"
2225  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2226        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2227                           (match_operand:VDQIX 2 "s_register_operand" "w")]
2228                          VQSUB))]
2229  "TARGET_NEON"
2230  "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2231  [(set_attr "type" "neon_qsub<q>")]
2232)
2233
;; vhsub for the VDQIW modes; all three operands share the mode and are
;; printed with <V_reg>.  The unspec code comes from the VHSUB iterator and
;; <sup> supplies the type prefix of the element suffix.
2234(define_insn "neon_vhsub<sup><mode>"
2235  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2236        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2237                           (match_operand:VDQIW 2 "s_register_operand" "w")]
2238                          VHSUB))]
2239  "TARGET_NEON"
2240  "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2241  [(set_attr "type" "neon_sub_halve<q>")]
2242)
2243
;; Narrowing subtract: emits v<r>subhn.  Operands 1 and 2 are the VN-mode
;; sources (printed with the %q modifier); operand 0 is the <V_narrow>-mode
;; result (printed with %P).  The operation is modelled as an unspec whose
;; code is taken from the VSUBHN iterator.
2244(define_insn "neon_v<r>subhn<mode>"
2245  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2246        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2247                                (match_operand:VN 2 "s_register_operand" "w")]
2248                           VSUBHN))]
2249  "TARGET_NEON"
2250  "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2251  [(set_attr "type" "neon_sub_halve_narrow_q")]
2252)
2253
2254;; These may expand to an UNSPEC pattern when a floating point mode is used
2255;; without unsafe math optimizations.
;; Expander for the vector comparison intrinsics over the COMPARISONS codes
;; and VDQW modes.  Operand 0 receives the <V_cmp_result>-mode result,
;; operand 1 is a register and operand 2 a register or zero.
;; Dispatch:
;;   - vector-float mode without flag_unsafe_math_optimizations: call the
;;     v2sf or v4sf *_insn_unspec generator, selected by an explicit switch
;;     on the mode; any other mode in this branch hits gcc_unreachable.
;;   - everything else: gen_neon_vc<cmp_op><mode>_insn.
2256(define_expand "@neon_vc<cmp_op><mode>"
2257  [(match_operand:<V_cmp_result> 0 "s_register_operand")
2258     (neg:<V_cmp_result>
2259       (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand")
2260                         (match_operand:VDQW 2 "reg_or_zero_operand")))]
2261  "TARGET_NEON"
2262  {
2263    /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2264       are enabled.  */
2265    if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2266        && !flag_unsafe_math_optimizations)
2267      {
2268        /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2269           we define gen_neon_vceq<mode>_insn_unspec only for float modes
2270           whereas this expander iterates over the integer modes as well,
2271           but we will never expand to UNSPECs for the integer comparisons.  */
2272        switch (<MODE>mode)
2273          {
2274            case E_V2SFmode:
2275              emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2276                                                              operands[1],
2277                                                              operands[2]));
2278              break;
2279            case E_V4SFmode:
2280              emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2281                                                              operands[1],
2282                                                              operands[2]));
2283              break;
2284            default:
2285              gcc_unreachable ();
2286          }
2287      }
2288    else
2289      emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2290                                                 operands[1],
2291                                                 operands[2]));
2292    DONE;
2293  }
2294)
2295
;; RTL (non-unspec) form of the vector comparison; its condition excludes
;; float vector modes unless -funsafe-math-optimizations is on.  Alternative 0
;; compares against a register, alternative 1 (Dz) against the literal #0.
;; The asm template is assembled with sprintf because the type letter ("f" or
;; <cmp_type>) is chosen from the mode class at output time.  The "type"
;; attribute depends on whether operand 2 matches zero_operand.
2296(define_insn "@neon_vc<cmp_op><mode>_insn"
2297  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2298        (neg:<V_cmp_result>
2299          (COMPARISONS:<V_cmp_result>
2300            (match_operand:VDQW 1 "s_register_operand" "w,w")
2301            (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2302  "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2303                    && !flag_unsafe_math_optimizations)"
2304  {
2305    char pattern[100];
2306    sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2307                      " %%<V_reg>1, %s",
2308                       GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2309                         ? "f" : "<cmp_type>",
2310                       which_alternative == 0
2311                         ? "%<V_reg>2" : "#0");
2312    output_asm_insn (pattern, operands);
2313    return "";
2314  }
2315  [(set (attr "type")
2316        (if_then_else (match_operand 2 "zero_operand")
2317                      (const_string "neon_compare_zero<q>")
2318                      (const_string "neon_compare<q>")))]
2319)
2320
;; UNSPEC (NEON_VCMP) form of the float comparison over the VCVTF modes.
;; Always emits the ".f" variant; alternative 0 compares against a register,
;; alternative 1 (Dz) against #0.
2321(define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2322  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2323        (unspec:<V_cmp_result>
2324            [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2325             (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2326          NEON_VCMP))]
2327  "TARGET_NEON"
2328  {
2329    char pattern[100];
2330    sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2331                       " %%<V_reg>1, %s",
2332                       which_alternative == 0
2333                         ? "%<V_reg>2" : "#0");
2334    output_asm_insn (pattern, operands);
2335    return "";
2336}
2337  [(set_attr "type" "neon_fp_compare_s<q>")]
2338)
2339
;; VH-mode counterpart of the vc<cmp_op> expander, available under
;; TARGET_NEON_FP16INST.  Emits the *_fp16insn_unspec pattern for float vector
;; modes unless -funsafe-math-optimizations is on, otherwise the *_fp16insn
;; pattern.
2340(define_expand "@neon_vc<cmp_op><mode>"
2341 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2342  (neg:<V_cmp_result>
2343   (COMPARISONS:VH
2344    (match_operand:VH 1 "s_register_operand")
2345    (match_operand:VH 2 "reg_or_zero_operand")))]
2346 "TARGET_NEON_FP16INST"
2347{
2348  /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2349     are enabled.  */
2350  if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2351      && !flag_unsafe_math_optimizations)
2352    emit_insn
2353      (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
2354       (operands[0], operands[1], operands[2]));
2355  else
2356    emit_insn
2357      (gen_neon_vc<cmp_op><mode>_fp16insn
2358       (operands[0], operands[1], operands[2]));
2359  DONE;
2360})
2361
;; RTL form of the VH-mode comparison.  The condition requires
;; TARGET_NEON_FP16INST and rejects float vector modes unless
;; -funsafe-math-optimizations is on.  Alternative 1 (Dz) prints #0 in place
;; of the second register; the asm string is built with sprintf at output
;; time.
2362(define_insn "neon_vc<cmp_op><mode>_fp16insn"
2363 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2364   (neg:<V_cmp_result>
2365    (COMPARISONS:<V_cmp_result>
2366     (match_operand:VH 1 "s_register_operand" "w,w")
2367     (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
2368 "TARGET_NEON_FP16INST
2369  && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2370  && !flag_unsafe_math_optimizations)"
2371{
2372  char pattern[100];
2373  sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2374             " %%<V_reg>1, %s",
2375             GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2376             ? "f" : "<cmp_type>",
2377             which_alternative == 0
2378             ? "%<V_reg>2" : "#0");
2379  output_asm_insn (pattern, operands);
2380  return "";
2381}
2382 [(set (attr "type")
2383   (if_then_else (match_operand 2 "zero_operand")
2384    (const_string "neon_compare_zero<q>")
2385    (const_string "neon_compare<q>")))])
2386
;; UNSPEC (NEON_VCMP) form of the VH-mode comparison, gated on
;; TARGET_NEON_FP16INST.  Alternative 0 compares two registers, alternative 1
;; (Dz) compares against #0.
2387(define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
2388 [(set
2389   (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2390   (unspec:<V_cmp_result>
2391    [(match_operand:VH 1 "s_register_operand" "w,w")
2392     (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
2393    NEON_VCMP))]
2394 "TARGET_NEON_FP16INST"
2395{
2396  char pattern[100];
2397  sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2398             " %%<V_reg>1, %s",
2399             which_alternative == 0
2400             ? "%<V_reg>2" : "#0");
2401  output_asm_insn (pattern, operands);
2402  return "";
2403}
2404 [(set_attr "type" "neon_fp_compare_s<q>")])
2405
;; Register-register comparison for the GTUGEU codes over the VDQIW modes.
;; Always emits the ".u" form of vc<cmp_op>.
2406(define_insn "@neon_vc<code><mode>"
2407  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2408        (neg:<V_cmp_result>
2409          (GTUGEU:<V_cmp_result>
2410              (match_operand:VDQIW 1 "s_register_operand" "w")
2411              (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2412  "TARGET_NEON"
2413  "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2414  [(set_attr "type" "neon_compare<q>")]
2415)
2416
;; Expander for comparisons of absolute values (GLTE codes) over the VCVTF
;; modes.  With -funsafe-math-optimizations the RTL _insn pattern is emitted;
;; otherwise the _insn_unspec pattern is used.
2417(define_expand "neon_vca<cmp_op><mode>"
2418  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2419        (neg:<V_cmp_result>
2420          (GLTE:<V_cmp_result>
2421            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2422            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2423  "TARGET_NEON"
2424  {
2425    if (flag_unsafe_math_optimizations)
2426      emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2427                                                  operands[2]));
2428    else
2429      emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
2430                                                         operands[1],
2431                                                         operands[2]));
2432    DONE;
2433  }
2434)
2435
;; RTL form of the absolute-value comparison (VCVTF modes); only valid with
;; -funsafe-math-optimizations.  Emits vac<cmp_op>.
2436(define_insn "neon_vca<cmp_op><mode>_insn"
2437  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2438        (neg:<V_cmp_result>
2439          (GLTE:<V_cmp_result>
2440            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2441            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2442  "TARGET_NEON && flag_unsafe_math_optimizations"
2443  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2444  [(set_attr "type" "neon_fp_compare_s<q>")]
2445)
2446
;; UNSPEC (NEON_VAGLTE) form of the absolute-value comparison over the VCVTF
;; modes.  Emits vac<cmp_op_unsp>.
2447(define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2448  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2449        (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2450                                    (match_operand:VCVTF 2 "s_register_operand" "w")]
2451                               NEON_VAGLTE))]
2452  "TARGET_NEON"
2453  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2454  [(set_attr "type" "neon_fp_compare_s<q>")]
2455)
2456
;; VH-mode counterpart of the absolute-value comparison expander, gated on
;; TARGET_NEON_FP16INST.  Chooses the _fp16insn pattern under
;; -funsafe-math-optimizations and the _fp16insn_unspec pattern otherwise.
2457(define_expand "neon_vca<cmp_op><mode>"
2458  [(set
2459    (match_operand:<V_cmp_result> 0 "s_register_operand")
2460    (neg:<V_cmp_result>
2461     (GLTE:<V_cmp_result>
2462      (abs:VH (match_operand:VH 1 "s_register_operand"))
2463      (abs:VH (match_operand:VH 2 "s_register_operand")))))]
2464 "TARGET_NEON_FP16INST"
2465{
2466  if (flag_unsafe_math_optimizations)
2467    emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
2468                 (operands[0], operands[1], operands[2]));
2469  else
2470    emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
2471                 (operands[0], operands[1], operands[2]));
2472  DONE;
2473})
2474
;; RTL form of the VH-mode absolute-value comparison; requires both
;; TARGET_NEON_FP16INST and -funsafe-math-optimizations.
2475(define_insn "neon_vca<cmp_op><mode>_fp16insn"
2476  [(set
2477    (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2478    (neg:<V_cmp_result>
2479     (GLTE:<V_cmp_result>
2480      (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
2481      (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
2482 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2483 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2484 [(set_attr "type" "neon_fp_compare_s<q>")]
2485)
2486
;; UNSPEC (NEON_VAGLTE) form of the VH-mode absolute-value comparison.
;; The pattern operates on VH modes, so it is gated on TARGET_NEON_FP16INST
;; like the expander that generates it and the other VH-mode patterns,
;; rather than on plain TARGET_NEON.
2487(define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
2488 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2489   (unspec:<V_cmp_result>
2490    [(match_operand:VH 1 "s_register_operand" "w")
2491     (match_operand:VH 2 "s_register_operand" "w")]
2492    NEON_VAGLTE))]
2493 "TARGET_NEON_FP16INST"
2494 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2495 [(set_attr "type" "neon_fp_compare_s<q>")]
2496)
2497
;; Compare-against-zero expander for the VH modes: forwards to
;; gen_neon_vc<cmp_op><mode> with CONST0_RTX of the mode as the second
;; comparison operand.
2498(define_expand "neon_vc<cmp_op>z<mode>"
2499 [(set
2500   (match_operand:<V_cmp_result> 0 "s_register_operand")
2501   (COMPARISONS:<V_cmp_result>
2502    (match_operand:VH 1 "s_register_operand")
2503    (const_int 0)))]
2504 "TARGET_NEON_FP16INST"
2505 {
2506  emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
2507                                                  CONST0_RTX (<MODE>mode)));
2508  DONE;
2509})
2510
;; Matches (plus (eq (and op1 op2) zero) minus_one) over the VDQIW modes and
;; emits a single vtst on operands 1 and 2.  Operands 3 and 4 only constrain
;; the constants in the RTL and do not appear in the assembly.
2511(define_insn "neon_vtst_combine<mode>"
2512  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2513        (plus:VDQIW
2514            (eq:VDQIW
2515              (and:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
2516                           (match_operand:VDQIW 2 "s_register_operand" "w"))
2517              (match_operand:VDQIW 3 "zero_operand" "i"))
2518            (match_operand:VDQIW 4 "minus_one_operand" "i")))]
2519  "TARGET_NEON"
2520  "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2521  [(set_attr "type" "neon_tst<q>")]
2522)
2523
;; Integer vabd over the VDQIW modes, described by the VABD unspec
;; iterator; <sup> supplies the signedness prefix of the element type.
2524(define_insn "neon_vabd<sup><mode>"
2525  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2526        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2527                          (match_operand:VDQIW 2 "s_register_operand" "w")]
2528                         VABD))]
2529  "TARGET_NEON"
2530  "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2531  [(set_attr "type" "neon_abd<q>")]
2532)
2533
;; vabd over the VH modes (UNSPEC_VABD_F), gated on TARGET_NEON_FP16INST.
;; Classified with the floating-point "neon_fp_abd_s<q>" type, matching
;; neon_vabdf<mode> which uses the same unspec, instead of the integer
;; "neon_abd<q>" type.
2534(define_insn "neon_vabd<mode>"
2535  [(set (match_operand:VH 0 "s_register_operand" "=w")
2536    (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2537                    (match_operand:VH 2 "s_register_operand" "w")]
2538     UNSPEC_VABD_F))]
2539 "TARGET_NEON_FP16INST"
2540 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2541  [(set_attr "type" "neon_fp_abd_s<q>")]
2542)
2543
;; vabd over the VCVTF modes, described by UNSPEC_VABD_F.
2544(define_insn "neon_vabdf<mode>"
2545  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2546        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2547                          (match_operand:VCVTF 2 "s_register_operand" "w")]
2548                         UNSPEC_VABD_F))]
2549  "TARGET_NEON"
2550  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2551  [(set_attr "type" "neon_fp_abd_s<q>")]
2552)
2553
;; vabdl (VABDL unspec): VW-mode inputs printed with %P produce a <V_widen>
;; result printed with %q.
2554(define_insn "neon_vabdl<sup><mode>"
2555  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2556        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2557                               (match_operand:VW 2 "s_register_operand" "w")]
2558                          VABDL))]
2559  "TARGET_NEON"
2560  "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2561  [(set_attr "type" "neon_abd_long")]
2562)
2563
;; vaba: the VABD unspec of operands 2 and 3 added to operand 1.  Operand 1
;; is tied to the destination (constraint "0"), so only operands 0, 2 and 3
;; are printed.
2564(define_insn "neon_vaba<sup><mode>"
2565  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2566        (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2567                                       (match_operand:VDQIW 3 "s_register_operand" "w")]
2568                                      VABD)
2569                        (match_operand:VDQIW 1 "s_register_operand" "0")))]
2570  "TARGET_NEON"
2571  "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2572  [(set_attr "type" "neon_arith_acc<q>")]
2573)
2574
;; vabal: the VABDL unspec of the VW-mode operands 2 and 3 added to the
;; <V_widen> accumulator in operand 1, which is tied to the destination.
2575(define_insn "neon_vabal<sup><mode>"
2576  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2577        (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2578                                           (match_operand:VW 3 "s_register_operand" "w")]
2579                                                     VABDL)
2580                               (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2581  "TARGET_NEON"
2582  "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2583  [(set_attr "type" "neon_arith_acc<q>")]
2584)
2585
;; Expander taking two V16QI inputs (operands 1 and 2) and a V4SI accumulator
;; (operand 3), producing a V4SI result in operand 0.  Sequence emitted:
;;   1. vabdl on the low V8QI halves into a V8HI temporary,
;;   2. vabal of the high V8QI halves into that temporary,
;;   3. vpadal of the temporary into operand 3,
;;   4. move of operand 3 into operand 0.
;; Note that operand 3 itself is updated by step 3.
2586(define_expand "<sup>sadv16qi"
2587  [(use (match_operand:V4SI 0 "register_operand"))
2588   (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
2589                  (use (match_operand:V16QI 2 "register_operand"))] VABAL)
2590   (use (match_operand:V4SI 3 "register_operand"))]
2591  "TARGET_NEON"
2592  {
2593    rtx reduc = gen_reg_rtx (V8HImode);
2594    rtx op1_highpart = gen_reg_rtx (V8QImode);
2595    rtx op2_highpart = gen_reg_rtx (V8QImode);
2596
2597    emit_insn (gen_neon_vabdl<sup>v8qi (reduc,
2598                                        gen_lowpart (V8QImode, operands[1]),
2599                                        gen_lowpart (V8QImode, operands[2])));
2600
2601    emit_insn (gen_neon_vget_highv16qi (op1_highpart, operands[1]));
2602    emit_insn (gen_neon_vget_highv16qi (op2_highpart, operands[2]));
2603    emit_insn (gen_neon_vabal<sup>v8qi (reduc, reduc,
2604                                        op1_highpart, op2_highpart));
2605    emit_insn (gen_neon_vpadal<sup>v8hi (operands[3], operands[3], reduc));
2606
2607    emit_move_insn (operands[0], operands[3]);
2608    DONE;
2609  }
2610)
2611
;; Integer v<maxmin> over the VDQIW modes, described by the VMAXMIN unspec
;; iterator.
2612(define_insn "neon_v<maxmin><sup><mode>"
2613  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2614        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2615                          (match_operand:VDQIW 2 "s_register_operand" "w")]
2616                     VMAXMIN))]
2617  "TARGET_NEON"
2618  "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2619  [(set_attr "type" "neon_minmax<q>")]
2620)
2621
;; v<maxmin> over the VCVTF modes, described by the VMAXMINF unspec iterator.
2622(define_insn "neon_v<maxmin>f<mode>"
2623  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2624        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2625                          (match_operand:VCVTF 2 "s_register_operand" "w")]
2626                     VMAXMINF))]
2627  "TARGET_NEON"
2628  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2629  [(set_attr "type" "neon_fp_minmax_s<q>")]
2630)
2631
;; VH-mode variant of v<maxmin> (VMAXMINF), gated on TARGET_NEON_FP16INST.
2632(define_insn "neon_v<maxmin>f<mode>"
2633 [(set (match_operand:VH 0 "s_register_operand" "=w")
2634   (unspec:VH
2635    [(match_operand:VH 1 "s_register_operand" "w")
2636     (match_operand:VH 2 "s_register_operand" "w")]
2637    VMAXMINF))]
2638 "TARGET_NEON_FP16INST"
2639 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2640 [(set_attr "type" "neon_fp_minmax_s<q>")]
2641)
2642
;; vp<maxmin>.f16 on V4HF operands (VPMAXMINF), gated on
;; TARGET_NEON_FP16INST.  Classified with the floating-point reduction type
;; used by neon_vp<maxmin>f<mode>; this pattern has no mode iterator, so the
;; non-"_q" value is spelled out.
2643(define_insn "neon_vp<maxmin>fv4hf"
2644 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
2645   (unspec:V4HF
2646    [(match_operand:V4HF 1 "s_register_operand" "w")
2647     (match_operand:V4HF 2 "s_register_operand" "w")]
2648    VPMAXMINF))]
2649 "TARGET_NEON_FP16INST"
2650 "vp<maxmin>.f16\t%P0, %P1, %P2"
2651  [(set_attr "type" "neon_fp_reduc_minmax_s")]
2652)
2653
;; VH-mode <fmaxmin_op> instruction described by the VMAXMINFNM unspec
;; iterator, gated on TARGET_NEON_FP16INST.
2654(define_insn "neon_<fmaxmin_op><mode>"
2655 [(set
2656   (match_operand:VH 0 "s_register_operand" "=w")
2657   (unspec:VH
2658    [(match_operand:VH 1 "s_register_operand" "w")
2659     (match_operand:VH 2 "s_register_operand" "w")]
2660    VMAXMINFNM))]
2661 "TARGET_NEON_FP16INST"
2662 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2663 [(set_attr "type" "neon_fp_minmax_s<q>")]
2664)
2665
2665
2666;; v<maxmin>nm intrinsics.
;; VCVTF-mode pattern for the VMAXMINFNM unspec iterator; requires both
;; TARGET_NEON and TARGET_VFP5.
2667(define_insn "neon_<fmaxmin_op><mode>"
2668  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2669          (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2670                           (match_operand:VCVTF 2 "s_register_operand" "w")]
2671                           VMAXMINFNM))]
2672  "TARGET_NEON && TARGET_VFP5"
2673  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2674  [(set_attr "type" "neon_fp_minmax_s<q>")]
2675)
2676
2677;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Same RTL and assembly as neon_<fmaxmin_op><mode> for the VCVTF modes, but
;; exposed under the <fmaxmin><mode>3 name.
2678(define_insn "<fmaxmin><mode>3"
2679  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2680          (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2681                           (match_operand:VCVTF 2 "s_register_operand" "w")]
2682                           VMAXMINFNM))]
2683  "TARGET_NEON && TARGET_VFP5"
2684  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2685  [(set_attr "type" "neon_fp_minmax_s<q>")]
2686)
2687
;; Thin expander over the VD modes: forwards its three operands unchanged to
;; gen_neon_vpadd_internal<mode> (defined elsewhere in this file).
2688(define_expand "neon_vpadd<mode>"
2689  [(match_operand:VD 0 "s_register_operand")
2690   (match_operand:VD 1 "s_register_operand")
2691   (match_operand:VD 2 "s_register_operand")]
2692  "TARGET_NEON"
2693{
2694  emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
2695                                                      operands[2]));
2696  DONE;
2697})
2698
;; vpaddl (VPADDL unspec): one VDQIW-mode input produces a <V_double_width>
;; result.
2699(define_insn "neon_vpaddl<sup><mode>"
2700  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2701        (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2702                                 VPADDL))]
2703  "TARGET_NEON"
2704  "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2705  [(set_attr "type" "neon_reduc_add_long")]
2706)
2707
;; vpadal (VPADAL unspec): operand 1 is the <V_double_width> accumulator, tied
;; to the destination; operand 2 is the VDQIW-mode input.
2708(define_insn "neon_vpadal<sup><mode>"
2709  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2710        (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2711                                  (match_operand:VDQIW 2 "s_register_operand" "w")]
2712                                 VPADAL))]
2713  "TARGET_NEON"
2714  "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2715  [(set_attr "type" "neon_reduc_add_acc")]
2716)
2717
;; Integer vp<maxmin> over the VDI modes, described by the VPMAXMIN unspec
;; iterator.
2718(define_insn "neon_vp<maxmin><sup><mode>"
2719  [(set (match_operand:VDI 0 "s_register_operand" "=w")
2720        (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2721                        (match_operand:VDI 2 "s_register_operand" "w")]
2722                   VPMAXMIN))]
2723  "TARGET_NEON"
2724  "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2725  [(set_attr "type" "neon_reduc_minmax<q>")]
2726)
2727
;; vp<maxmin> over the VCVTF modes, described by the VPMAXMINF unspec
;; iterator.
2728(define_insn "neon_vp<maxmin>f<mode>"
2729  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2730        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2731                        (match_operand:VCVTF 2 "s_register_operand" "w")]
2732                   VPMAXMINF))]
2733  "TARGET_NEON"
2734  "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2735  [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
2736)
2737
;; vrecps over the VCVTF modes, described by UNSPEC_VRECPS.
2738(define_insn "neon_vrecps<mode>"
2739  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2740        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2741                           (match_operand:VCVTF 2 "s_register_operand" "w")]
2742                      UNSPEC_VRECPS))]
2743  "TARGET_NEON"
2744  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2745  [(set_attr "type" "neon_fp_recps_s<q>")]
2746)
2747
;; VH-mode variant of vrecps (UNSPEC_VRECPS), gated on TARGET_NEON_FP16INST.
2748(define_insn "neon_vrecps<mode>"
2749  [(set
2750    (match_operand:VH 0 "s_register_operand" "=w")
2751    (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2752                    (match_operand:VH 2 "s_register_operand" "w")]
2753     UNSPEC_VRECPS))]
2754  "TARGET_NEON_FP16INST"
2755  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2756  [(set_attr "type" "neon_fp_recps_s<q>")]
2757)
2758
;; vrsqrts over the VCVTF modes, described by UNSPEC_VRSQRTS.
2759(define_insn "neon_vrsqrts<mode>"
2760  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2761        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2762                           (match_operand:VCVTF 2 "s_register_operand" "w")]
2763                      UNSPEC_VRSQRTS))]
2764  "TARGET_NEON"
2765  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2766  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
2767)
2768
;; VH-mode variant of vrsqrts (UNSPEC_VRSQRTS), gated on TARGET_NEON_FP16INST.
2769(define_insn "neon_vrsqrts<mode>"
2770  [(set
2771    (match_operand:VH 0 "s_register_operand" "=w")
2772    (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2773                     (match_operand:VH 2 "s_register_operand" "w")]
2774     UNSPEC_VRSQRTS))]
2775 "TARGET_NEON_FP16INST"
2776 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2777 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
2778)
2779
;; Thin expander over the VDQW modes: forwards to gen_abs<mode>2.
2780(define_expand "neon_vabs<mode>"
2781  [(match_operand:VDQW 0 "s_register_operand")
2782   (match_operand:VDQW 1 "s_register_operand")]
2783  "TARGET_NEON"
2784{
2785  emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
2786  DONE;
2787})
2788
;; vqabs over the VDQIW modes, described by UNSPEC_VQABS.
2789(define_insn "neon_vqabs<mode>"
2790  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2791          (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2792                          UNSPEC_VQABS))]
2793  "TARGET_NEON"
2794  "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2795  [(set_attr "type" "neon_qabs<q>")]
2796)
2797
;; bswap over the VDQHSD modes, implemented as vrev<V_sz_elem>.8 (the
;; element size selects the vrev variant, the ".8" suffix the byte
;; granularity).
2798(define_insn "neon_bswap<mode>"
2799  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2800        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2801  "TARGET_NEON"
2802  "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2803  [(set_attr "type" "neon_rev<q>")]
2804)
2805
;; Thin expander over the VDQW modes: forwards to gen_neon_neg<mode>2.
2806(define_expand "neon_vneg<mode>"
2807  [(match_operand:VDQW 0 "s_register_operand")
2808   (match_operand:VDQW 1 "s_register_operand")]
2809  "TARGET_NEON"
2810{
2811  emit_insn (gen_neon_neg<mode>2 (operands[0], operands[1]));
2812  DONE;
2813})
2814
2815
2816;; The vcadd and vcmla patterns are made UNSPEC explicitly because their
2817;; usage needs to guarantee that the source vectors are contiguous.  It would
2818;; be wrong to describe the operation without being able to describe the
2819;; permute that is also required, but even if that is done the permute would
2820;; have been created as a LOAD_LANES, which means the values in the registers
2821;; are in the wrong order.
;; vcadd over the VF modes (VCADD unspec iterator), gated on TARGET_COMPLEX.
;; The rotation is printed as the immediate #<rot>.
2822(define_insn "neon_vcadd<rot><mode>"
2823  [(set (match_operand:VF 0 "register_operand" "=w")
2824          (unspec:VF [(match_operand:VF 1 "register_operand" "w")
2825                        (match_operand:VF 2 "register_operand" "w")]
2826                        VCADD))]
2827  "TARGET_COMPLEX"
2828  "vcadd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, #<rot>"
2829  [(set_attr "type" "neon_fcadd")]
2830)
2831
;; vcmla over the VF modes: the VCMLA unspec of operands 2 and 3 added to the
;; accumulator in operand 1, which is tied to the destination.
2832(define_insn "neon_vcmla<rot><mode>"
2833  [(set (match_operand:VF 0 "register_operand" "=w")
2834          (plus:VF (match_operand:VF 1 "register_operand" "0")
2835                     (unspec:VF [(match_operand:VF 2 "register_operand" "w")
2836                                   (match_operand:VF 3 "register_operand" "w")]
2837                                   VCMLA)))]
2838  "TARGET_COMPLEX"
2839  "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3, #<rot>"
2840  [(set_attr "type" "neon_fcmla")]
2841)
2842
;; Lane form of vcmla where operand 3 has the same VF mode as the other
;; vector operands and operand 4 is the lane index.  The operands are
;; rewritten by neon_vcmla_lane_prepare_operands before printing, after which
;; operand 3 is printed as a "d" register number (%c3) with lane %c4.
2843(define_insn "neon_vcmla_lane<rot><mode>"
2844  [(set (match_operand:VF 0 "s_register_operand" "=w")
2845          (plus:VF (match_operand:VF 1 "s_register_operand" "0")
2846                     (unspec:VF [(match_operand:VF 2 "s_register_operand" "w")
2847                                   (match_operand:VF 3 "s_register_operand" "<VF_constraint>")
2848                                   (match_operand:SI 4 "const_int_operand" "n")]
2849                                   VCMLA)))]
2850  "TARGET_COMPLEX"
2851  {
2852    operands = neon_vcmla_lane_prepare_operands (operands);
2853    return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
2854  }
2855  [(set_attr "type" "neon_fcmla")]
2856)
2857
;; Lane form of vcmla over the VDF modes where the indexed operand 3 has the
;; <V_DOUBLE> mode.  Operands are rewritten by
;; neon_vcmla_lane_prepare_operands before the template is printed.
2858(define_insn "neon_vcmla_laneq<rot><mode>"
2859  [(set (match_operand:VDF 0 "s_register_operand" "=w")
2860          (plus:VDF (match_operand:VDF 1 "s_register_operand" "0")
2861                      (unspec:VDF [(match_operand:VDF 2 "s_register_operand" "w")
2862                                    (match_operand:<V_DOUBLE> 3 "s_register_operand" "<VF_constraint>")
2863                                    (match_operand:SI 4 "const_int_operand" "n")]
2864                                    VCMLA)))]
2865  "TARGET_COMPLEX"
2866  {
2867    operands = neon_vcmla_lane_prepare_operands (operands);
2868    return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
2869  }
2870  [(set_attr "type" "neon_fcmla")]
2871)
2872
;; Lane form of vcmla over the VQ_HSF modes where the indexed operand 3 has
;; the <V_HALF> mode.  Operands are rewritten by
;; neon_vcmla_lane_prepare_operands before the template is printed.
2873(define_insn "neon_vcmlaq_lane<rot><mode>"
2874  [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w")
2875          (plus:VQ_HSF (match_operand:VQ_HSF 1 "s_register_operand" "0")
2876                     (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "s_register_operand" "w")
2877                                         (match_operand:<V_HALF> 3 "s_register_operand" "<VF_constraint>")
2878                                         (match_operand:SI 4 "const_int_operand" "n")]
2879                                         VCMLA)))]
2880  "TARGET_COMPLEX"
2881  {
2882    operands = neon_vcmla_lane_prepare_operands (operands);
2883    return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
2884  }
2885  [(set_attr "type" "neon_fcmla")]
2886)
2887
2888;; The complex mul operations always need to expand to two instructions.
2889;; The first operation does half the computation and the second does the
2890;; remainder.  Because of this, expand early.
;; The first vcmla (<rotsplit1>) accumulates into a zero vector held in a
;; fresh register; the second (<rotsplit2>) accumulates onto that partial
;; result and writes operand 0.  Both pass operands[2] before operands[1].
;; Only enabled for little-endian (!BYTES_BIG_ENDIAN).
2891(define_expand "cmul<conj_op><mode>3"
2892  [(set (match_operand:VDF 0 "register_operand")
2893          (unspec:VDF [(match_operand:VDF 1 "register_operand")
2894                         (match_operand:VDF 2 "register_operand")]
2895                        VCMUL_OP))]
2896  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
2897{
2898  rtx res1 = gen_reg_rtx (<MODE>mode);
2899  rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
2900  emit_insn (gen_neon_vcmla<rotsplit1><mode> (res1, tmp,
2901                                                        operands[2], operands[1]));
2902  emit_insn (gen_neon_vcmla<rotsplit2><mode> (operands[0], res1,
2903                                                        operands[2], operands[1]));
2904  DONE;
2905})
2906
2907
2908;; These map to the auto-vectorizer Dot Product optab.
2909;; The auto-vectorizer expects a dot product builtin that also does an
2910;; accumulation into the provided register.
2911;; Given the following pattern
2912;;
2913;; for (i=0; i<len; i++) {
2914;;     c = a[i] * b[i];
2915;;     r += c;
2916;; }
2917;; return r;
2918;;
2919;; This can be auto-vectorized to
2920;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
2921;;
2922;; given enough iterations.  However the vectorizer can keep unrolling the loop
2923;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
2924;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
2925;; ...
2926;;
2927;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Operands 1 and 2 are the <VSI2QI> inputs; operand 3 is the VCVTI
;; accumulator, tied to the destination, so it is not printed.
2928(define_insn "<sup>dot_prod<vsi2qi>"
2929  [(set (match_operand:VCVTI 0 "register_operand" "=w")
2930          (plus:VCVTI
2931            (unspec:VCVTI [(match_operand:<VSI2QI> 1 "register_operand" "w")
2932                               (match_operand:<VSI2QI> 2 "register_operand" "w")]
2933                               DOTPROD)
2934            (match_operand:VCVTI 3 "register_operand" "0")))]
2935  "TARGET_DOTPROD"
2936  "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2937  [(set_attr "type" "neon_dot<q>")]
2938)
2939
2940;; These instructions map to the __builtins for the Dot Product operations
;; Unlike <sup>dot_prod<vsi2qi>, the accumulator is operand 1 and the
;; <VSI2QI> inputs are operands 2 and 3.  There is no preparation code, so the
;; RTL template is emitted as written.  Operand 0 carries no constraint
;; string, like the other operands here: constraints play no part in a
;; define_expand.
2941(define_expand "neon_<sup>dot<vsi2qi>"
2942  [(set (match_operand:VCVTI 0 "register_operand")
2943          (plus:VCVTI
2944            (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand")
2945                               (match_operand:<VSI2QI> 3 "register_operand")]
2946                               DOTPROD)
2947            (match_operand:VCVTI 1 "register_operand")))]
2948  "TARGET_DOTPROD"
2949)
2950
2951;; These instructions map to the __builtins for the Dot Product operations.
;; UNSPEC_DOT_US form, gated on TARGET_I8MM.  Operand 1 is the accumulator
;; (tied to the destination); operands 2 and 3 are the <VSI2QI> inputs.
2952(define_insn "neon_usdot<vsi2qi>"
2953  [(set (match_operand:VCVTI 0 "register_operand" "=w")
2954          (plus:VCVTI
2955            (unspec:VCVTI
2956              [(match_operand:<VSI2QI> 2 "register_operand" "w")
2957              (match_operand:<VSI2QI> 3 "register_operand" "w")]
2958              UNSPEC_DOT_US)
2959            (match_operand:VCVTI 1 "register_operand" "0")))]
2960  "TARGET_I8MM"
2961  "vusdot.s8\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2962  [(set_attr "type" "neon_dot<q>")]
2963)
2964
2965;; These instructions map to the __builtins for the Dot Product
2966;; indexed operations.
;; Operand 3 is a V8QI register (constraint "t") printed with %P and indexed
;; by the immediate lane in operand 4; operand 1 is the accumulator, tied to
;; the destination.
2967(define_insn "neon_<sup>dot_lane<vsi2qi>"
2968  [(set (match_operand:VCVTI 0 "register_operand" "=w")
2969          (plus:VCVTI
2970            (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand" "w")
2971                               (match_operand:V8QI 3 "register_operand" "t")
2972                               (match_operand:SI 4 "immediate_operand" "i")]
2973                               DOTPROD)
2974            (match_operand:VCVTI 1 "register_operand" "0")))]
2975  "TARGET_DOTPROD"
2976  "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
2977  [(set_attr "type" "neon_dot<q>")]
2978)
2979
2980;; These instructions map to the __builtins for the Dot Product
2981;; indexed operations.
;; Operand 3 is a V16QI register.  Lane indices beyond the last V2SI element
;; are reduced by GET_MODE_NUNITS (V2SImode) and printed against %f3; smaller
;; indices are printed unchanged against %e3.
2982(define_insn "neon_<sup>dot_laneq<vsi2qi>"
2983  [(set (match_operand:VCVTI 0 "register_operand" "=w")
2984          (plus:VCVTI
2985            (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand" "w")
2986                               (match_operand:V16QI 3 "register_operand" "t")
2987                               (match_operand:SI 4 "immediate_operand" "i")]
2988                               DOTPROD)
2989            (match_operand:VCVTI 1 "register_operand" "0")))]
2990  "TARGET_DOTPROD"
2991  {
2992    int lane = INTVAL (operands[4]);
2993    if (lane > GET_MODE_NUNITS (V2SImode) - 1)
2994      {
2995          operands[4] = GEN_INT (lane - GET_MODE_NUNITS (V2SImode));
2996          return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %f3[%c4]";
2997      }
2998    else
2999      {
3000          operands[4] = GEN_INT (lane);
3001          return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %e3[%c4]";
3002      }
3003  }
3004  [(set_attr "type" "neon_dot<q>")]
3005)
3006
3007;; These instructions map to the __builtins for the Dot Product
3008;; indexed operations in the v8.6 I8MM extension.
;; Same shape as the DOTPROD lane pattern, but iterating over DOTPROD_I8MM
;; and gated on TARGET_I8MM.
3009(define_insn "neon_<sup>dot_lane<vsi2qi>"
3010  [(set (match_operand:VCVTI 0 "register_operand" "=w")
3011          (plus:VCVTI
3012            (unspec:VCVTI
3013             [(match_operand:<VSI2QI> 2 "register_operand" "w")
3014              (match_operand:V8QI 3 "register_operand" "t")
3015              (match_operand:SI 4 "immediate_operand" "i")]
3016              DOTPROD_I8MM)
3017            (match_operand:VCVTI 1 "register_operand" "0")))]
3018  "TARGET_I8MM"
3019  "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
3020  [(set_attr "type" "neon_dot<q>")]
3021)
3022
3023;; These instructions map to the __builtins for the Dot Product
3024;; indexed operations in the v8.6 I8MM extension.
;; Operand 3 is a V16QI register.  Lane indices beyond the last V2SI element
;; are reduced by GET_MODE_NUNITS (V2SImode) and printed against %f3; smaller
;; indices are printed unchanged against %e3.
3025(define_insn "neon_<sup>dot_laneq<vsi2qi>"
3026  [(set (match_operand:VCVTI 0 "register_operand" "=w")
3027          (plus:VCVTI
3028            (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand" "w")
3029                               (match_operand:V16QI 3 "register_operand" "t")
3030                               (match_operand:SI 4 "immediate_operand" "i")]
3031                               DOTPROD_I8MM)
3032            (match_operand:VCVTI 1 "register_operand" "0")))]
3033  "TARGET_I8MM"
3034  {
3035    int lane = INTVAL (operands[4]);
3036    if (lane > GET_MODE_NUNITS (V2SImode) - 1)
3037      {
3038          operands[4] = GEN_INT (lane - GET_MODE_NUNITS (V2SImode));
3039          return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %f3[%c4]";
3040      }
3041    else
3042      {
3043          operands[4] = GEN_INT (lane);
3044          return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %e3[%c4]";
3045      }
3046  }
3047  [(set_attr "type" "neon_dot<q>")]
3048)
3049
;; Auto-vectorizer pattern for usdot: operand 0 = operand 3 plus the
;; UNSPEC_DOT_US combination of operands 1 and 2.  The expander has no
;; preparation code, so the RTL template below is emitted as written.
(define_expand "usdot_prod<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand")
        (plus:VCVTI
          (unspec:VCVTI
            [(match_operand:<VSI2QI> 1 "register_operand")
             (match_operand:<VSI2QI> 2 "register_operand")]
            UNSPEC_DOT_US)
          (match_operand:VCVTI 3 "register_operand")))]
  "TARGET_I8MM"
)
3061
;; Expand copysign for the VCVTF float vector modes.  A mask with only bit 31
;; set in every element is materialised in the matching integer vector mode,
;; operand 2 is copied into the destination, and a vbsl is emitted with the
;; mask, the destination and operand 1 as its inputs.
;; NOTE(review): the bit-select direction (sign from operand 2, remaining
;; bits from operand 1) follows from neon_vbsl<mode>, defined elsewhere.
(define_expand "neon_copysignf<mode>"
  [(match_operand:VCVTF 0 "register_operand")
   (match_operand:VCVTF 1 "register_operand")
   (match_operand:VCVTF 2 "register_operand")]
  "TARGET_NEON"
{
  rtx int_mask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
  rtx sign_bit = gen_int_mode (0x80000000, SImode);

  /* Broadcast the sign-bit constant into every element of the mask.  */
  emit_move_insn (int_mask,
		  gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode,
					   sign_bit));

  /* Seed the destination with operand 2.  */
  emit_move_insn (operands[0], operands[2]);

  /* View the integer mask in the float vector mode expected by vbsl.  */
  rtx fp_mask = simplify_gen_subreg (<MODE>mode, int_mask,
				     <VCVTF:V_cmp_result>mode, 0);

  emit_insn (gen_neon_vbsl<mode> (operands[0], fp_mask, operands[0],
				  operands[1]));
  DONE;
})
3083
;; VQNEG on every element of operand 1, using the signed element suffix
;; <V_s_elem>.  Modelled as an unspec.
(define_insn "neon_vqneg<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VQNEG))]
  "TARGET_NEON"
  "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qneg<q>")]
)
3092
;; VCLS on every element of operand 1, using the signed element suffix
;; <V_s_elem>.  Modelled as an unspec.
(define_insn "neon_vcls<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VCLS))]
  "TARGET_NEON"
  "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cls<q>")]
)
3101
;; Element-wise count of leading zeros, expressed with the generic clz RTL
;; code (not an unspec) and output as VCLZ.
(define_insn "neon_vclz<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)
3109
;; Element-wise population count for the VE modes, expressed with the generic
;; popcount RTL code and output as VCNT.  neon_vcnt<mode> expands to this
;; pattern.
(define_insn "popcount<mode>2"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)
3117
;; Builtin entry point for VCNT: forwards both operands unchanged to the
;; popcount<mode>2 pattern.
(define_expand "neon_vcnt<mode>"
  [(match_operand:VE 0 "s_register_operand")
   (match_operand:VE 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx popcount_insn = gen_popcount<mode>2 (operands[0], operands[1]);
  emit_insn (popcount_insn);
  DONE;
})
3126
;; VRECPE for the VH (half-precision) vector modes; always printed with the
;; .f16 suffix.  Requires TARGET_NEON_FP16INST.
(define_insn "neon_vrecpe<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")]
          UNSPEC_VRECPE))]
  "TARGET_NEON_FP16INST"
  "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_recpe_s<q>")]
)
3135
;; VRECPE for the V32 vector modes; the element suffix comes from
;; <V_u_elem>.
(define_insn "neon_vrecpe<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32
          [(match_operand:V32 1 "s_register_operand" "w")]
          UNSPEC_VRECPE))]
  "TARGET_NEON"
  "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_recpe_s<q>")]
)
3144
;; VRSQRTE for the V32 vector modes; the element suffix comes from
;; <V_u_elem>.
(define_insn "neon_vrsqrte<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32
          [(match_operand:V32 1 "s_register_operand" "w")]
          UNSPEC_VRSQRTE))]
  "TARGET_NEON"
  "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
)
3153
;; Builtin entry point for VMVN: forwards both operands unchanged to the
;; one_cmpl<mode>2_neon pattern.
(define_expand "neon_vmvn<mode>"
  [(match_operand:VDQIW 0 "s_register_operand")
   (match_operand:VDQIW 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx not_insn = gen_one_cmpl<mode>2_neon (operands[0], operands[1]);
  emit_insn (not_insn);
  DONE;
})
3162
;; Extract one element of a VD-mode vector into a core register, sign-extended
;; to SImode.  Operand 2 is the RTL lane number; on big-endian targets it is
;; mirrored within the vector before being printed.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      const int last_lane = GET_MODE_NUNITS (<MODE>mode) - 1;
      const int rtl_lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (last_lane - rtl_lane);
    }

  return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
3181
;; Extract one element of a VD-mode vector into a core register, zero-extended
;; to SImode.  Operand 2 is the RTL lane number; on big-endian targets it is
;; mirrored within the vector before being printed.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      const int last_lane = GET_MODE_NUNITS (<MODE>mode) - 1;
      const int rtl_lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (last_lane - rtl_lane);
    }

  return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
3200
;; Extract one element of a VQ2-mode vector into a core register,
;; sign-extended to SImode.  The lane number is split into a half-vector
;; selector and a lane within that half; the half is addressed as a
;; <V_HALF>-mode register whose number is the source register number plus
;; two per half, and the instruction is printed by hand.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  const unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  const unsigned int lane = INTVAL (operands[2]);
  const unsigned int half = lane / lanes_per_half;
  unsigned int lane_in_half = lane % lanes_per_half;
  rtx xops[3];

  /* Big-endian mirrors the lane inside the selected half only.  */
  if (BYTES_BIG_ENDIAN)
    lane_in_half = lanes_per_half - 1 - lane_in_half;

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (<V_HALF>mode, REGNO (operands[1]) + 2 * half);
  xops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", xops);

  /* Everything has already been printed.  */
  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
3227
;; Extract one element of a VQ2-mode vector into a core register,
;; zero-extended to SImode.  The lane number is split into a half-vector
;; selector and a lane within that half; the half is addressed as a
;; <V_HALF>-mode register whose number is the source register number plus
;; two per half, and the instruction is printed by hand.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  const unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  const unsigned int lane = INTVAL (operands[2]);
  const unsigned int half = lane / lanes_per_half;
  unsigned int lane_in_half = lane % lanes_per_half;
  rtx xops[3];

  /* Big-endian mirrors the lane inside the selected half only.  */
  if (BYTES_BIG_ENDIAN)
    lane_in_half = lanes_per_half - 1 - lane_in_half;

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (<V_HALF>mode, REGNO (operands[1]) + 2 * half);
  xops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", xops);

  /* Everything has already been printed.  */
  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
3254
;; Builtin expander for vget_lane.  Elements that are 32 bits wide go through
;; vec_extract; all other element sizes use the sign-extending internal
;; pattern.
(define_expand "neon_vget_lane<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand")
   (match_operand:VDQW 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Intrinsic lane numbers assume vldm element ordering in memory,
	 while generic RTL assumes array ordering.  Translate the lane
	 number from the former model to the latter by flipping it within
	 its 64-bit group.  */
      const unsigned int lanes_per_d
	= 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      const unsigned int lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (lane ^ (lanes_per_d - 1));
    }

  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) != 32)
    emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
						       operands[1],
						       operands[2]));
  else
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
						operands[2]));
  DONE;
})
3284
;; Builtin expander for the unsigned vget_lane.  Elements that are 32 bits
;; wide go through vec_extract; all other element sizes use the
;; zero-extending internal pattern.
(define_expand "neon_vget_laneu<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand")
   (match_operand:VDQIW 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Intrinsic lane numbers assume vldm element ordering in memory,
	 while generic RTL assumes array ordering.  Translate the lane
	 number from the former model to the latter by flipping it within
	 its 64-bit group.  */
      const unsigned int lanes_per_d
	= 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      const unsigned int lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (lane ^ (lanes_per_d - 1));
    }

  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) != 32)
    emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
						       operands[1],
						       operands[2]));
  else
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
						operands[2]));
  DONE;
})
3314
;; vget_lane for a DImode source.  The lane index (operand 2) is accepted but
;; never read: the expansion is a plain move of operand 1 into operand 0.
(define_expand "neon_vget_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})
3324
;; vget_lane for V2DI: moves the low or the high DImode half of operand 1
;; into operand 0, depending on the (endian-adjusted) lane number.  Any lane
;; other than 0 or 1 trips the assertion.
(define_expand "neon_vget_lanev2di"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:V2DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Intrinsic lane numbers assume vldm element ordering in memory,
	 while generic RTL assumes array ordering.  Translate the lane
	 number from the former model to the latter; with two lanes this is
	 a flip of the low bit.  */
      const unsigned int lanes = 2;
      const unsigned int flipped = INTVAL (operands[2]) ^ (lanes - 1);

      operands[2] = GEN_INT (flipped);
    }

  const int lane = INTVAL (operands[2]);
  gcc_assert (lane == 0 || lane == 1);

  rtx half;
  if (lane == 0)
    half = gen_lowpart (DImode, operands[1]);
  else
    half = gen_highpart (DImode, operands[1]);

  emit_move_insn (operands[0], half);
  DONE;
})
3353
;; Builtin expander for vset_lane: operand 0 becomes operand 2 with the lane
;; given by operand 3 replaced by the scalar in operand 1.  The lane is
;; handed to vec_set<mode>_internal as a one-hot mask (1 << lane).
(define_expand "neon_vset_lane<mode>"
  [(match_operand:VDQ 0 "s_register_operand")
   (match_operand:<V_elem> 1 "s_register_operand")
   (match_operand:VDQ 2 "s_register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_NEON"
{
  unsigned int lane = INTVAL (operands[3]);

  /* On big-endian, flip the lane within its 64-bit group.  */
  if (BYTES_BIG_ENDIAN)
    {
      const unsigned int lanes_per_d
	= 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);

      lane ^= lanes_per_d - 1;
    }

  rtx lane_mask = GEN_INT (1 << lane);
  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
					 lane_mask, operands[2]));
  DONE;
})
3374
; vset_lane for DImode.  Operands 2 (the original value) and 3 (the lane
; index) are accepted but ignored: the result is simply the new scalar,
; operand 1, moved into operand 0.

(define_expand "neon_vset_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:DI 2 "s_register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})
3387
;; Builtin expander for vcreate: reinterprets the DImode value in operand 1
;; as a <MODE>mode vector via gen_lowpart and moves it into operand 0.
(define_expand "neon_vcreate<mode>"
  [(match_operand:VD_RE 0 "s_register_operand")
   (match_operand:DI 1 "general_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
  DONE;
})
3397
;; Duplicate a scalar held in a core register ("r") into every element of a
;; VX-mode vector.
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.<V_sz_elem>\t%<V_reg>0, %1"
  [(set_attr "type" "neon_from_gp<q>")]
)
3405
;; Duplicate an HFmode scalar held in a core register into all four elements
;; of a V4HF vector; printed as a 16-bit vdup.
(define_insn "neon_vdup_nv4hf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%P0, %1"
  [(set_attr "type" "neon_from_gp")]
)
3413
;; Duplicate an HFmode scalar held in a core register into all eight elements
;; of a V8HF vector; printed as a 16-bit vdup.
(define_insn "neon_vdup_nv8hf"
  [(set (match_operand:V8HF 0 "s_register_operand" "=w")
        (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%q0, %1"
  [(set_attr "type" "neon_from_gp_q")]
)
3421
;; Duplicate a BFmode scalar held in a core register into all four elements
;; of a V4BF vector; printed as a 16-bit vdup.
(define_insn "neon_vdup_nv4bf"
  [(set (match_operand:V4BF 0 "s_register_operand" "=w")
        (vec_duplicate:V4BF (match_operand:BF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%P0, %1"
  [(set_attr "type" "neon_from_gp")]
)
3429
;; Duplicate a BFmode scalar held in a core register into all eight elements
;; of a V8BF vector; printed as a 16-bit vdup.
(define_insn "neon_vdup_nv8bf"
  [(set (match_operand:V8BF 0 "s_register_operand" "=w")
        (vec_duplicate:V8BF (match_operand:BF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%q0, %1"
  [(set_attr "type" "neon_from_gp_q")]
)
3437
;; Duplicate a scalar into every element of a V32-mode vector.  Two
;; alternatives: the scalar in a core register ("r", printed as %1) or in a
;; register of class "t" (printed with the %y1 modifier).  The "type"
;; attribute differs per alternative.
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w,w")
        (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
  "TARGET_NEON"
  "@
  vdup.<V_sz_elem>\t%<V_reg>0, %1
  vdup.<V_sz_elem>\t%<V_reg>0, %y1"
  [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
)
3447
;; vdup_n for DImode: with a single element there is nothing to duplicate,
;; so this expands to a plain move of operand 1 into operand 0.
(define_expand "neon_vdup_ndi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
}
)
3457
;; Duplicate a DImode scalar into both elements of a V2DI vector.  Each
;; alternative prints two vmov instructions, one writing %e0 and one writing
;; %f0, hence the length of 8.  The first alternative takes the scalar from
;; core registers (%Q1/%R1), the second from a "w" register (%P1).
(define_insn "neon_vdup_nv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
        (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
  "TARGET_NEON"
  "@
  vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
  vmov\t%e0, %P1\;vmov\t%f0, %P1"
  [(set_attr "length" "8")
   (set_attr "type" "multiple")]
)
3468
;; Duplicate one lane of operand 1 (a <V_double_vector_mode> vector) into
;; every element of a VDQW-mode destination.  The RTL lane number is
;; mirrored on big-endian before printing; the destination is printed with
;; %P0 when <Is_d_reg> is true and with %q0 otherwise.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (vec_duplicate:VDQW
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      const int last_lane
	= GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1;
      const int rtl_lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (last_lane - rtl_lane);
    }

  return (<Is_d_reg>
	  ? "vdup.<V_sz_elem>\t%P0, %P1[%c2]"
	  : "vdup.<V_sz_elem>\t%q0, %P1[%c2]");
}
  [(set_attr "type" "neon_dup<q>")]
)
3490
;; Duplicate one lane of operand 1 (a <V_double_vector_mode> vector) into
;; every element of a VHFBF-mode destination.  Same output logic as the VDQW
;; variant, but additionally gated on TARGET_FP16 or TARGET_BF16_SIMD.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VHFBF 0 "s_register_operand" "=w")
        (vec_duplicate:VHFBF
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
{
  if (BYTES_BIG_ENDIAN)
    {
      const int last_lane
	= GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1;
      const int rtl_lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (last_lane - rtl_lane);
    }

  return (<Is_d_reg>
	  ? "vdup.<V_sz_elem>\t%P0, %P1[%c2]"
	  : "vdup.<V_sz_elem>\t%q0, %P1[%c2]");
}
  [(set_attr "type" "neon_dup<q>")]
)
3512
;; Builtin expander for vdup_lane on the VDQW modes.  On big-endian the lane
;; number is flipped within its 64-bit group, then the work is delegated to
;; neon_vdup_lane<mode>_internal.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VDQW 0 "s_register_operand")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      const unsigned int lanes_per_d
	= 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
      const unsigned int lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (lane ^ (lanes_per_d - 1));
    }

  rtx dup_insn = gen_neon_vdup_lane<mode>_internal (operands[0],
						    operands[1],
						    operands[2]);
  emit_insn (dup_insn);
  DONE;
})
3531
;; Builtin expander for vdup_lane on the VHFBF modes.  On big-endian the lane
;; number is flipped within its 64-bit group, then the work is delegated to
;; neon_vdup_lane<mode>_internal.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VHFBF 0 "s_register_operand")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
{
  if (BYTES_BIG_ENDIAN)
    {
      const unsigned int lanes_per_d
	= 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
      const unsigned int lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (lane ^ (lanes_per_d - 1));
    }

  rtx dup_insn = gen_neon_vdup_lane<mode>_internal (operands[0],
						    operands[1],
						    operands[2]);
  emit_insn (dup_insn);
  DONE;
})
3550
; Scalar index is ignored, since only zero is valid here.  The expansion is
; a plain move of operand 1 into operand 0.
(define_expand "neon_vdup_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})
3561
; The lane index (operand 2) is ignored for v2di as well, as the DImode
; second operand has only a single element.  The duplication itself is done
; by neon_vdup_nv2di.
(define_expand "neon_vdup_lanev2di"
  [(match_operand:V2DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
  DONE;
})
3572
; Disabled before reload because we don't want combine doing something silly,
; but used by the post-reload expansion of neon_vcombine.
; Both operands are read and written ("+w"): the two parallel sets exchange
; their contents with a single vswp.
(define_insn "*neon_vswp<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "+w")
          (match_operand:VDQX 1 "s_register_operand" "+w"))
   (set (match_dup 1) (match_dup 0))]
  "TARGET_NEON && reload_completed"
  "vswp\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_permute<q>")]
)
3583
;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; dest vector.
;; FIXME: A different implementation of this builtin could make it much
;; more likely that we wouldn't actually need to output anything (we could make
;; it so that the reg allocator puts things in the right places magically
;; instead). Lack of subregs for vectors makes that tricky though, I think.
;;
;; The insn itself never prints anything (output template "#"); after reload
;; it is always split, with neon_split_vcombine emitting the replacement
;; instructions.

(define_insn_and_split "neon_vcombine<mode>"
  [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
        (vec_concat:<V_DOUBLE>
            (match_operand:VDX 1 "s_register_operand" "w")
            (match_operand:VDX 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  neon_split_vcombine (operands);
  DONE;
}
[(set_attr "type" "multiple")]
)
3606
;; Builtin expander for vget_high: moves the <V_HALF>-mode subreg of operand 1
;; located at byte offset GET_MODE_SIZE (<V_HALF>mode) into operand 0.
(define_expand "neon_vget_high<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQXBF 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx upper_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
					<MODE>mode,
					GET_MODE_SIZE (<V_HALF>mode));
  emit_move_insn (operands[0], upper_half);
  DONE;
})
3617
;; Builtin expander for vget_low: moves the <V_HALF>-mode subreg of operand 1
;; located at byte offset 0 into operand 0.
;; Iterates over VQXBF, the same mode set as neon_vget_high<mode>, so that
;; every mode with a vget_high expander also has the matching vget_low one.
(define_expand "neon_vget_low<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQXBF 1 "s_register_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0],
		  simplify_gen_subreg (<V_HALF>mode, operands[1],
				       <MODE>mode, 0));
  DONE;
})
3628
;; Signed integer vector to float vector conversion (generic float RTL code),
;; output as vcvt.f32.s32.  Not available when flag_rounding_math is set.
(define_insn "float<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3636
;; Unsigned integer vector to float vector conversion (generic unsigned_float
;; RTL code), output as vcvt.f32.u32.  Not available when flag_rounding_math
;; is set.
(define_insn "floatuns<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3644
;; Float vector to signed integer vector conversion (generic fix RTL code),
;; output as vcvt.s32.f32.
(define_insn "fix_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3652
;; Float vector to unsigned integer vector conversion (generic unsigned_fix
;; RTL code), output as vcvt.u32.f32.
(define_insn "fixuns_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3660
;; Builtin form of the 32-bit float-to-integer vcvt, modelled as an unspec.
;; <sup> supplies the signedness letter of the integer result.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTF 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON"
  "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3669
;; Builtin form of the 32-bit integer-to-float vcvt, modelled as an unspec.
;; <sup> supplies the signedness letter of the integer source.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTI 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON"
  "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3678
;; Convert a V4HF vector (printed with %P1) to a V4SF vector (printed with
;; %q0) using vcvt.f32.f16.  Requires TARGET_FP16.
(define_insn "neon_vcvtv4sfv4hf"
  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
        (unspec:V4SF
          [(match_operand:V4HF 1 "s_register_operand" "w")]
          UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f32.f16\t%q0, %P1"
  [(set_attr "type" "neon_fp_cvt_widen_h")]
)
3687
;; Convert a V4SF vector (printed with %q1) to a V4HF vector (printed with
;; %P0) using vcvt.f16.f32.  Requires TARGET_FP16.
(define_insn "neon_vcvtv4hfv4sf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (unspec:V4HF
          [(match_operand:V4SF 1 "s_register_operand" "w")]
          UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f16.f32\t%P0, %q1"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)
3696
;; Builtin form of the 16-bit integer-to-half-float vcvt (VCVTHI source
;; modes), modelled as an unspec.  <sup> supplies the signedness letter of
;; the integer source.
(define_insn "neon_vcvt<sup><mode>"
 [(set
   (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
   (unspec:<VH_CVTTO>
    [(match_operand:VCVTHI 1 "s_register_operand" "w")]
    VCVT_US))]
 "TARGET_NEON_FP16INST"
 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
)
3707
;; Builtin form of the half-float-to-16-bit-integer vcvt (VH source modes),
;; modelled as an unspec.  <sup> supplies the signedness letter of the
;; integer result.
(define_insn "neon_vcvt<sup><mode>"
 [(set
   (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
   (unspec:<VH_CVTTO>
    [(match_operand:VH 1 "s_register_operand" "w")]
    VCVT_US))]
 "TARGET_NEON_FP16INST"
 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
3718
;; 32-bit float-to-integer vcvt with an extra immediate (operand 2) printed
;; as the third assembler operand.  The immediate is validated with
;; arm_const_bounds against the bounds 1 and 33 before the instruction is
;; printed.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, 33);
  return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3731
;; Half-float-to-16-bit-integer vcvt with an extra immediate (operand 2)
;; printed as the third assembler operand.  The immediate is validated with
;; arm_const_bounds against the bounds 0 and 17.
;; NOTE(review): the 32-bit variants use a lower bound of 1 where this one
;; uses 0 -- confirm the asymmetry is intended.
(define_insn "neon_vcvt<sup>_n<mode>"
 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
   (unspec:<VH_CVTTO>
    [(match_operand:VH 1 "s_register_operand" "w")
     (match_operand:SI 2 "immediate_operand" "i")]
    VCVT_US_N))]
  "TARGET_NEON_FP16INST"
{
  arm_const_bounds (operands[2], 0, 17);
  return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
}
 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
3745
;; 32-bit integer-to-float vcvt with an extra immediate (operand 2) printed
;; as the third assembler operand.  The immediate is validated with
;; arm_const_bounds against the bounds 1 and 33 before the instruction is
;; printed.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, 33);
  return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3758
;; 16-bit-integer-to-half-float vcvt with an extra immediate (operand 2)
;; printed as the third assembler operand.  The immediate is validated with
;; arm_const_bounds against the bounds 0 and 17.
;; NOTE(review): the 32-bit variants use a lower bound of 1 where this one
;; uses 0 -- confirm the asymmetry is intended.
(define_insn "neon_vcvt<sup>_n<mode>"
 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
   (unspec:<VH_CVTTO>
    [(match_operand:VCVTHI 1 "s_register_operand" "w")
     (match_operand:SI 2 "immediate_operand" "i")]
    VCVT_US_N))]
 "TARGET_NEON_FP16INST"
{
  arm_const_bounds (operands[2], 0, 17);
  return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
}
 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
)
3772
;; Half-float-to-16-bit-integer conversions whose mnemonic carries the
;; <vcvth_op> suffix selected by the VCVT_HF_US iterator.
(define_insn "neon_vcvt<vcvth_op><sup><mode>"
 [(set
   (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
   (unspec:<VH_CVTTO>
    [(match_operand:VH 1 "s_register_operand" "w")]
    VCVT_HF_US))]
 "TARGET_NEON_FP16INST"
 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
3783
;; VMOVN: operand 1 (a VN-mode vector, printed with %q1) to a <V_narrow>
;; result (printed with %P0).
(define_insn "neon_vmovn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          UNSPEC_VMOVN))]
  "TARGET_NEON"
  "vmovn.<V_if_elem>\t%P0, %q1"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
3792
;; VQMOVN: operand 1 (a VN-mode vector, printed with %q1) to a <V_narrow>
;; result (printed with %P0).  <sup> supplies the signedness letter.
(define_insn "neon_vqmovn<sup><mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          VQMOVN))]
  "TARGET_NEON"
  "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3801
;; VQMOVUN: operand 1 (a VN-mode vector, printed with %q1) to a <V_narrow>
;; result (printed with %P0), using the signed element suffix <V_s_elem>.
(define_insn "neon_vqmovun<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          UNSPEC_VQMOVUN))]
  "TARGET_NEON"
  "vqmovun.<V_s_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3810
;; VMOVL: operand 1 (a VW-mode vector, printed with %P1) to a <V_widen>
;; result (printed with %q0).  <sup> supplies the signedness letter.
(define_insn "neon_vmovl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")]
          VMOVL))]
  "TARGET_NEON"
  "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
  [(set_attr "type" "neon_shift_imm_long")]
)
3819
;; VMUL by scalar for the VMD modes: operand 1 times the lane of operand 2
;; selected by operand 3.  The "type" attribute depends on whether the mode
;; is a float mode.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD
          [(match_operand:VMD 1 "s_register_operand" "w")
           (match_operand:VMD 2 "s_register_operand"
                              "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
  "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]"
  [(set (attr "type")
        (if_then_else
          (match_test "<Is_float_mode>")
          (const_string "neon_fp_mul_s_scalar<q>")
          (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
3836
;; VMUL by scalar for the VMQ modes: operand 1 times the lane of operand 2
;; (a <V_HALF>-mode vector) selected by operand 3.  The "type" attribute
;; depends on whether the mode is a float mode.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ
          [(match_operand:VMQ 1 "s_register_operand" "w")
           (match_operand:<V_HALF> 2 "s_register_operand"
                                   "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
  "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]"
  [(set (attr "type")
        (if_then_else
          (match_test "<Is_float_mode>")
          (const_string "neon_fp_mul_s_scalar<q>")
          (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
3853
;; VMUL by scalar for the VH (half-precision) modes: operand 1 times the lane
;; of the V4HF operand 2 selected by operand 3.  Requires
;; TARGET_NEON_FP16INST.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:V4HF 2 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON_FP16INST"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
  [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
)
3865
;; VMULL by scalar: operand 1 times the lane of operand 2 selected by
;; operand 3, producing a <V_widen> result (printed with %q0).  <sup>
;; supplies the signedness letter.
(define_insn "neon_vmull<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VMULL_LANE))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
)
3879
;; VQDMULL by scalar: operand 1 combined with the lane of operand 2 selected
;; by operand 3, producing a <V_widen> result (printed with %q0).
(define_insn "neon_vqdmull_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VQDMULL_LANE))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
)
3893
;; vqdmulh/vqrdmulh by lane for the VMQI modes (<r> selects the rounding
;; mnemonic).  The lane source, operand 2, has the <V_HALF> mode of the
;; vector operands; operand 3 is the constant lane index.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI
          [(match_operand:VMQI 1 "s_register_operand" "w")
           (match_operand:<V_HALF> 2 "s_register_operand"
                                   "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VQDMULH_LANE))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)
3907
;; vqdmulh/vqrdmulh by lane for the VMDI modes (<r> selects the rounding
;; mnemonic).  All three vector operands share the VMDI mode; operand 3 is
;; the constant lane index into operand 2.
;;
;; The type attribute deliberately has no "_q" suffix: this is the VMDI
;; variant (every vector operand is printed with %P), so it should not be
;; scheduled as the VMQI form, which carries the "_scalar_q" type.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VQDMULH_LANE))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar")]
)
3921
;; vqrdmlah_lane, vqrdmlsh_lane (VMQI modes).
;; Operand 1 is tied to the destination by the "0" constraint, so only
;; operands 0, 2 and 3 appear in the assembly; operand 4 is the constant
;; lane index into the <V_HALF> operand 3.  Needs TARGET_NEON_RDMA.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI
          [(match_operand:VMQI 1 "s_register_operand" "0")
           (match_operand:VMQI 2 "s_register_operand" "w")
           (match_operand:<V_HALF> 3 "s_register_operand"
                                   "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
)
3938
;; vqrdmlah_lane / vqrdmlsh_lane for the VMDI modes.  Operand 1 is tied to
;; the destination; operand 4 is the constant lane index into operand 3.
;; Needs TARGET_NEON_RDMA.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI
          [(match_operand:VMDI 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
)
3954
;; vmla by lane for the VMD modes.  Operand 1 is tied to the destination
;; ("0" constraint); operand 2 is the vector multiplicand and lane %c4 of
;; operand 3 supplies the scalar.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD
          [(match_operand:VMD 1 "s_register_operand" "0")
           (match_operand:VMD 2 "s_register_operand" "w")
           (match_operand:VMD 3 "s_register_operand"
                              "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]"
  ;; Float modes get the FP multiply-accumulate type, all others the
  ;; integer one.
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s_scalar<q>")
                      (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3972
;; vmla by lane for the VMQ modes.  Operand 1 is tied to the destination;
;; the lane source, operand 3, has the <V_HALF> mode and is indexed by the
;; constant in operand 4.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ
          [(match_operand:VMQ 1 "s_register_operand" "0")
           (match_operand:VMQ 2 "s_register_operand" "w")
           (match_operand:<V_HALF> 3 "s_register_operand"
                                   "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]"
  ;; Float modes get the FP multiply-accumulate type, all others the
  ;; integer one.
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s_scalar<q>")
                      (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3990
;; vmlal by lane (the s/u letter comes from <sup>).  The <V_widen> operand 1
;; is tied to the destination; operand 2 and lane %c4 of operand 3 are the
;; VMDI multiplicands.
(define_insn "neon_vmlal<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VMLAL_LANE))]
  "TARGET_NEON"
  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
4005
;; vqdmlal by lane.  The <V_widen> operand 1 is tied to the destination;
;; operand 2 and lane %c4 of operand 3 are the VMDI multiplicands.
(define_insn "neon_vqdmlal_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VQDMLAL_LANE))]
  "TARGET_NEON"
  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
4020
;; vmls by lane for the VMD modes.  Operand 1 is tied to the destination
;; ("0" constraint); operand 2 is the vector multiplicand and lane %c4 of
;; operand 3 supplies the scalar.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD
          [(match_operand:VMD 1 "s_register_operand" "0")
           (match_operand:VMD 2 "s_register_operand" "w")
           (match_operand:VMD 3 "s_register_operand"
                              "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]"
  ;; Float modes get the FP multiply-accumulate type, all others the
  ;; integer one.
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s_scalar<q>")
                      (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
4038
;; vmls by lane for the VMQ modes.  Operand 1 is tied to the destination;
;; the lane source, operand 3, has the <V_HALF> mode and is indexed by the
;; constant in operand 4.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ
          [(match_operand:VMQ 1 "s_register_operand" "0")
           (match_operand:VMQ 2 "s_register_operand" "w")
           (match_operand:<V_HALF> 3 "s_register_operand"
                                   "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]"
  ;; Float modes get the FP multiply-accumulate type, all others the
  ;; integer one.
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s_scalar<q>")
                      (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
4056
;; vmlsl by lane (the s/u letter comes from <sup>).  The <V_widen> operand 1
;; is tied to the destination; operand 2 and lane %c4 of operand 3 are the
;; VMDI multiplicands.
(define_insn "neon_vmlsl<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VMLSL_LANE))]
  "TARGET_NEON"
  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
4071
;; vqdmlsl by lane.  The <V_widen> operand 1 is tied to the destination;
;; operand 2 and lane %c4 of operand 3 are the VMDI multiplicands.
(define_insn "neon_vqdmlsl_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                               "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VQDMLSL_LANE))]
  "TARGET_NEON"
  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
4086
; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
; core register into a temp register, then use a scalar taken from that. This
; isn't an optimal solution if e.g. the scalar has just been read from memory
; or extracted from another vector. In the latter case it's currently better
; to use the "_lane" variant, and the former case can probably be implemented
; using vld1_lane, but that hasn't been done yet.
4093
;; vmul_n for the VMD modes: put the scalar (operand 2) into lane 0 of a
;; fresh vector pseudo, then reuse the vmul_lane pattern with lane index 0.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the multiply.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx));
  DONE;
})
4106
;; vmul_n for the VMQ modes: the scalar (operand 2) goes into lane 0 of a
;; fresh <V_HALF> pseudo, which then feeds the vmul_lane pattern with lane
;; index 0.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the multiply.  */
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx));
  DONE;
})
4119
;; vmul_n for the VH modes: the scalar (operand 2) goes into lane 0 of a
;; fresh V4HF pseudo, which then feeds the vmul_lane pattern with lane
;; index 0.  Needs TARGET_NEON_FP16INST.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the multiply.  */
  rtx scalar_vec = gen_reg_rtx (V4HFmode);

  emit_insn (gen_neon_vset_lanev4hf (scalar_vec, operands[2], scalar_vec,
                                     const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx));
  DONE;
})
4132
;; vmulls_n: put the scalar (operand 2) into lane 0 of a fresh vector
;; pseudo, then reuse the vmulls_lane pattern with lane index 0.
(define_expand "neon_vmulls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the multiply.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1],
                                         scalar_vec, const0_rtx));
  DONE;
})
4145
;; vmullu_n: put the scalar (operand 2) into lane 0 of a fresh vector
;; pseudo, then reuse the vmullu_lane pattern with lane index 0.
(define_expand "neon_vmullu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the multiply.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1],
                                         scalar_vec, const0_rtx));
  DONE;
})
4158
;; vqdmull_n: put the scalar (operand 2) into lane 0 of a fresh vector
;; pseudo, then reuse the vqdmull_lane pattern with lane index 0.
(define_expand "neon_vqdmull_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the multiply.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1],
                                          scalar_vec, const0_rtx));
  DONE;
})
4171
;; vqdmulh_n for the VMDI modes: put the scalar (operand 2) into lane 0 of
;; a fresh vector pseudo, then reuse the vqdmulh_lane pattern with lane
;; index 0.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the multiply.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, const0_rtx));
  DONE;
})
4184
;; vqrdmulh_n for the VMDI modes: put the scalar (operand 2) into lane 0 of
;; a fresh vector pseudo, then reuse the vqrdmulh_lane pattern with lane
;; index 0.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the multiply.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, const0_rtx));
  DONE;
})
4197
;; vqdmulh_n for the VMQI modes: the scalar (operand 2) goes into lane 0 of
;; a fresh <V_HALF> pseudo, which then feeds the vqdmulh_lane pattern with
;; lane index 0.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand")
   (match_operand:VMQI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the multiply.  */
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, const0_rtx));
  DONE;
})
4210
;; vqrdmulh_n for the VMQI modes: the scalar (operand 2) goes into lane 0
;; of a fresh <V_HALF> pseudo, which then feeds the vqrdmulh_lane pattern
;; with lane index 0.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand")
   (match_operand:VMQI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the multiply.  */
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, const0_rtx));
  DONE;
})
4223
;; vmla_n for the VMD modes: put the scalar (operand 3) into lane 0 of a
;; fresh vector pseudo, then reuse the vmla_lane pattern with lane index 0.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:VMD 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the vmla.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1],
                                       operands[2], scalar_vec,
                                       const0_rtx));
  DONE;
})
4237
;; vmla_n for the VMQ modes: the scalar (operand 3) goes into lane 0 of a
;; fresh <V_HALF> pseudo, which then feeds the vmla_lane pattern with lane
;; index 0.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:VMQ 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the vmla.  */
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1],
                                       operands[2], scalar_vec,
                                       const0_rtx));
  DONE;
})
4251
;; vmlals_n: put the scalar (operand 3) into lane 0 of a fresh vector
;; pseudo, then reuse the vmlals_lane pattern with lane index 0.
(define_expand "neon_vmlals_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the vmlal.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec,
                                         const0_rtx));
  DONE;
})
4265
;; vmlalu_n: put the scalar (operand 3) into lane 0 of a fresh vector
;; pseudo, then reuse the vmlalu_lane pattern with lane index 0.
(define_expand "neon_vmlalu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the vmlal.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec,
                                         const0_rtx));
  DONE;
})
4279
;; vqdmlal_n: put the scalar (operand 3) into lane 0 of a fresh vector
;; pseudo, then reuse the vqdmlal_lane pattern with lane index 0.
(define_expand "neon_vqdmlal_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the vqdmlal.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec,
                                          const0_rtx));
  DONE;
})
4293
;; vmls_n for the VMD modes: put the scalar (operand 3) into lane 0 of a
;; fresh vector pseudo, then reuse the vmls_lane pattern with lane index 0.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:VMD 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the vmls.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1],
                                       operands[2], scalar_vec,
                                       const0_rtx));
  DONE;
})
4307
;; vmls_n for the VMQ modes: the scalar (operand 3) goes into lane 0 of a
;; fresh <V_HALF> pseudo, which then feeds the vmls_lane pattern with lane
;; index 0.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:VMQ 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the vmls.  */
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1],
                                       operands[2], scalar_vec,
                                       const0_rtx));
  DONE;
})
4321
;; vmlsls_n: put the scalar (operand 3) into lane 0 of a fresh vector
;; pseudo, then reuse the vmlsls_lane pattern with lane index 0.
(define_expand "neon_vmlsls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the vmlsl.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec,
                                         const0_rtx));
  DONE;
})
4335
;; vmlslu_n: put the scalar (operand 3) into lane 0 of a fresh vector
;; pseudo, then reuse the vmlslu_lane pattern with lane index 0.
(define_expand "neon_vmlslu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the vmlsl.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec,
                                         const0_rtx));
  DONE;
})
4349
;; vqdmlsl_n: put the scalar (operand 3) into lane 0 of a fresh vector
;; pseudo, then reuse the vqdmlsl_lane pattern with lane index 0.
(define_expand "neon_vqdmlsl_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  /* Only lane 0 of SCALAR_VEC is set here and used by the vqdmlsl.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec,
                                          const0_rtx));
  DONE;
})
4363
;; vext on two vectors of the same VDQX mode.  Operand 3 is the immediate;
;; it is checked against the bounds 0 and the number of units in the mode
;; before the instruction is printed.
(define_insn "@neon_vext<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" "w")
           (match_operand:VDQX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VEXT))]
  "TARGET_NEON"
{
  /* Validate the immediate first, then hand back the template.  */
  arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));

  return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_ext<q>")]
)
4377
;; vrev64 on a single VDQ vector (operand 1), result in operand 0.
(define_insn "@neon_vrev64<mode>"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (unspec:VDQ
          [(match_operand:VDQ 1 "s_register_operand" "w")]
          UNSPEC_VREV64))]
  "TARGET_NEON"
  "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
4386
;; vrev32 on a single VX vector (operand 1), result in operand 0.
(define_insn "@neon_vrev32<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (unspec:VX
          [(match_operand:VX 1 "s_register_operand" "w")]
          UNSPEC_VREV32))]
  "TARGET_NEON"
  "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
4395
;; vrev16 on a single VE vector (operand 1), result in operand 0.
(define_insn "@neon_vrev16<mode>"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (unspec:VE
          [(match_operand:VE 1 "s_register_operand" "w")]
          UNSPEC_VREV16))]
  "TARGET_NEON"
  "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
4404
4405; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4406; allocation. For an intrinsic of form:
4407;   rD = vbsl_* (rS, rN, rM)
4408; We can use any of:
4409;   vbsl rS, rN, rM  (if D = S)
4410;   vbit rD, rN, rS  (if D = M, so 1-bits in rS choose bits from rN, else rM)
4411;   vbif rD, rM, rS  (if D = N, so 0-bits in rS choose bits from rM, else rN)
4412
;; Bitwise select with three alternatives, one per input that may be tied
;; to the destination:
;;   alt 0: operand 1 tied -> vbsl %0, %2, %3
;;   alt 1: operand 3 tied -> vbit %0, %2, %1
;;   alt 2: operand 2 tied -> vbif %0, %3, %1
(define_insn "neon_vbsl<mode>_internal"
  [(set (match_operand:VDQX 0 "s_register_operand"   "=w,w,w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
           (match_operand:VDQX 2 "s_register_operand" " w,w,0")
           (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
          UNSPEC_VBSL))]
  "TARGET_NEON"
  "@
  vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
  vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
  vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
  [(set_attr "type" "neon_bsl<q>")]
)
4426
;; Expander for vbsl.  Operand 1 arrives in the <V_cmp_result> mode and is
;; re-expressed in the data mode before the UNSPEC_VBSL set in the template
;; is emitted.
(define_expand "@neon_vbsl<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX
          [(match_operand:<V_cmp_result> 1 "s_register_operand")
           (match_operand:VDQX 2 "s_register_operand")
           (match_operand:VDQX 3 "s_register_operand")]
          UNSPEC_VBSL))]
  "TARGET_NEON"
{
  /* Operands of different modes cannot be aliased to one another, so
     give operand 1 the same mode as the others.  */
  operands[1] = gen_lowpart (<MODE>mode, operands[1]);
})
4438
;; vshl, vrshl
;; Both sources are VDQIX vectors; the mnemonic comes from <shift_op> and
;; the s/u letter from <sup>.
;; NOTE(review): the second source is a register, yet the type attribute is
;; the "_imm" class -- confirm this is intended.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
4449
;; vqshl, vqrshl
;; Both sources are VDQIX vectors; the mnemonic comes from <shift_op> and
;; the s/u letter from <sup>.
;; NOTE(review): the second source is a register, yet the type attribute is
;; the "_imm" class -- confirm this is intended.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:VDQIX 2 "s_register_operand" "w")]
          VQSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4460
;; vshr_n, vrshr_n
;; Operand 2 is the immediate; it is checked against the bounds 1 and
;; (element bits + 1) before the instruction is printed.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHR_N))]
  "TARGET_NEON"
{
  /* Validate the immediate first, then hand back the template.  */
  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);

  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
4474
;; vshrn_n, vrshrn_n
;; Narrowing form: a VN source gives a <V_narrow> result.  The immediate
;; (operand 2) is checked against the bounds 1 and (element bits / 2 + 1).
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHRN_N))]
  "TARGET_NEON"
{
  /* Validate the immediate first, then hand back the template.  */
  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);

  return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
4488
;; vqshrn_n, vqrshrn_n
;; Narrowing form: a VN source gives a <V_narrow> result.  The immediate
;; (operand 2) is checked against the bounds 1 and (element bits / 2 + 1).
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHRN_N))]
  "TARGET_NEON"
{
  /* Validate the immediate first, then hand back the template.  */
  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);

  return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4502
;; vqshrun_n, vqrshrun_n
;; Narrowing form: a VN source gives a <V_narrow> result.  The immediate
;; (operand 2) is checked against the bounds 1 and (element bits / 2 + 1).
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHRUN_N))]
  "TARGET_NEON"
{
  /* Validate the immediate first, then hand back the template.  */
  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);

  return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4516
;; vshl by immediate.  Operand 2 is the immediate; it is checked against
;; the bounds 0 and the element width in bits before printing.
(define_insn "neon_vshl_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VSHL_N))]
  "TARGET_NEON"
{
  /* Validate the immediate first, then hand back the template.  */
  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));

  return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
4529
;; vqshl by immediate (the s/u letter comes from <sup>).  Operand 2 is the
;; immediate; it is checked against the bounds 0 and the element width in
;; bits before printing.
(define_insn "neon_vqshl_<sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VQSHL_N))]
  "TARGET_NEON"
{
  /* Validate the immediate first, then hand back the template.  */
  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));

  return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4542
;; vqshlu by immediate.  Operand 2 is the immediate; it is checked against
;; the bounds 0 and the element width in bits before printing.
(define_insn "neon_vqshlu_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VQSHLU_N))]
  "TARGET_NEON"
{
  /* Validate the immediate first, then hand back the template.  */
  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));

  return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4555
;; vshll by immediate (the s/u letter comes from <sup>): a VW source gives
;; a <V_widen> result.  Operand 2 is the immediate shift count.
(define_insn "neon_vshll<sup>_n<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VSHLL_N))]
  "TARGET_NEON"
{
  /* The boundaries are: 0 <= imm <= size.  The lower bound passed below
     is 0, so a zero shift count is accepted (this assumes the check
     treats its lower bound as inclusive and its upper bound as exclusive,
     as the sibling shift patterns that pass 1 and size + 1 suggest).  */
  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
  return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
}
  [(set_attr "type" "neon_shift_imm_long")]
)
4569
;; vsra_n, vrsra_n
;; Operand 1 is tied to the destination ("0" constraint), so only operands
;; 0 and 2 are printed.  The immediate (operand 3) is checked against the
;; bounds 1 and (element bits + 1).
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          VSRA_N))]
  "TARGET_NEON"
{
  /* Validate the immediate first, then hand back the template.  */
  arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);

  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_acc<q>")]
)
4584
;; vsri by immediate.  Operand 1 is tied to the destination ("0"
;; constraint), so only operands 0 and 2 are printed.  The immediate
;; (operand 3) is checked against the bounds 1 and (element bits + 1).
;; NOTE(review): the shift count is an immediate, yet the type attribute is
;; the "_reg" class -- confirm this is intended.
(define_insn "neon_vsri_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VSRI))]
  "TARGET_NEON"
{
  /* Validate the immediate first, then hand back the template.  */
  arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);

  return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
4598
;; vsli by immediate.  Operand 1 is tied to the destination ("0"
;; constraint), so only operands 0 and 2 are printed.  The immediate
;; (operand 3) is checked against the bounds 0 and the element width in
;; bits.
;; NOTE(review): the shift count is an immediate, yet the type attribute is
;; the "_reg" class -- confirm this is intended.
(define_insn "neon_vsli_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VSLI))]
  "TARGET_NEON"
{
  /* Validate the immediate first, then hand back the template.  */
  arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));

  return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
4612
;; vtbl with a one-register table: operand 1 is the V8QI table (printed
;; inside the register-list braces) and operand 2 the V8QI index vector.
(define_insn "neon_vtbl1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "vtbl.8\t%P0, {%P1}, %P2"
  [(set_attr "type" "neon_tbl1")]
)
4622
;; vtbl with a two-register table held in the TImode operand 1.  The table
;; is printed as two V8QI registers whose register numbers are that of
;; operand 1 and that number plus 2; operand 2 is the V8QI index vector.
(define_insn "neon_vtbl2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  /* Build a private operand array naming each table register, emit the
     instruction from it, and return an empty template.  */
  int table_regno = REGNO (operands[1]);
  rtx asm_ops[4];

  asm_ops[0] = operands[0];
  asm_ops[1] = gen_rtx_REG (V8QImode, table_regno);
  asm_ops[2] = gen_rtx_REG (V8QImode, table_regno + 2);
  asm_ops[3] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
4643
;; VTBL with a three-register table held in an EImode register
;; (operand 1).  The register list is rebuilt as three V8QI registers at
;; REGNO (operands[1]) + 0, + 2 and + 4, and the instruction is printed
;; directly with output_asm_insn, so the returned template is empty.
4644(define_insn "neon_vtbl3v8qi"
4645  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4646          (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
4647                          (match_operand:V8QI 2 "s_register_operand" "w")]
4648                     UNSPEC_VTBL))]
4649  "TARGET_NEON"
4650{
4651  rtx ops[5];
4652  int tabbase = REGNO (operands[1]);
4653
4654  ops[0] = operands[0];
4655  ops[1] = gen_rtx_REG (V8QImode, tabbase);
4656  ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4657  ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4658  ops[4] = operands[2];
4659  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4660
4661  return "";
4662}
4663  [(set_attr "type" "neon_tbl3")]
4664)
4665
;; VTBL with a four-register table held in an OImode register
;; (operand 1).  The register list is rebuilt as four V8QI registers at
;; REGNO (operands[1]) + 0, + 2, + 4 and + 6, and the instruction is
;; printed directly with output_asm_insn, so the returned template is
;; empty.
4666(define_insn "neon_vtbl4v8qi"
4667  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4668          (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
4669                          (match_operand:V8QI 2 "s_register_operand" "w")]
4670                     UNSPEC_VTBL))]
4671  "TARGET_NEON"
4672{
4673  rtx ops[6];
4674  int tabbase = REGNO (operands[1]);
4675
4676  ops[0] = operands[0];
4677  ops[1] = gen_rtx_REG (V8QImode, tabbase);
4678  ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4679  ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4680  ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4681  ops[5] = operands[2];
4682  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4683
4684  return "";
4685}
4686  [(set_attr "type" "neon_tbl4")]
4687)
4688
4689;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup with a V16QI table (operand 1).  The insn is output
;; as "#" and split once reload has completed: the table is viewed as
;; TImode via gen_lowpart, and one neon_vtbl2v8qi is emitted for the low
;; V8QI halves of operands 0 and 2, followed by one for the high halves.
;; Operand 0 is earlyclobber ("=&w").
4690(define_insn_and_split "neon_vtbl1v16qi"
4691  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4692          (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
4693                           (match_operand:V16QI 2 "s_register_operand" "w")]
4694                          UNSPEC_VTBL))]
4695  "TARGET_NEON"
4696  "#"
4697  "&& reload_completed"
4698  [(const_int 0)]
4699{
4700  rtx op0, op1, op2, part0, part2;
4701  unsigned ofs;
4702
4703  op0 = operands[0];
4704  op1 = gen_lowpart (TImode, operands[1]);
4705  op2 = operands[2];
4706
4707  ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4708  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4709  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4710  emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4711
4712  ofs = subreg_highpart_offset (V8QImode, V16QImode);
4713  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4714  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4715  emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4716  DONE;
4717}
4718  [(set_attr "type" "multiple")]
4719)
4720
;; V16QI table lookup with an OImode table (operand 1), i.e. a table that
;; spans four D registers.  The insn is output as "#" and split once
;; reload has completed into two V8QI lookups, one for the low V8QI halves
;; of operands 0 and 2 and one for the high halves.  Operand 0 is
;; earlyclobber ("=&w").
;;
;; The halves must be emitted with neon_vtbl4v8qi: that is the V8QI pattern
;; whose table operand is OImode and which prints all four table
;; registers.  neon_vtbl2v8qi expects a TImode table and only prints the
;; registers at REGNO and REGNO + 2.
4721(define_insn_and_split "neon_vtbl2v16qi"
4722  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4723          (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
4724                           (match_operand:V16QI 2 "s_register_operand" "w")]
4725                          UNSPEC_VTBL))]
4726  "TARGET_NEON"
4727  "#"
4728  "&& reload_completed"
4729  [(const_int 0)]
4730{
4731  rtx op0, op1, op2, part0, part2;
4732  unsigned ofs;
4733
4734  op0 = operands[0];
4735  op1 = operands[1];
4736  op2 = operands[2];
4737
  /* Low half of the result, using the full four-register table.  */
4738  ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4739  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4740  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4741  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
4742
  /* High half of the result, same table.  */
4743  ofs = subreg_highpart_offset (V8QImode, V16QImode);
4744  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4745  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4746  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
4747  DONE;
4748}
4749  [(set_attr "type" "multiple")]
4750)
4751
4752;; ??? Logically we should extend the regular neon_vcombine pattern to
4753;; handle quad-word input modes, producing octa-word output modes.  But
4754;; that requires us to add support for octa-word vector modes in moves.
4755;; That seems overkill for this one use in vec_perm.
;; Combines two V16QI registers (operands 1 and 2) into one OImode
;; register under UNSPEC_VCONCAT.  The insn is output as "#"; once reload
;; has completed the split passes the operand array to
;; neon_split_vcombine.
4756(define_insn_and_split "neon_vcombinev16qi"
4757  [(set (match_operand:OI 0 "s_register_operand" "=w")
4758          (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4759                        (match_operand:V16QI 2 "s_register_operand" "w")]
4760                       UNSPEC_VCONCAT))]
4761  "TARGET_NEON"
4762  "#"
4763  "&& reload_completed"
4764  [(const_int 0)]
4765{
4766  neon_split_vcombine (operands);
4767  DONE;
4768}
4769[(set_attr "type" "multiple")]
4770)
4771
;; VTBX with a single-register table.  Operand 1 is tied to the
;; destination ("0") and is not printed; operand 2 is printed as the
;; one-entry register list and operand 3 as the final source operand.
4772(define_insn "neon_vtbx1v8qi"
4773  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4774          (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4775                          (match_operand:V8QI 2 "s_register_operand" "w")
4776                          (match_operand:V8QI 3 "s_register_operand" "w")]
4777                     UNSPEC_VTBX))]
4778  "TARGET_NEON"
4779  "vtbx.8\t%P0, {%P2}, %P3"
4780  [(set_attr "type" "neon_tbl1")]
4781)
4782
;; VTBX with a two-register table held in a TImode register (operand 2).
;; Operand 1 is tied to the destination.  The register list is rebuilt as
;; V8QI registers at REGNO (operands[2]) + 0 and + 2 and the instruction
;; is printed with output_asm_insn; the returned template is empty.
4783(define_insn "neon_vtbx2v8qi"
4784  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4785          (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4786                          (match_operand:TI 2 "s_register_operand" "w")
4787                          (match_operand:V8QI 3 "s_register_operand" "w")]
4788                     UNSPEC_VTBX))]
4789  "TARGET_NEON"
4790{
4791  rtx ops[4];
4792  int tabbase = REGNO (operands[2]);
4793
4794  ops[0] = operands[0];
4795  ops[1] = gen_rtx_REG (V8QImode, tabbase);
4796  ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4797  ops[3] = operands[3];
4798  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4799
4800  return "";
4801}
4802  [(set_attr "type" "neon_tbl2")]
4803)
4804
;; VTBX with a three-register table held in an EImode register
;; (operand 2).  Operand 1 is tied to the destination.  The register list
;; is rebuilt as V8QI registers at REGNO (operands[2]) + 0, + 2 and + 4 and
;; the instruction is printed with output_asm_insn; the returned template
;; is empty.
4805(define_insn "neon_vtbx3v8qi"
4806  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4807          (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4808                          (match_operand:EI 2 "s_register_operand" "w")
4809                          (match_operand:V8QI 3 "s_register_operand" "w")]
4810                     UNSPEC_VTBX))]
4811  "TARGET_NEON"
4812{
4813  rtx ops[5];
4814  int tabbase = REGNO (operands[2]);
4815
4816  ops[0] = operands[0];
4817  ops[1] = gen_rtx_REG (V8QImode, tabbase);
4818  ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4819  ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4820  ops[4] = operands[3];
4821  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4822
4823  return "";
4824}
4825  [(set_attr "type" "neon_tbl3")]
4826)
4827
;; VTBX with a four-register table held in an OImode register
;; (operand 2).  Operand 1 is tied to the destination.  The register list
;; is rebuilt as V8QI registers at REGNO (operands[2]) + 0, + 2, + 4 and
;; + 6 and the instruction is printed with output_asm_insn; the returned
;; template is empty.
4828(define_insn "neon_vtbx4v8qi"
4829  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4830          (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4831                          (match_operand:OI 2 "s_register_operand" "w")
4832                          (match_operand:V8QI 3 "s_register_operand" "w")]
4833                     UNSPEC_VTBX))]
4834  "TARGET_NEON"
4835{
4836  rtx ops[6];
4837  int tabbase = REGNO (operands[2]);
4838
4839  ops[0] = operands[0];
4840  ops[1] = gen_rtx_REG (V8QImode, tabbase);
4841  ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4842  ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4843  ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4844  ops[5] = operands[3];
4845  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4846
4847  return "";
4848}
4849  [(set_attr "type" "neon_tbl4")]
4850)
4851
;; Expander for VTRN over the VDQWH modes.  It produces a parallel of two
;; sets computed from the same inputs (operands 1 and 2): operand 0
;; receives UNSPEC_VTRN1 and operand 3 receives UNSPEC_VTRN2.  The
;; preparation statement is empty.
4852(define_expand "@neon_vtrn<mode>_internal"
4853  [(parallel
4854    [(set (match_operand:VDQWH 0 "s_register_operand")
4855            (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4856                               (match_operand:VDQWH 2 "s_register_operand")]
4857             UNSPEC_VTRN1))
4858     (set (match_operand:VDQWH 3 "s_register_operand")
4859            (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4860  "TARGET_NEON"
4861  ""
4862)
4863
4864;; Note: Different operand numbering to handle tied registers correctly.
;; Insn for the two-output VTRN parallel.  Input operand 1 is tied to
;; output 0 and input operand 3 is tied to output 2, so the template only
;; names operands 0 and 2.  Both outputs are earlyclobber ("=&w").
4865(define_insn "*neon_vtrn<mode>_insn"
4866  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4867          (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4868                           (match_operand:VDQWH 3 "s_register_operand" "2")]
4869           UNSPEC_VTRN1))
4870   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4871          (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4872           UNSPEC_VTRN2))]
4873  "TARGET_NEON"
4874  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4875  [(set_attr "type" "neon_permute<q>")]
4876)
4877
;; Expander for VZIP over the VDQWH modes.  It produces a parallel of two
;; sets computed from the same inputs (operands 1 and 2): operand 0
;; receives UNSPEC_VZIP1 and operand 3 receives UNSPEC_VZIP2.  The
;; preparation statement is empty.
4878(define_expand "@neon_vzip<mode>_internal"
4879  [(parallel
4880    [(set (match_operand:VDQWH 0 "s_register_operand")
4881            (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4882                               (match_operand:VDQWH 2 "s_register_operand")]
4883             UNSPEC_VZIP1))
4884    (set (match_operand:VDQWH 3 "s_register_operand")
4885           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4886  "TARGET_NEON"
4887  ""
4888)
4889
4890;; Note: Different operand numbering to handle tied registers correctly.
;; Insn for the two-output VZIP parallel.  Input operand 1 is tied to
;; output 0 and input operand 3 is tied to output 2, so the template only
;; names operands 0 and 2.  Both outputs are earlyclobber ("=&w").
4891(define_insn "*neon_vzip<mode>_insn"
4892  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4893          (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4894                           (match_operand:VDQWH 3 "s_register_operand" "2")]
4895           UNSPEC_VZIP1))
4896   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4897          (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4898           UNSPEC_VZIP2))]
4899  "TARGET_NEON"
4900  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4901  [(set_attr "type" "neon_zip<q>")]
4902)
4903
;; Expander for VUZP over the VDQWH modes.  It produces a parallel of two
;; sets computed from the same inputs (operands 1 and 2): operand 0
;; receives UNSPEC_VUZP1 and operand 3 receives UNSPEC_VUZP2.  The
;; preparation statement is empty.
4904(define_expand "@neon_vuzp<mode>_internal"
4905  [(parallel
4906    [(set (match_operand:VDQWH 0 "s_register_operand")
4907            (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4908                              (match_operand:VDQWH 2 "s_register_operand")]
4909             UNSPEC_VUZP1))
4910     (set (match_operand:VDQWH 3 "s_register_operand")
4911            (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4912  "TARGET_NEON"
4913  ""
4914)
4915
4916;; Note: Different operand numbering to handle tied registers correctly.
;; Insn for the two-output VUZP parallel.  Input operand 1 is tied to
;; output 0 and input operand 3 is tied to output 2, so the template only
;; names operands 0 and 2.  Both outputs are earlyclobber ("=&w").
4917(define_insn "*neon_vuzp<mode>_insn"
4918  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4919          (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4920                           (match_operand:VDQWH 3 "s_register_operand" "2")]
4921           UNSPEC_VUZP1))
4922   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4923          (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4924           UNSPEC_VUZP2))]
4925  "TARGET_NEON"
4926  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4927  [(set_attr "type" "neon_zip<q>")]
4928)
4929
;; vec_load_lanes expander for a single vector in the VDQX modes: the
;; register destination is set from an UNSPEC_VLD1 of a
;; neon_struct_operand.  There is no preparation code.
4930(define_expand "vec_load_lanes<mode><mode>"
4931  [(set (match_operand:VDQX 0 "s_register_operand")
4932        (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
4933                     UNSPEC_VLD1))]
4934  "TARGET_NEON")
4935
;; Whole-vector VLD1 for the VDQX modes: loads operand 0 from the "Um"
;; memory operand 1 under UNSPEC_VLD1.
4936(define_insn "neon_vld1<mode>"
4937  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4938        (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4939                    UNSPEC_VLD1))]
4940  "TARGET_NEON"
4941  "vld1.<V_sz_elem>\t%h0, %A1"
4942  [(set_attr "type" "neon_load1_1reg<q>")]
4943)
4944
4945;; The lane numbers in the RTL are in GCC lane order, having been flipped
4946;; in arm_expand_neon_args. The lane numbers are restored to architectural
4947;; lane order here.
;; Single-lane VLD1 into a VDX register.  Operand 1 is the element-sized
;; memory operand, operand 2 is tied to the destination and operand 3 is
;; the lane.  The lane is passed through NEON_ENDIAN_LANE_N and stored back
;; into operands[3] before printing.  A one-element mode is printed as a
;; plain vld1 of %P0 with no lane index.
4948(define_insn "neon_vld1_lane<mode>"
4949  [(set (match_operand:VDX 0 "s_register_operand" "=w")
4950        (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4951                     (match_operand:VDX 2 "s_register_operand" "0")
4952                     (match_operand:SI 3 "immediate_operand" "i")]
4953                    UNSPEC_VLD1_LANE))]
4954  "TARGET_NEON"
4955{
4956  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4957  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4958  operands[3] = GEN_INT (lane);
4959  if (max == 1)
4960    return "vld1.<V_sz_elem>\t%P0, %A1";
4961  else
4962    return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4963}
4964  [(set_attr "type" "neon_load1_one_lane<q>")]
4965)
4966
4967;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4968;; here on big endian targets.
;; Single-lane VLD1 into a VQX register.  After NEON_ENDIAN_LANE_N, a lane
;; in the upper half (lane >= max / 2) is redirected to the register at
;; REGNO (operands[0]) + 2 with the lane reduced by max / 2.  operands[0]
;; is then replaced by a <V_HALF>mode register and operands[3] by the
;; adjusted lane.  A two-element mode is printed as a plain vld1 of %P0
;; with no lane index.
4969(define_insn "neon_vld1_lane<mode>"
4970  [(set (match_operand:VQX 0 "s_register_operand" "=w")
4971        (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4972                     (match_operand:VQX 2 "s_register_operand" "0")
4973                     (match_operand:SI 3 "immediate_operand" "i")]
4974                    UNSPEC_VLD1_LANE))]
4975  "TARGET_NEON"
4976{
4977  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4978  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4979  operands[3] = GEN_INT (lane);
4980  int regno = REGNO (operands[0]);
4981  if (lane >= max / 2)
4982    {
4983      lane -= max / 2;
4984      regno += 2;
4985      operands[3] = GEN_INT (lane);
4986    }
4987  operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4988  if (max == 2)
4989    return "vld1.<V_sz_elem>\t%P0, %A1";
4990  else
4991    return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4992}
4993  [(set_attr "type" "neon_load1_one_lane<q>")]
4994)
4995
;; VLD1 to all lanes for the VD_LANE modes: the destination is a
;; vec_duplicate of the element-sized memory operand 1, printed with the
;; "[]" all-lanes syntax on %P0.
4996(define_insn "neon_vld1_dup<mode>"
4997  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
4998        (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4999  "TARGET_NEON"
5000  "vld1.<V_sz_elem>\t{%P0[]}, %A1"
5001  [(set_attr "type" "neon_load1_all_lanes<q>")]
5002)
5003
5004;; Special case for DImode.  Treat it exactly like a simple load.
;; The pattern is simply a DImode UNSPEC_VLD1 load from a
;; neon_struct_operand; the preparation statement is empty.
5005(define_expand "neon_vld1_dupdi"
5006  [(set (match_operand:DI 0 "s_register_operand")
5007        (unspec:DI [(match_operand:DI 1 "neon_struct_operand")]
5008                       UNSPEC_VLD1))]
5009  "TARGET_NEON"
5010  ""
5011)
5012
;; VLD1 to all lanes for the VQ2 modes: the destination is a vec_duplicate
;; of the element-sized memory operand 1.  The register list names %e0 and
;; %f0, each with the "[]" all-lanes syntax.
5013(define_insn "neon_vld1_dup<mode>"
5014  [(set (match_operand:VQ2 0 "s_register_operand" "=w")
5015        (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5016  "TARGET_NEON"
5017{
5018  return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5019}
5020  [(set_attr "type" "neon_load1_all_lanes<q>")]
5021)
5022
;; V2DI duplicate-load.  The insn is output as "#" and split once reload
;; has completed: the low DImode part of operand 0 is loaded with
;; neon_vld1_dupdi and then copied into the high DImode part with
;; emit_move_insn.  The "length" attribute is 8.
5023(define_insn_and_split "neon_vld1_dupv2di"
5024   [(set (match_operand:V2DI 0 "s_register_operand" "=w")
5025    (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
5026   "TARGET_NEON"
5027   "#"
5028   "&& reload_completed"
5029   [(const_int 0)]
5030   {
5031    rtx tmprtx = gen_lowpart (DImode, operands[0]);
5032    emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
5033    emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
5034    DONE;
5035    }
5036  [(set_attr "length" "8")
5037   (set_attr "type" "neon_load1_all_lanes_q")]
5038)
5039
;; vec_store_lanes expander for a single vector in the VDQX modes: the
;; neon_struct_operand destination is set from an UNSPEC_VST1 of the
;; register operand 1.  There is no preparation code.
5040(define_expand "vec_store_lanes<mode><mode>"
5041  [(set (match_operand:VDQX 0 "neon_struct_operand")
5042          (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
5043                         UNSPEC_VST1))]
5044  "TARGET_NEON")
5045
;; Whole-vector VST1 for the VDQX modes: stores register operand 1 to the
;; "Um" memory operand 0 under UNSPEC_VST1.
5046(define_insn "neon_vst1<mode>"
5047  [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5048          (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5049                         UNSPEC_VST1))]
5050  "TARGET_NEON"
5051  "vst1.<V_sz_elem>\t%h1, %A0"
5052  [(set_attr "type" "neon_store1_1reg<q>")])
5053
5054;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5055;; here on big endian targets.
;; Single-lane VST1 from a VDX register (operand 1) to an element-sized
;; memory operand 0.  The lane in operand 2 is passed through
;; NEON_ENDIAN_LANE_N and stored back into operands[2] before printing.  A
;; one-element mode is printed as {%P1} with no lane index.
5056(define_insn "neon_vst1_lane<mode>"
5057  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5058          (unspec:<V_elem>
5059            [(match_operand:VDX 1 "s_register_operand" "w")
5060             (match_operand:SI 2 "immediate_operand" "i")]
5061            UNSPEC_VST1_LANE))]
5062  "TARGET_NEON"
5063{
5064  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5065  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5066  operands[2] = GEN_INT (lane);
5067  if (max == 1)
5068    return "vst1.<V_sz_elem>\t{%P1}, %A0";
5069  else
5070    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5071}
5072  [(set_attr "type" "neon_store1_one_lane<q>")]
5073)
5074
5075;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5076;; here on big endian targets.
;; Single-lane VST1 from a VQX register (operand 1).  After
;; NEON_ENDIAN_LANE_N, a lane in the upper half (lane >= max / 2) is
;; redirected to the register at REGNO (operands[1]) + 2 with the lane
;; reduced by max / 2.  operands[1] is replaced by a <V_HALF>mode register
;; and operands[2] by the adjusted lane.  A two-element mode is printed as
;; {%P1} with no lane index.
5077(define_insn "neon_vst1_lane<mode>"
5078  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5079          (unspec:<V_elem>
5080            [(match_operand:VQX 1 "s_register_operand" "w")
5081             (match_operand:SI 2 "immediate_operand" "i")]
5082            UNSPEC_VST1_LANE))]
5083  "TARGET_NEON"
5084{
5085  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5086  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5087  int regno = REGNO (operands[1]);
5088  if (lane >= max / 2)
5089    {
5090      lane -= max / 2;
5091      regno += 2;
5092    }
5093  operands[2] = GEN_INT (lane);
5094  operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5095  if (max == 2)
5096    return "vst1.<V_sz_elem>\t{%P1}, %A0";
5097  else
5098    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5099}
5100  [(set_attr "type" "neon_store1_one_lane<q>")]
5101)
5102
;; vec_load_lanes expander for a TImode structure, iterated over the VDX
;; modes through the UNSPEC_VSTRUCTDUMMY operand.  The pattern is an
;; UNSPEC_VLD2 load from a neon_struct_operand; there is no preparation
;; code.
5103(define_expand "vec_load_lanesti<mode>"
5104  [(set (match_operand:TI 0 "s_register_operand")
5105        (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5106                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5107                       UNSPEC_VLD2))]
5108  "TARGET_NEON")
5109
;; Structure load into a TImode register for the VDXBF modes.  When
;; <V_sz_elem> is 64 the instruction is printed as vld1.64, otherwise as
;; vld2.<V_sz_elem>; the "type" attribute makes the same distinction
;; (neon_load1_2reg vs. neon_load2_2reg).
5110(define_insn "neon_vld2<mode>"
5111  [(set (match_operand:TI 0 "s_register_operand" "=w")
5112        (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5113                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5114                   UNSPEC_VLD2))]
5115  "TARGET_NEON"
5116{
5117  if (<V_sz_elem> == 64)
5118    return "vld1.64\t%h0, %A1";
5119  else
5120    return "vld2.<V_sz_elem>\t%h0, %A1";
5121}
5122  [(set (attr "type")
5123      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5124                    (const_string "neon_load1_2reg<q>")
5125                    (const_string "neon_load2_2reg<q>")))]
5126)
5127
;; Structure load into an OImode register for the VQ2BF modes, always
;; printed as vld2.<V_sz_elem> with the %h0 register list.
5128(define_insn "neon_vld2<mode>"
5129  [(set (match_operand:OI 0 "s_register_operand" "=w")
5130        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5131                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5132                   UNSPEC_VLD2))]
5133  "TARGET_NEON"
5134  "vld2.<V_sz_elem>\t%h0, %A1"
5135  [(set_attr "type" "neon_load2_2reg_q")])
5136
5137;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5138;; here on big endian targets.
;; Single-lane VLD2 into a TImode register for the VD_LANE modes.
;; Operand 2 is tied to the destination.  The output code builds its own
;; operand array: DImode registers at REGNO (operands[0]) + 0 and + 2, the
;; memory operand, and the lane after NEON_ENDIAN_LANE_N.  The instruction
;; is printed with output_asm_insn, so the returned template is empty.
5139(define_insn "neon_vld2_lane<mode>"
5140  [(set (match_operand:TI 0 "s_register_operand" "=w")
5141        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5142                    (match_operand:TI 2 "s_register_operand" "0")
5143                    (match_operand:SI 3 "immediate_operand" "i")
5144                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5145                   UNSPEC_VLD2_LANE))]
5146  "TARGET_NEON"
5147{
5148  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5149  int regno = REGNO (operands[0]);
5150  rtx ops[4];
5151  ops[0] = gen_rtx_REG (DImode, regno);
5152  ops[1] = gen_rtx_REG (DImode, regno + 2);
5153  ops[2] = operands[1];
5154  ops[3] = GEN_INT (lane);
5155  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5156  return "";
5157}
5158  [(set_attr "type" "neon_load2_one_lane<q>")]
5159)
5160
5161;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5162;; here on big endian targets.
;; Single-lane VLD2 into an OImode register for the VQ_HS modes.
;; Operand 2 is tied to the destination.  After NEON_ENDIAN_LANE_N, a lane
;; in the upper half (lane >= max / 2) moves the base register up by 2 and
;; reduces the lane by max / 2.  The two registers printed are DImode
;; registers at the adjusted base and at base + 4.  The instruction is
;; printed with output_asm_insn, so the returned template is empty.
5163(define_insn "neon_vld2_lane<mode>"
5164  [(set (match_operand:OI 0 "s_register_operand" "=w")
5165        (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5166                    (match_operand:OI 2 "s_register_operand" "0")
5167                    (match_operand:SI 3 "immediate_operand" "i")
5168                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5169                   UNSPEC_VLD2_LANE))]
5170  "TARGET_NEON"
5171{
5172  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5173  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5174  int regno = REGNO (operands[0]);
5175  rtx ops[4];
5176  if (lane >= max / 2)
5177    {
5178      lane -= max / 2;
5179      regno += 2;
5180    }
5181  ops[0] = gen_rtx_REG (DImode, regno);
5182  ops[1] = gen_rtx_REG (DImode, regno + 4);
5183  ops[2] = operands[1];
5184  ops[3] = GEN_INT (lane);
5185  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5186  return "";
5187}
5188  [(set_attr "type" "neon_load2_one_lane<q>")]
5189)
5190
;; VLD2 to all lanes into a TImode register for the VDXBF modes.  Modes
;; with more than one element are printed as vld2 with "[]" on %e0 and
;; %f0; a one-element mode is printed as vld1 with the %h0 register list.
;; The "type" attribute makes the same distinction on <V_mode_nunits>.
5191(define_insn "neon_vld2_dup<mode>"
5192  [(set (match_operand:TI 0 "s_register_operand" "=w")
5193        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5194                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5195                   UNSPEC_VLD2_DUP))]
5196  "TARGET_NEON"
5197{
5198  if (GET_MODE_NUNITS (<MODE>mode) > 1)
5199    return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5200  else
5201    return "vld1.<V_sz_elem>\t%h0, %A1";
5202}
5203  [(set (attr "type")
5204      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5205                    (const_string "neon_load2_all_lanes<q>")
5206                    (const_string "neon_load1_1reg<q>")))]
5207)
5208
;; V8BF variant of the VLD2 duplicate load, enabled by TARGET_BF16_SIMD.
;; The OImode destination is printed as four V4BF registers at
;; REGNO (operands[0]) + 0, + 2, + 4 and + 6, and the instruction is
;; emitted with output_asm_insn, so the returned template is empty.
;; NOTE(review): the printed register list carries no "[]" all-lanes
;; suffix even though the pattern uses UNSPEC_VLD2_DUP -- confirm that
;; this is the intended assembly.
5209(define_insn "neon_vld2_dupv8bf"
5210  [(set (match_operand:OI 0 "s_register_operand" "=w")
5211        (unspec:OI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
5212                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5213                   UNSPEC_VLD2_DUP))]
5214  "TARGET_BF16_SIMD"
5215  {
5216    rtx ops[5];
5217    int tabbase = REGNO (operands[0]);
5218
5219    ops[4] = operands[1];
5220    ops[0] = gen_rtx_REG (V4BFmode, tabbase);
5221    ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
5222    ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
5223    ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
5224    output_asm_insn ("vld2.16\t{%P0, %P1, %P2, %P3}, %A4", ops);
5225    return "";
5226  }
5227  [(set_attr "type" "neon_load2_all_lanes_q")]
5228)
5229
;; vec_store_lanes expander for a TImode structure, iterated over the VDX
;; modes through the UNSPEC_VSTRUCTDUMMY operand.  The pattern is an
;; UNSPEC_VST2 store to a neon_struct_operand; there is no preparation
;; code.
5230(define_expand "vec_store_lanesti<mode>"
5231  [(set (match_operand:TI 0 "neon_struct_operand")
5232          (unspec:TI [(match_operand:TI 1 "s_register_operand")
5233                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5234                   UNSPEC_VST2))]
5235  "TARGET_NEON")
5236
;; Structure store from a TImode register for the VDXBF modes.  When
;; <V_sz_elem> is 64 the instruction is printed as vst1.64, otherwise as
;; vst2.<V_sz_elem>.
;; NOTE(review): the non-64-bit "type" is neon_store2_one_lane<q> although
;; this is a whole-structure store -- confirm the scheduling type is
;; intended.
5237(define_insn "neon_vst2<mode>"
5238  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5239        (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5240                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5241                   UNSPEC_VST2))]
5242  "TARGET_NEON"
5243{
5244  if (<V_sz_elem> == 64)
5245    return "vst1.64\t%h1, %A0";
5246  else
5247    return "vst2.<V_sz_elem>\t%h1, %A0";
5248}
5249  [(set (attr "type")
5250      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5251                    (const_string "neon_store1_2reg<q>")
5252                    (const_string "neon_store2_one_lane<q>")))]
5253)
5254
;; Structure store from an OImode register for the VQ2BF modes, always
;; printed as vst2.<V_sz_elem> with the %h1 register list.
5255(define_insn "neon_vst2<mode>"
5256  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5257          (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5258                        (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5259                       UNSPEC_VST2))]
5260  "TARGET_NEON"
5261  "vst2.<V_sz_elem>\t%h1, %A0"
5262  [(set_attr "type" "neon_store2_4reg<q>")]
5263)
5264
5265;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5266;; here on big endian targets.
;; Single-lane VST2 from a TImode register for the VD_LANE modes.  The
;; output code builds its own operand array: the memory operand, DImode
;; registers at REGNO (operands[1]) + 0 and + 2, and the lane after
;; NEON_ENDIAN_LANE_N.  The instruction is printed with output_asm_insn,
;; so the returned template is empty.
5267(define_insn "neon_vst2_lane<mode>"
5268  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5269          (unspec:<V_two_elem>
5270            [(match_operand:TI 1 "s_register_operand" "w")
5271             (match_operand:SI 2 "immediate_operand" "i")
5272             (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5273            UNSPEC_VST2_LANE))]
5274  "TARGET_NEON"
5275{
5276  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5277  int regno = REGNO (operands[1]);
5278  rtx ops[4];
5279  ops[0] = operands[0];
5280  ops[1] = gen_rtx_REG (DImode, regno);
5281  ops[2] = gen_rtx_REG (DImode, regno + 2);
5282  ops[3] = GEN_INT (lane);
5283  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5284  return "";
5285}
5286  [(set_attr "type" "neon_store2_one_lane<q>")]
5287)
5288
5289;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5290;; here on big endian targets.
;; Single-lane VST2 from an OImode register for the VQ_HS modes.  After
;; NEON_ENDIAN_LANE_N, a lane in the upper half (lane >= max / 2) moves the
;; base register up by 2 and reduces the lane by max / 2.  The two
;; registers printed are DImode registers at the adjusted base and at
;; base + 4.  The instruction is printed with output_asm_insn, so the
;; returned template is empty.
5291(define_insn "neon_vst2_lane<mode>"
5292  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5293        (unspec:<V_two_elem>
5294           [(match_operand:OI 1 "s_register_operand" "w")
5295            (match_operand:SI 2 "immediate_operand" "i")
5296            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5297           UNSPEC_VST2_LANE))]
5298  "TARGET_NEON"
5299{
5300  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5301  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5302  int regno = REGNO (operands[1]);
5303  rtx ops[4];
5304  if (lane >= max / 2)
5305    {
5306      lane -= max / 2;
5307      regno += 2;
5308    }
5309  ops[0] = operands[0];
5310  ops[1] = gen_rtx_REG (DImode, regno);
5311  ops[2] = gen_rtx_REG (DImode, regno + 4);
5312  ops[3] = GEN_INT (lane);
5313  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5314  return "";
5315}
5316  [(set_attr "type" "neon_store2_one_lane<q>")]
5317)
5318
;; vec_load_lanes expander for an EImode structure, iterated over the VDX
;; modes through the UNSPEC_VSTRUCTDUMMY operand.  The pattern is an
;; UNSPEC_VLD3 load from a neon_struct_operand; there is no preparation
;; code.
5319(define_expand "vec_load_lanesei<mode>"
5320  [(set (match_operand:EI 0 "s_register_operand")
5321        (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5322                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5323                       UNSPEC_VLD3))]
5324  "TARGET_NEON")
5325
;; Structure load into an EImode register for the VDXBF modes.  When
;; <V_sz_elem> is 64 the instruction is printed as vld1.64, otherwise as
;; vld3.<V_sz_elem>; the "type" attribute makes the same distinction
;; (neon_load1_3reg vs. neon_load3_3reg).
5326(define_insn "neon_vld3<mode>"
5327  [(set (match_operand:EI 0 "s_register_operand" "=w")
5328        (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5329                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5330                   UNSPEC_VLD3))]
5331  "TARGET_NEON"
5332{
5333  if (<V_sz_elem> == 64)
5334    return "vld1.64\t%h0, %A1";
5335  else
5336    return "vld3.<V_sz_elem>\t%h0, %A1";
5337}
5338  [(set (attr "type")
5339      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5340                    (const_string "neon_load1_3reg<q>")
5341                    (const_string "neon_load3_3reg<q>")))]
5342)
5343
;; vec_load_lanes expander for a CImode structure over the VQ2 modes.  It
;; simply emits neon_vld3<mode> on operands 0 and 1 and finishes with
;; DONE.
5344(define_expand "vec_load_lanesci<mode>"
5345  [(match_operand:CI 0 "s_register_operand")
5346   (match_operand:CI 1 "neon_struct_operand")
5347   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5348  "TARGET_NEON"
5349{
5350  emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
5351  DONE;
5352})
5353
;; Expander for a CImode VLD3 over the VQ2BF modes.  The load is done as
;; two instructions: neon_vld3qa<mode> on the memory operand re-addressed
;; as EImode at offset 0, then neon_vld3qb<mode> on the EImode chunk
;; GET_MODE_SIZE (EImode) bytes further on.  The second instruction takes
;; operand 0 both as its output and as its register input.
5354(define_expand "neon_vld3<mode>"
5355  [(match_operand:CI 0 "s_register_operand")
5356   (match_operand:CI 1 "neon_struct_operand")
5357   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5358  "TARGET_NEON"
5359{
5360  rtx mem;
5361
5362  mem = adjust_address (operands[1], EImode, 0);
5363  emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
5364  mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5365  emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
5366  DONE;
5367})
5368
;; First half of a CImode VLD3 (UNSPEC_VLD3A).  Prints a vld3 whose
;; register list is the DImode registers at REGNO (operands[0]) + 0, + 4
;; and + 8, using output_asm_insn; the returned template is empty.
5369(define_insn "neon_vld3qa<mode>"
5370  [(set (match_operand:CI 0 "s_register_operand" "=w")
5371        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5372                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5373                   UNSPEC_VLD3A))]
5374  "TARGET_NEON"
5375{
5376  int regno = REGNO (operands[0]);
5377  rtx ops[4];
5378  ops[0] = gen_rtx_REG (DImode, regno);
5379  ops[1] = gen_rtx_REG (DImode, regno + 4);
5380  ops[2] = gen_rtx_REG (DImode, regno + 8);
5381  ops[3] = operands[1];
5382  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5383  return "";
5384}
5385  [(set_attr "type" "neon_load3_3reg<q>")]
5386)
5387
;; Second half of a CImode VLD3 (UNSPEC_VLD3B).  Operand 2 is tied to the
;; destination.  Prints a vld3 whose register list is the DImode registers
;; at REGNO (operands[0]) + 2, + 6 and + 10, using output_asm_insn; the
;; returned template is empty.
5388(define_insn "neon_vld3qb<mode>"
5389  [(set (match_operand:CI 0 "s_register_operand" "=w")
5390        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5391                    (match_operand:CI 2 "s_register_operand" "0")
5392                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5393                   UNSPEC_VLD3B))]
5394  "TARGET_NEON"
5395{
5396  int regno = REGNO (operands[0]);
5397  rtx ops[4];
5398  ops[0] = gen_rtx_REG (DImode, regno + 2);
5399  ops[1] = gen_rtx_REG (DImode, regno + 6);
5400  ops[2] = gen_rtx_REG (DImode, regno + 10);
5401  ops[3] = operands[1];
5402  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5403  return "";
5404}
5405  [(set_attr "type" "neon_load3_3reg<q>")]
5406)
5407
5408;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5409;; here on big endian targets.
;; Single-lane VLD3 into an EImode register for the VD_LANE modes.
;; Operand 2 is tied to the destination.  The output code prints DImode
;; registers at REGNO (operands[0]) + 0, + 2 and + 4, each indexed by the
;; lane after NEON_ENDIAN_LANE_N, using output_asm_insn; the returned
;; template is empty.
;; NOTE(review): the address is printed with plain %3, whereas the vld2
;; lane patterns in this file use the %A modifier -- confirm this is
;; deliberate.
5410(define_insn "neon_vld3_lane<mode>"
5411  [(set (match_operand:EI 0 "s_register_operand" "=w")
5412        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5413                    (match_operand:EI 2 "s_register_operand" "0")
5414                    (match_operand:SI 3 "immediate_operand" "i")
5415                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5416                   UNSPEC_VLD3_LANE))]
5417  "TARGET_NEON"
5418{
5419  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5420  int regno = REGNO (operands[0]);
5421  rtx ops[5];
5422  ops[0] = gen_rtx_REG (DImode, regno);
5423  ops[1] = gen_rtx_REG (DImode, regno + 2);
5424  ops[2] = gen_rtx_REG (DImode, regno + 4);
5425  ops[3] = operands[1];
5426  ops[4] = GEN_INT (lane);
5427  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5428                   ops);
5429  return "";
5430}
5431  [(set_attr "type" "neon_load3_one_lane<q>")]
5432)
5433
5434;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5435;; here on big endian targets.
;; Single-lane VLD3 into a CImode register for the VQ_HS modes.
;; Operand 2 is tied to the destination.  After NEON_ENDIAN_LANE_N, a lane
;; in the upper half (lane >= max / 2) moves the base register up by 2 and
;; reduces the lane by max / 2.  The registers printed are DImode
;; registers at the adjusted base + 0, + 4 and + 8.  The instruction is
;; printed with output_asm_insn, so the returned template is empty.
;; NOTE(review): the address is printed with plain %3, whereas the vld2
;; lane patterns in this file use the %A modifier -- confirm this is
;; deliberate.
5436(define_insn "neon_vld3_lane<mode>"
5437  [(set (match_operand:CI 0 "s_register_operand" "=w")
5438        (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5439                    (match_operand:CI 2 "s_register_operand" "0")
5440                    (match_operand:SI 3 "immediate_operand" "i")
5441                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5442                   UNSPEC_VLD3_LANE))]
5443  "TARGET_NEON"
5444{
5445  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5446  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5447  int regno = REGNO (operands[0]);
5448  rtx ops[5];
5449  if (lane >= max / 2)
5450    {
5451      lane -= max / 2;
5452      regno += 2;
5453    }
5454  ops[0] = gen_rtx_REG (DImode, regno);
5455  ops[1] = gen_rtx_REG (DImode, regno + 4);
5456  ops[2] = gen_rtx_REG (DImode, regno + 8);
5457  ops[3] = operands[1];
5458  ops[4] = GEN_INT (lane);
5459  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5460                   ops);
5461  return "";
5462}
5463  [(set_attr "type" "neon_load3_one_lane<q>")]
5464)
5465
;; VLD3 to all lanes into an EImode register for the VDXBF modes.  Modes
;; with more than one element print a vld3 with "[]" on DImode registers
;; at REGNO (operands[0]) + 0, + 2 and + 4 via output_asm_insn; a
;; one-element mode is printed as vld1 with the %h0 register list.  The
;; "type" attribute makes the same distinction on <V_mode_nunits>.
;; NOTE(review): the multi-element form prints the address with plain %3
;; while the single-element form uses %A1 -- confirm this is deliberate.
5466(define_insn "neon_vld3_dup<mode>"
5467  [(set (match_operand:EI 0 "s_register_operand" "=w")
5468        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5469                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5470                   UNSPEC_VLD3_DUP))]
5471  "TARGET_NEON"
5472{
5473  if (GET_MODE_NUNITS (<MODE>mode) > 1)
5474    {
5475      int regno = REGNO (operands[0]);
5476      rtx ops[4];
5477      ops[0] = gen_rtx_REG (DImode, regno);
5478      ops[1] = gen_rtx_REG (DImode, regno + 2);
5479      ops[2] = gen_rtx_REG (DImode, regno + 4);
5480      ops[3] = operands[1];
5481      output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
5482      return "";
5483    }
5484  else
5485    return "vld1.<V_sz_elem>\t%h0, %A1";
5486}
5487  [(set (attr "type")
5488      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5489                    (const_string "neon_load3_all_lanes<q>")
5490                    (const_string "neon_load1_1reg<q>")))])
5491
;; vld3 "all lanes" load for V8BF, available under TARGET_BF16_SIMD.  The
;; fragment prints vld3.16 with a list of three V4BF registers taken at
;; REGNO, REGNO + 2 and REGNO + 4 of the CI output.
;; The unspec is UNSPEC_VLD3_DUP, matching neon_vld3_dup<mode>; the pattern
;; was previously tagged UNSPEC_VLD2_DUP although it emits a vld3.
;; NOTE(review): the memory operand is V2BF although three elements are
;; named in the register list -- confirm the intended memory mode.
5492(define_insn "neon_vld3_dupv8bf"
5493  [(set (match_operand:CI 0 "s_register_operand" "=w")
5494        (unspec:CI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
5495                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5496                   UNSPEC_VLD3_DUP))]
5497  "TARGET_BF16_SIMD"
5498  {
5499    rtx ops[4];
5500    int tabbase = REGNO (operands[0]);
5501
5502    ops[3] = operands[1];
5503    ops[0] = gen_rtx_REG (V4BFmode, tabbase);
5504    ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
5505    ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
5506    output_asm_insn ("vld3.16\t{%P0[], %P1[], %P2[]}, %A3", ops);
5507    return "";
5508  }
5509  [(set_attr "type" "neon_load3_all_lanes_q")]
5510)
5511
;; Store-lanes expander for an EI register group over the VDX modes.  It has
;; no preparation code, so the UNSPEC_VST3 template below is emitted as
;; written.
;; NOTE(review): presumably matched by the neon_vst3<mode> insn, which has the
;; same shape over VDXBF -- confirm VDX is a subset of VDXBF.
5512(define_expand "vec_store_lanesei<mode>"
5513  [(set (match_operand:EI 0 "neon_struct_operand")
5514          (unspec:EI [(match_operand:EI 1 "s_register_operand")
5515                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5516                   UNSPEC_VST3))]
5517  "TARGET_NEON")
5518
;; Store an EI register group to memory for the VDXBF modes.  For 64-bit
;; elements the fragment emits vst1.64 of the register list; for every other
;; element size it emits vst3.
;; The vst3 case stores whole registers, so its "type" is
;; neon_store3_3reg<q> (the class neon_vst3qa/neon_vst3qb use) rather than a
;; single-lane store class.
5519(define_insn "neon_vst3<mode>"
5520  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5521        (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
5522                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5523                   UNSPEC_VST3))]
5524  "TARGET_NEON"
5525{
5526  if (<V_sz_elem> == 64)
5527    return "vst1.64\t%h1, %A0";
5528  else
5529    return "vst3.<V_sz_elem>\t%h1, %A0";
5530}
5531  [(set (attr "type")
5532      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5533                    (const_string "neon_store1_3reg<q>")
5534                    (const_string "neon_store3_3reg<q>")))])
5535
;; Store-lanes expander for a CI register group over the VQ2 modes.  It
;; forwards both operands unchanged to the neon_vst3<mode> generator.
5536(define_expand "vec_store_lanesci<mode>"
5537  [(match_operand:CI 0 "neon_struct_operand")
5538   (match_operand:CI 1 "s_register_operand")
5539   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5540  "TARGET_NEON"
5541{
5542  emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
5543  DONE;
5544})
5545
;; vst3 expander for the VQ2BF modes.  The CI-sized destination is written
;; as two consecutive EImode pieces: neon_vst3qa stores to the first piece
;; and neon_vst3qb to the piece GET_MODE_SIZE (EImode) bytes further on.
;; Both receive the whole CI source register group.
5546(define_expand "neon_vst3<mode>"
5547  [(match_operand:CI 0 "neon_struct_operand")
5548   (match_operand:CI 1 "s_register_operand")
5549   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5550  "TARGET_NEON"
5551{
5552  rtx lo_mem = adjust_address (operands[0], EImode, 0);
5553  emit_insn (gen_neon_vst3qa<mode> (lo_mem, operands[1]));
5554
5555  rtx hi_mem = adjust_address (lo_mem, EImode, GET_MODE_SIZE (EImode));
5556  emit_insn (gen_neon_vst3qb<mode> (hi_mem, operands[1]));
5557
5558  DONE;
5559})
5560
;; First half of the quad-register vst3: stores an EImode piece from a CI
;; register group.  The register list names DImode registers at REGNO,
;; REGNO + 4 and REGNO + 8 of operand 1.
5561(define_insn "neon_vst3qa<mode>"
5562  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5563        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5564                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5565                   UNSPEC_VST3A))]
5566  "TARGET_NEON"
5567{
5568  rtx ops[4];
5569  int base = REGNO (operands[1]);
5570  ops[3] = gen_rtx_REG (DImode, base + 8);
5571  ops[2] = gen_rtx_REG (DImode, base + 4);
5572  ops[1] = gen_rtx_REG (DImode, base);
5573  ops[0] = operands[0];
5574  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5575  return "";
5576}
5577  [(set_attr "type" "neon_store3_3reg<q>")]
5578)
5579
;; Second half of the quad-register vst3: same shape as neon_vst3qa, but the
;; register list names DImode registers at REGNO + 2, REGNO + 6 and
;; REGNO + 10 of operand 1.
5580(define_insn "neon_vst3qb<mode>"
5581  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5582        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5583                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5584                   UNSPEC_VST3B))]
5585  "TARGET_NEON"
5586{
5587  rtx ops[4];
5588  int base = REGNO (operands[1]);
5589  ops[3] = gen_rtx_REG (DImode, base + 10);
5590  ops[2] = gen_rtx_REG (DImode, base + 6);
5591  ops[1] = gen_rtx_REG (DImode, base + 2);
5592  ops[0] = operands[0];
5593  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5594  return "";
5595}
5596  [(set_attr "type" "neon_store3_3reg<q>")]
5597)
5598
5599;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5600;; here on big endian targets.
;; Single-lane vst3 for the VD_LANE modes.  Operand 1 is the EI source
;; register group and operand 2 the immediate lane number, which is passed
;; through NEON_ENDIAN_LANE_N before being printed.  The register list names
;; DImode registers at REGNO, REGNO + 2 and REGNO + 4.
5601(define_insn "neon_vst3_lane<mode>"
5602  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5603        (unspec:<V_three_elem>
5604           [(match_operand:EI 1 "s_register_operand" "w")
5605            (match_operand:SI 2 "immediate_operand" "i")
5606            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5607           UNSPEC_VST3_LANE))]
5608  "TARGET_NEON"
5609{
5610  rtx ops[5];
5611  int base = REGNO (operands[1]);
5612  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
5613  ops[4] = GEN_INT (lane);
5614  ops[3] = gen_rtx_REG (DImode, base + 4);
5615  ops[2] = gen_rtx_REG (DImode, base + 2);
5616  ops[1] = gen_rtx_REG (DImode, base);
5617  ops[0] = operands[0];
5618  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5619                   ops);
5620  return "";
5621}
5622  [(set_attr "type" "neon_store3_one_lane<q>")]
5623)
5624
5625;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5626;; here on big endian targets.
;; Single-lane vst3 for the VQ_HS modes.  Operand 1 is the CI source register
;; group and operand 2 the immediate lane number.  The fragment prints a vst3
;; whose register list is three DImode registers derived from REGNO.
5627(define_insn "neon_vst3_lane<mode>"
5628  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5629        (unspec:<V_three_elem>
5630           [(match_operand:CI 1 "s_register_operand" "w")
5631            (match_operand:SI 2 "immediate_operand" "i")
5632            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5633           UNSPEC_VST3_LANE))]
5634  "TARGET_NEON"
5635{
5636  rtx ops[5];
5637  int base = REGNO (operands[1]);
5638  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
5639  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  /* A lane in the upper half of the vector is rebased into the lower half
     and reached through the register numbered two above the base.  */
5640  if (lane >= nunits / 2)
5641    {
5642      base += 2;
5643      lane -= nunits / 2;
5644    }
5645  ops[4] = GEN_INT (lane);
5646  ops[3] = gen_rtx_REG (DImode, base + 8);
5647  ops[2] = gen_rtx_REG (DImode, base + 4);
5648  ops[1] = gen_rtx_REG (DImode, base);
5649  ops[0] = operands[0];
5650  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5651                   ops);
5652  return "";
5653}
5654  [(set_attr "type" "neon_store3_one_lane<q>")]
5655)
5656
;; Load-lanes expander for an OI register group over the VDX modes.  It has
;; no preparation code, so the UNSPEC_VLD4 template below is emitted as
;; written.
;; NOTE(review): presumably matched by the neon_vld4<mode> insn, which has the
;; same shape over VDXBF -- confirm VDX is a subset of VDXBF.
5657(define_expand "vec_load_lanesoi<mode>"
5658  [(set (match_operand:OI 0 "s_register_operand")
5659        (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5660                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5661                       UNSPEC_VLD4))]
5662  "TARGET_NEON")
5663
;; Load an OI register group from memory for the VDXBF modes.  For 64-bit
;; elements the fragment emits vld1.64 of the register list; for every other
;; element size it emits vld4.  The "type" attribute mirrors that choice.
5664(define_insn "neon_vld4<mode>"
5665  [(set (match_operand:OI 0 "s_register_operand" "=w")
5666        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5667                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5668                   UNSPEC_VLD4))]
5669  "TARGET_NEON"
5670{
5671  if (<V_sz_elem> == 64)
5672    return "vld1.64\t%h0, %A1";
5673  else
5674    return "vld4.<V_sz_elem>\t%h0, %A1";
5675}
5676  [(set (attr "type")
5677      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5678                    (const_string "neon_load1_4reg<q>")
5679                    (const_string "neon_load4_4reg<q>")))]
5680)
5681
;; vld4 expander for the VQ2BF modes.  The XI-sized source is read as two
;; consecutive OImode pieces: neon_vld4qa loads from the first piece, then
;; neon_vld4qb loads from the piece GET_MODE_SIZE (OImode) bytes further on,
;; taking the XI destination as an extra input as well.
5682(define_expand "neon_vld4<mode>"
5683  [(match_operand:XI 0 "s_register_operand")
5684   (match_operand:XI 1 "neon_struct_operand")
5685   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5686  "TARGET_NEON"
5687{
5688  rtx lo_mem = adjust_address (operands[1], OImode, 0);
5689  emit_insn (gen_neon_vld4qa<mode> (operands[0], lo_mem));
5690
5691  rtx hi_mem = adjust_address (lo_mem, OImode, GET_MODE_SIZE (OImode));
5692  emit_insn (gen_neon_vld4qb<mode> (operands[0], hi_mem, operands[0]));
5693
5694  DONE;
5695})
5696
;; First half of the quad-register vld4: loads an OImode piece into an XI
;; register group.  The register list names DImode registers at REGNO,
;; REGNO + 4, REGNO + 8 and REGNO + 12 of operand 0.
5697(define_insn "neon_vld4qa<mode>"
5698  [(set (match_operand:XI 0 "s_register_operand" "=w")
5699        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5700                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5701                   UNSPEC_VLD4A))]
5702  "TARGET_NEON"
5703{
5704  rtx ops[5];
5705  int base = REGNO (operands[0]);
5706  ops[4] = operands[1];
5707  ops[3] = gen_rtx_REG (DImode, base + 12);
5708  ops[2] = gen_rtx_REG (DImode, base + 8);
5709  ops[1] = gen_rtx_REG (DImode, base + 4);
5710  ops[0] = gen_rtx_REG (DImode, base);
5711  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5712  return "";
5713}
5714  [(set_attr "type" "neon_load4_4reg<q>")]
5715)
5716
;; Second half of the quad-register vld4.  Operand 2 is tied to the output
;; (constraint "0").  The register list names DImode registers at REGNO + 2,
;; REGNO + 6, REGNO + 10 and REGNO + 14 of operand 0.
5717(define_insn "neon_vld4qb<mode>"
5718  [(set (match_operand:XI 0 "s_register_operand" "=w")
5719        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5720                    (match_operand:XI 2 "s_register_operand" "0")
5721                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5722                   UNSPEC_VLD4B))]
5723  "TARGET_NEON"
5724{
5725  rtx ops[5];
5726  int base = REGNO (operands[0]);
5727  ops[4] = operands[1];
5728  ops[3] = gen_rtx_REG (DImode, base + 14);
5729  ops[2] = gen_rtx_REG (DImode, base + 10);
5730  ops[1] = gen_rtx_REG (DImode, base + 6);
5731  ops[0] = gen_rtx_REG (DImode, base + 2);
5732  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5733  return "";
5734}
5735  [(set_attr "type" "neon_load4_4reg<q>")]
5736)
5737
5738;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5739;; here on big endian targets.
;; Single-lane vld4 for the VD_LANE modes.  Operand 1 is the memory
;; structure, operand 2 is tied to the output (constraint "0") and operand 3
;; is the immediate lane number, passed through NEON_ENDIAN_LANE_N before
;; being printed.  The register list names DImode registers at REGNO,
;; REGNO + 2, REGNO + 4 and REGNO + 6.
5740(define_insn "neon_vld4_lane<mode>"
5741  [(set (match_operand:OI 0 "s_register_operand" "=w")
5742        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5743                    (match_operand:OI 2 "s_register_operand" "0")
5744                    (match_operand:SI 3 "immediate_operand" "i")
5745                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5746                   UNSPEC_VLD4_LANE))]
5747  "TARGET_NEON"
5748{
5749  rtx ops[6];
5750  int base = REGNO (operands[0]);
5751  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5752  ops[5] = GEN_INT (lane);
5753  ops[4] = operands[1];
5754  ops[3] = gen_rtx_REG (DImode, base + 6);
5755  ops[2] = gen_rtx_REG (DImode, base + 4);
5756  ops[1] = gen_rtx_REG (DImode, base + 2);
5757  ops[0] = gen_rtx_REG (DImode, base);
5758  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5759                   ops);
5760  return "";
5761}
5762  [(set_attr "type" "neon_load4_one_lane<q>")]
5763)
5764
5765;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5766;; here on big endian targets.
;; Single-lane vld4 for the VQ_HS modes.  Operand 1 is the memory structure,
;; operand 2 is tied to the output (constraint "0") and operand 3 is the
;; immediate lane number.  The fragment prints a vld4 whose register list is
;; four DImode registers derived from the output's REGNO.
5767(define_insn "neon_vld4_lane<mode>"
5768  [(set (match_operand:XI 0 "s_register_operand" "=w")
5769        (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5770                    (match_operand:XI 2 "s_register_operand" "0")
5771                    (match_operand:SI 3 "immediate_operand" "i")
5772                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5773                   UNSPEC_VLD4_LANE))]
5774  "TARGET_NEON"
5775{
5776  rtx ops[6];
5777  int base = REGNO (operands[0]);
5778  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
5779  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  /* A lane in the upper half of the vector is rebased into the lower half
     and reached through the register numbered two above the base.  */
5780  if (lane >= nunits / 2)
5781    {
5782      base += 2;
5783      lane -= nunits / 2;
5784    }
5785  ops[5] = GEN_INT (lane);
5786  ops[4] = operands[1];
5787  ops[3] = gen_rtx_REG (DImode, base + 12);
5788  ops[2] = gen_rtx_REG (DImode, base + 8);
5789  ops[1] = gen_rtx_REG (DImode, base + 4);
5790  ops[0] = gen_rtx_REG (DImode, base);
5791  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5792                   ops);
5793  return "";
5794}
5795  [(set_attr "type" "neon_load4_one_lane<q>")]
5796)
5797
;; vld4 "all lanes" load for the VDXBF modes.  When the mode has a single
;; element the fragment emits a plain vld1 of the register group instead;
;; the "type" attribute makes the same distinction via V_mode_nunits.
5798(define_insn "neon_vld4_dup<mode>"
5799  [(set (match_operand:OI 0 "s_register_operand" "=w")
5800        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5801                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5802                   UNSPEC_VLD4_DUP))]
5803  "TARGET_NEON"
5804{
5805  rtx ops[5];
5806  int base;
5807
  /* Single-element modes: nothing to replicate, use vld1.  */
5808  if (!(GET_MODE_NUNITS (<MODE>mode) > 1))
5809    return "vld1.<V_sz_elem>\t%h0, %A1";
5810
  /* Otherwise name four registers two register numbers apart.  */
5811  base = REGNO (operands[0]);
5812  ops[4] = operands[1];
5813  ops[3] = gen_rtx_REG (DImode, base + 6);
5814  ops[2] = gen_rtx_REG (DImode, base + 4);
5815  ops[1] = gen_rtx_REG (DImode, base + 2);
5816  ops[0] = gen_rtx_REG (DImode, base);
5817  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5818                   ops);
5819  return "";
5820}
5821  [(set (attr "type")
5822      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5823                    (const_string "neon_load4_all_lanes<q>")
5824                    (const_string "neon_load1_1reg<q>")))]
5825)
5826
;; vld4 "all lanes" load for V8BF, available under TARGET_BF16_SIMD.  The
;; fragment prints vld4.16 with a list of four V4BF registers taken at
;; REGNO, REGNO + 2, REGNO + 4 and REGNO + 6 of the XI output.
;; The unspec is UNSPEC_VLD4_DUP, matching neon_vld4_dup<mode>; the pattern
;; was previously tagged UNSPEC_VLD2_DUP although it emits a vld4.
;; NOTE(review): the memory operand is V2BF although four elements are named
;; in the register list -- confirm the intended memory mode.
5827(define_insn "neon_vld4_dupv8bf"
5828  [(set (match_operand:XI 0 "s_register_operand" "=w")
5829        (unspec:XI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
5830                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5831                   UNSPEC_VLD4_DUP))]
5832  "TARGET_BF16_SIMD"
5833  {
5834    rtx ops[5];
5835    int tabbase = REGNO (operands[0]);
5836
5837    ops[4] = operands[1];
5838    ops[0] = gen_rtx_REG (V4BFmode, tabbase);
5839    ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
5840    ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
5841    ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
5842    output_asm_insn ("vld4.16\t{%P0[], %P1[], %P2[], %P3[]}, %A4", ops);
5843    return "";
5844  }
5845  [(set_attr "type" "neon_load4_all_lanes_q")]
5846)
5847
;; Store-lanes expander for an OI register group over the VDX modes.  It has
;; no preparation code, so the UNSPEC_VST4 template below is emitted as
;; written.
;; NOTE(review): presumably matched by the neon_vst4<mode> insn, which has the
;; same shape over VDXBF -- confirm VDX is a subset of VDXBF.
5848(define_expand "vec_store_lanesoi<mode>"
5849  [(set (match_operand:OI 0 "neon_struct_operand")
5850          (unspec:OI [(match_operand:OI 1 "s_register_operand")
5851                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5852                   UNSPEC_VST4))]
5853  "TARGET_NEON")
5854
;; Store an OI register group to memory for the VDXBF modes.  For 64-bit
;; elements the fragment emits vst1.64 of the register list; for every other
;; element size it emits vst4.  The "type" attribute mirrors that choice.
5855(define_insn "neon_vst4<mode>"
5856  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5857        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5858                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5859                   UNSPEC_VST4))]
5860  "TARGET_NEON"
5861{
5862  if (<V_sz_elem> == 64)
5863    return "vst1.64\t%h1, %A0";
5864  else
5865    return "vst4.<V_sz_elem>\t%h1, %A0";
5866}
5867  [(set (attr "type")
5868      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5869                    (const_string "neon_store1_4reg<q>")
5870                    (const_string "neon_store4_4reg<q>")))]
5871)
5872
;; vst4 expander for the VQ2BF modes.  The XI-sized destination is written
;; as two consecutive OImode pieces: neon_vst4qa stores to the first piece
;; and neon_vst4qb to the piece GET_MODE_SIZE (OImode) bytes further on.
;; Both receive the whole XI source register group.
5873(define_expand "neon_vst4<mode>"
5874  [(match_operand:XI 0 "neon_struct_operand")
5875   (match_operand:XI 1 "s_register_operand")
5876   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5877  "TARGET_NEON"
5878{
5879  rtx lo_mem = adjust_address (operands[0], OImode, 0);
5880  emit_insn (gen_neon_vst4qa<mode> (lo_mem, operands[1]));
5881
5882  rtx hi_mem = adjust_address (lo_mem, OImode, GET_MODE_SIZE (OImode));
5883  emit_insn (gen_neon_vst4qb<mode> (hi_mem, operands[1]));
5884
5885  DONE;
5886})
5887
;; First half of the quad-register vst4: stores an OImode piece from an XI
;; register group.  The register list names DImode registers at REGNO,
;; REGNO + 4, REGNO + 8 and REGNO + 12 of operand 1.
5888(define_insn "neon_vst4qa<mode>"
5889  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5890        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5891                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5892                   UNSPEC_VST4A))]
5893  "TARGET_NEON"
5894{
5895  rtx ops[5];
5896  int base = REGNO (operands[1]);
5897  ops[4] = gen_rtx_REG (DImode, base + 12);
5898  ops[3] = gen_rtx_REG (DImode, base + 8);
5899  ops[2] = gen_rtx_REG (DImode, base + 4);
5900  ops[1] = gen_rtx_REG (DImode, base);
5901  ops[0] = operands[0];
5902  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5903  return "";
5904}
5905  [(set_attr "type" "neon_store4_4reg<q>")]
5906)
5907
;; Second half of the quad-register vst4: same shape as neon_vst4qa, but the
;; register list names DImode registers at REGNO + 2, REGNO + 6, REGNO + 10
;; and REGNO + 14 of operand 1.
5908(define_insn "neon_vst4qb<mode>"
5909  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5910        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5911                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5912                   UNSPEC_VST4B))]
5913  "TARGET_NEON"
5914{
5915  rtx ops[5];
5916  int base = REGNO (operands[1]);
5917  ops[4] = gen_rtx_REG (DImode, base + 14);
5918  ops[3] = gen_rtx_REG (DImode, base + 10);
5919  ops[2] = gen_rtx_REG (DImode, base + 6);
5920  ops[1] = gen_rtx_REG (DImode, base + 2);
5921  ops[0] = operands[0];
5922  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5923  return "";
5924}
5925  [(set_attr "type" "neon_store4_4reg<q>")]
5926)
5927
5928;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5929;; here on big endian targets.
;; Single-lane vst4 for the VD_LANE modes.  Operand 1 is the OI source
;; register group and operand 2 the immediate lane number, which is passed
;; through NEON_ENDIAN_LANE_N before being printed.  The register list names
;; DImode registers at REGNO, REGNO + 2, REGNO + 4 and REGNO + 6.
5930(define_insn "neon_vst4_lane<mode>"
5931  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5932        (unspec:<V_four_elem>
5933           [(match_operand:OI 1 "s_register_operand" "w")
5934            (match_operand:SI 2 "immediate_operand" "i")
5935            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5936           UNSPEC_VST4_LANE))]
5937  "TARGET_NEON"
5938{
5939  rtx ops[6];
5940  int base = REGNO (operands[1]);
5941  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
5942  ops[5] = GEN_INT (lane);
5943  ops[4] = gen_rtx_REG (DImode, base + 6);
5944  ops[3] = gen_rtx_REG (DImode, base + 4);
5945  ops[2] = gen_rtx_REG (DImode, base + 2);
5946  ops[1] = gen_rtx_REG (DImode, base);
5947  ops[0] = operands[0];
5948  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5949                   ops);
5950  return "";
5951}
5952  [(set_attr "type" "neon_store4_one_lane<q>")]
5953)
5954
5955;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5956;; here on big endian targets.
;; Single-lane vst4 for the VQ_HS modes.  Operand 1 is the XI source register
;; group and operand 2 the immediate lane number.  The fragment prints a vst4
;; whose register list is four DImode registers derived from REGNO.
;; This stores one lane per register, so its "type" is
;; neon_store4_one_lane<q>, the class the VD_LANE variant of this pattern
;; uses, rather than the whole-register neon_store4_4reg<q> class.
5957(define_insn "neon_vst4_lane<mode>"
5958  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5959        (unspec:<V_four_elem>
5960           [(match_operand:XI 1 "s_register_operand" "w")
5961            (match_operand:SI 2 "immediate_operand" "i")
5962            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5963           UNSPEC_VST4_LANE))]
5964  "TARGET_NEON"
5965{
5966  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5967  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5968  int regno = REGNO (operands[1]);
5969  rtx ops[6];
  /* A lane in the upper half of the vector is rebased into the lower half
     and reached through the register numbered two above the base.  */
5970  if (lane >= max / 2)
5971    {
5972      lane -= max / 2;
5973      regno += 2;
5974    }
5975  ops[0] = operands[0];
5976  ops[1] = gen_rtx_REG (DImode, regno);
5977  ops[2] = gen_rtx_REG (DImode, regno + 4);
5978  ops[3] = gen_rtx_REG (DImode, regno + 8);
5979  ops[4] = gen_rtx_REG (DImode, regno + 12);
5980  ops[5] = GEN_INT (lane);
5981  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5982                   ops);
5983  return "";
5984}
5985  [(set_attr "type" "neon_store4_one_lane<q>")]
5986)
5987
;; Widen (sign- or zero-extend, per the SE iterator) the elements of operand 1
;; selected by operand 2, which must satisfy vect_par_constant_low.  Emits
;; vmovl and is only enabled for little-endian.
;; NOTE(review): presumably %e1 prints the low D half of the Q register --
;; confirm in the backend's operand printer.
5988(define_insn "neon_vec_unpack<US>_lo_<mode>"
5989  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5990        (SE:<V_unpack> (vec_select:<V_HALF>
5991                                (match_operand:VU 1 "register_operand" "w")
5992                                (match_operand:VU 2 "vect_par_constant_low" ""))))]
5993  "TARGET_NEON && !BYTES_BIG_ENDIAN"
5994  "vmovl.<US><V_sz_elem> %q0, %e1"
5995  [(set_attr "type" "neon_shift_imm_long")]
5996)
5997
;; Widen (sign- or zero-extend, per the SE iterator) the elements of operand 1
;; selected by operand 2, which must satisfy vect_par_constant_high.  Emits
;; vmovl and is only enabled for little-endian.
;; NOTE(review): presumably %f1 prints the high D half of the Q register --
;; confirm in the backend's operand printer.
5998(define_insn "neon_vec_unpack<US>_hi_<mode>"
5999  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6000        (SE:<V_unpack> (vec_select:<V_HALF>
6001                                (match_operand:VU 1 "register_operand" "w")
6002                                (match_operand:VU 2 "vect_par_constant_high" ""))))]
6003  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6004  "vmovl.<US><V_sz_elem> %q0, %f1"
6005  [(set_attr "type" "neon_shift_imm_long")]
6006)
6007
;; Unpack-high expander for the VU modes (little-endian only).  It builds a
;; PARALLEL holding the indices nunits/2 .. nunits-1 and passes it as the
;; selector operand of neon_vec_unpack<US>_hi_<mode>.
6008(define_expand "vec_unpack<US>_hi_<mode>"
6009  [(match_operand:<V_unpack> 0 "register_operand")
6010   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6011 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6012  {
6013   const int half = <V_mode_nunits> / 2;
6014   rtvec sel = rtvec_alloc (half);
6015   rtx par;
6016
6017   for (int idx = 0; idx < half; idx++)
6018     RTVEC_ELT (sel, idx) = GEN_INT (half + idx);
6019   par = gen_rtx_PARALLEL (<MODE>mode, sel);
6020   emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0], operands[1],
6021                                                 par));
6022
6023   DONE;
6024  }
6025)
6026
;; Unpack-low expander for the VU modes (little-endian only).  It builds a
;; PARALLEL holding the indices 0 .. nunits/2-1 and passes it as the selector
;; operand of neon_vec_unpack<US>_lo_<mode>.
6027(define_expand "vec_unpack<US>_lo_<mode>"
6028  [(match_operand:<V_unpack> 0 "register_operand")
6029   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6030 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6031  {
6032   const int half = <V_mode_nunits> / 2;
6033   rtvec sel = rtvec_alloc (half);
6034   rtx par;
6035
6036   for (int idx = 0; idx < half; idx++)
6037     RTVEC_ELT (sel, idx) = GEN_INT (idx);
6038   par = gen_rtx_PARALLEL (<MODE>mode, sel);
6039   emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0], operands[1],
6040                                                 par));
6041   DONE;
6042  }
6043)
6044
;; Widening multiply of the elements of operands 1 and 3 chosen by the shared
;; selector operand 2 (vect_par_constant_low; reused through match_dup).
;; Emits vmull and is only enabled for little-endian.
6045(define_insn "neon_vec_<US>mult_lo_<mode>"
6046 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6047       (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6048                                 (match_operand:VU 1 "register_operand" "w")
6049                           (match_operand:VU 2 "vect_par_constant_low" "")))
6050                            (SE:<V_unpack> (vec_select:<V_HALF>
6051                           (match_operand:VU 3 "register_operand" "w")
6052                           (match_dup 2)))))]
6053  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6054  "vmull.<US><V_sz_elem> %q0, %e1, %e3"
6055  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6056)
6057
;; Widening-multiply-low expander for the VU modes (little-endian only).  It
;; builds a PARALLEL holding the indices 0 .. nunits/2-1 and passes it,
;; between the two source vectors, to neon_vec_<US>mult_lo_<mode>.
6058(define_expand "vec_widen_<US>mult_lo_<mode>"
6059  [(match_operand:<V_unpack> 0 "register_operand")
6060   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6061   (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
6062 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6063 {
6064   const int half = <V_mode_nunits> / 2;
6065   rtvec sel = rtvec_alloc (half);
6066   rtx par;
6067
6068   for (int idx = 0; idx < half; idx++)
6069     RTVEC_ELT (sel, idx) = GEN_INT (idx);
6070   par = gen_rtx_PARALLEL (<MODE>mode, sel);
6071   emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
6072                                               operands[1], par,
6073                                               operands[2]));
6074
6075   DONE;
6076 }
6077)
6078
;; Widening multiply of the elements of operands 1 and 3 chosen by the shared
;; selector operand 2 (vect_par_constant_high; reused through match_dup).
;; Emits vmull and is only enabled for little-endian.
6079(define_insn "neon_vec_<US>mult_hi_<mode>"
6080 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6081      (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6082                                  (match_operand:VU 1 "register_operand" "w")
6083                                  (match_operand:VU 2 "vect_par_constant_high" "")))
6084                           (SE:<V_unpack> (vec_select:<V_HALF>
6085                                  (match_operand:VU 3 "register_operand" "w")
6086                                  (match_dup 2)))))]
6087  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6088  "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6089  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6090)
6091
;; Widening-multiply-high expander for the VU modes (little-endian only).  It
;; builds a PARALLEL holding the indices nunits/2 .. nunits-1 and passes it,
;; between the two source vectors, to neon_vec_<US>mult_hi_<mode>.
6092(define_expand "vec_widen_<US>mult_hi_<mode>"
6093  [(match_operand:<V_unpack> 0 "register_operand")
6094   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6095   (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
6096 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6097 {
6098   const int half = <V_mode_nunits> / 2;
6099   rtvec sel = rtvec_alloc (half);
6100   rtx par;
6101
6102   for (int idx = 0; idx < half; idx++)
6103     RTVEC_ELT (sel, idx) = GEN_INT (half + idx);
6104   par = gen_rtx_PARALLEL (<MODE>mode, sel);
6105
6106   emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
6107                                               operands[1], par,
6108                                               operands[2]));
6109
6110   DONE;
6111 }
6112)
6113
;; Widening shift left for the VW modes: the source vector is shifted by the
;; constant operand 2 and extended (per the SE iterator) to V_widen.  The
;; fragment always returns the same vshll template.
6114(define_insn "neon_vec_<US>shiftl_<mode>"
6115 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6116       (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6117       (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6118  "TARGET_NEON"
6119{
6120  return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6121}
6122  [(set_attr "type" "neon_shift_imm_long")]
6123)
6124
;; Widening-shift-left-low expander for the VU modes (little-endian only).
;; It takes the V_HALF subreg of operand 1 at byte offset 0 and hands it,
;; with the shift count, to the half-width neon_vec_<US>shiftl pattern.
6125(define_expand "vec_widen_<US>shiftl_lo_<mode>"
6126  [(match_operand:<V_unpack> 0 "register_operand")
6127   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6128   (match_operand:SI 2 "immediate_operand")]
6129 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6130 {
6131  rtx half = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0);
6132  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], half,
6133                                               operands[2]));
6134   DONE;
6135 }
6136)
6137
;; Widening-shift-left-high expander for the VU modes (little-endian only).
;; It takes the V_HALF subreg of operand 1 at byte offset
;; GET_MODE_SIZE (V_HALF mode) and hands it, with the shift count, to the
;; half-width neon_vec_<US>shiftl pattern.
6138(define_expand "vec_widen_<US>shiftl_hi_<mode>"
6139  [(match_operand:<V_unpack> 0 "register_operand")
6140   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6141   (match_operand:SI 2 "immediate_operand")]
6142 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6143 {
6144  rtx upper = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6145                                   GET_MODE_SIZE (<V_HALF>mode));
6146  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], upper,
6147                                               operands[2]));
6148   DONE;
6149 }
6150)
6151
6152;; Vectorize for non-neon-quad case
;; Extend (sign or zero, per the SE iterator) a whole VDI vector to its
;; V_widen mode with a single vmovl.
6153(define_insn "neon_unpack<US>_<mode>"
6154 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6155       (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6156 "TARGET_NEON"
6157 "vmovl.<US><V_sz_elem> %q0, %P1"
6158  [(set_attr "type" "neon_move")]
6159)
6160
;; Unpack-low expander for the VDI modes: widen the whole input into a fresh
;; V_widen register with neon_unpack<US>_<mode>, then copy out its low half
;; with neon_vget_low.
6161(define_expand "vec_unpack<US>_lo_<mode>"
6162 [(match_operand:<V_double_width> 0 "register_operand")
6163  (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6164 "TARGET_NEON"
6165{
6166  rtx widened = gen_reg_rtx (<V_widen>mode);
6167
6168  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
6169  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], widened));
6170  DONE;
6171}
6172)
6173
;; Unpack-high expander for the VDI modes: widen the whole input into a fresh
;; V_widen register with neon_unpack<US>_<mode>, then copy out its high half
;; with neon_vget_high.
6174(define_expand "vec_unpack<US>_hi_<mode>"
6175 [(match_operand:<V_double_width> 0 "register_operand")
6176  (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6177 "TARGET_NEON"
6178{
6179  rtx widened = gen_reg_rtx (<V_widen>mode);
6180
6181  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
6182  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], widened));
6183  DONE;
6184}
6185)
6186
;; Widening multiply of two whole VDI vectors: both inputs are extended (per
;; the SE iterator) to V_widen and multiplied with a single vmull.
6187(define_insn "neon_vec_<US>mult_<mode>"
6188 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6189       (mult:<V_widen> (SE:<V_widen>
6190                                 (match_operand:VDI 1 "register_operand" "w"))
6191                           (SE:<V_widen>
6192                                 (match_operand:VDI 2 "register_operand" "w"))))]
6193  "TARGET_NEON"
6194  "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6195  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6196)
6197
;; Widening-multiply-high expander for the VDI modes: compute the full
;; widened product into a fresh V_widen register with
;; neon_vec_<US>mult_<mode>, then copy out its high half with neon_vget_high.
6198(define_expand "vec_widen_<US>mult_hi_<mode>"
6199  [(match_operand:<V_double_width> 0 "register_operand")
6200   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6201   (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
6202 "TARGET_NEON"
6203 {
6204   rtx product = gen_reg_rtx (<V_widen>mode);
6205
6206   emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1], operands[2]));
6207   emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], product));
6208
6209   DONE;
6210 }
6211)
6212
;; Widening-multiply-low expander for the VDI modes: compute the full widened
;; product into a fresh V_widen register with neon_vec_<US>mult_<mode>, then
;; copy out its low half with neon_vget_low.
6213(define_expand "vec_widen_<US>mult_lo_<mode>"
6214  [(match_operand:<V_double_width> 0 "register_operand")
6215   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6216   (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
6217 "TARGET_NEON"
6218 {
6219   rtx product = gen_reg_rtx (<V_widen>mode);
6220
6221   emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1], operands[2]));
6222   emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], product));
6223
6224   DONE;
6225 }
6226)
6227
;; Widening-shift-left-high expander for the VDI modes: compute the full
;; widened shift into a fresh V_widen register with
;; neon_vec_<US>shiftl_<mode>, then copy out its high half with
;; neon_vget_high.
6228(define_expand "vec_widen_<US>shiftl_hi_<mode>"
6229 [(match_operand:<V_double_width> 0 "register_operand")
6230   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6231   (match_operand:SI 2 "immediate_operand")]
6232 "TARGET_NEON"
6233 {
6234   rtx shifted = gen_reg_rtx (<V_widen>mode);
6235
6236   emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1], operands[2]));
6237   emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], shifted));
6238   DONE;
6239 }
6240)
6241
;; Widening-shift-left-low expander for the VDI modes: compute the full
;; widened shift into a fresh V_widen register with
;; neon_vec_<US>shiftl_<mode>, then copy out its low half with neon_vget_low.
6242(define_expand "vec_widen_<US>shiftl_lo_<mode>"
6243  [(match_operand:<V_double_width> 0 "register_operand")
6244   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6245   (match_operand:SI 2 "immediate_operand")]
6246 "TARGET_NEON"
6247 {
6248   rtx shifted = gen_reg_rtx (<V_widen>mode);
6249
6250   emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1], operands[2]));
6251   emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], shifted));
6252   DONE;
6253 }
6254)
6255
6256; FIXME: These instruction patterns can't be used safely in big-endian mode
6257; because the ordering of vector elements in Q registers is different from what
6258; the semantics of the instructions require.
6259
;; Pack two VN vectors into one narrower-element vector: operand 1 is
;; truncated into one half of the result and operand 2 into the other, using
;; two vmovn instructions (hence length 8).  Little-endian only.
;; The output is marked early-clobber ("&") because the first vmovn writes
;; part of operand 0 before the second one reads operand 2.
6260(define_insn "vec_pack_trunc_<mode>"
6261 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6262       (vec_concat:<V_narrow_pack>
6263                    (truncate:<V_narrow>
6264                              (match_operand:VN 1 "register_operand" "w"))
6265                    (truncate:<V_narrow>
6266                              (match_operand:VN 2 "register_operand" "w"))))]
6267 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6268 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6269 [(set_attr "type" "multiple")
6270  (set_attr "length" "8")]
6271)
6272
6273;; For the non-quad case.
;; Truncate a single VN vector to its V_narrow mode with one vmovn.
;; Little-endian only.
6274(define_insn "neon_vec_pack_trunc_<mode>"
6275 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6276       (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6277 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6278 "vmovn.i<V_sz_elem>\t%P0, %q1"
6279 [(set_attr "type" "neon_move_narrow_q")]
6280)
6281
;; Pack-truncate expander for the VSHFT modes (little-endian only).  The two
;; inputs are placed in the low and high halves of a fresh V_DOUBLE register
;; with move_lo_quad / move_hi_quad, and that register is then narrowed into
;; operand 0 with neon_vec_pack_trunc.
6282(define_expand "vec_pack_trunc_<mode>"
6283 [(match_operand:<V_narrow_pack> 0 "register_operand")
6284  (match_operand:VSHFT 1 "register_operand")
6285  (match_operand:VSHFT 2 "register_operand")]
6286 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6287{
6288  rtx joined = gen_reg_rtx (<V_DOUBLE>mode);
6289  emit_insn (gen_move_lo_quad_<V_double> (joined, operands[1]));
6290  emit_insn (gen_move_hi_quad_<V_double> (joined, operands[2]));
6291
6292  emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], joined));
6293  DONE;
6294})
6295
;; Floating-point absolute difference for the VF modes, recognised from
;; abs (minus (op1, op2)) and emitted as a single vabd.  Enabled by
;; ARM_HAVE_NEON_<MODE>_ARITH.
6296(define_insn "neon_vabd<mode>_2"
6297 [(set (match_operand:VF 0 "s_register_operand" "=w")
6298       (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6299                               (match_operand:VF 2 "s_register_operand" "w"))))]
6300 "ARM_HAVE_NEON_<MODE>_ARITH"
6301 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6302 [(set_attr "type" "neon_fp_abd_s<q>")]
6303)
6304
;; Variant of the VABD pattern in which the subtraction is represented by
;; an UNSPEC_VSUB of operands 1 and 2 instead of a plain minus; the abs of
;; that unspec is emitted as one VABD.
(define_insn "neon_vabd<mode>_3"
  [(set (match_operand:VF 0 "s_register_operand" "=w")
        (abs:VF
          (unspec:VF
            [(match_operand:VF 1 "s_register_operand" "w")
             (match_operand:VF 2 "s_register_operand" "w")]
            UNSPEC_VSUB)))]
  "ARM_HAVE_NEON_<MODE>_ARITH"
  "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)
6314
;; 8-bit integer matrix multiply-accumulate (one pattern per MATMUL unspec):
;; operand 0 = MATMUL (operand 2, operand 3) + operand 1, with the V4SI
;; accumulator (operand 1) tied to the destination.
(define_insn "neon_<sup>mmlav16qi"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (plus:V4SI
          (unspec:V4SI
            [(match_operand:V16QI 2 "register_operand" "w")
             (match_operand:V16QI 3 "register_operand" "w")]
            MATMUL)
          (match_operand:V4SI 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "v<sup>mmla.<mmla_sfx>\t%q0, %q2, %q3"
  [(set_attr "type" "neon_mla_s_q")]
)
6325
;; BFloat16 dot product with accumulate:
;; operand 0 = operand 1 + UNSPEC_DOT_S (operand 2, operand 3).
;; The VCVTF accumulator (operand 1) is tied to the destination.
(define_insn "neon_vbfdot<VCVTF:mode>"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (plus:VCVTF
          (match_operand:VCVTF 1 "register_operand" "0")
          (unspec:VCVTF
            [(match_operand:<VSF2BF> 2 "register_operand" "w")
             (match_operand:<VSF2BF> 3 "register_operand" "w")]
            UNSPEC_DOT_S)))]
  "TARGET_BF16_SIMD"
  "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_dot<q>")]
)
6337
;; BFloat16 dot product by lane, where the lane source (operand 3) is a
;; V4BF vector.  Operand 4 is the lane index and is printed unchanged as
;; the subscript of operand 3.  Operand 3 uses the "x" constraint.
(define_insn "neon_vbfdot_lanev4bf<VCVTF:mode>"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (plus:VCVTF
          (match_operand:VCVTF 1 "register_operand" "0")
          (unspec:VCVTF
            [(match_operand:<VSF2BF> 2 "register_operand" "w")
             (match_operand:V4BF 3 "register_operand" "x")
             (match_operand:SI 4 "immediate_operand" "i")]
            UNSPEC_DOT_S)))]
  "TARGET_BF16_SIMD"
  "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
  [(set_attr "type" "neon_dot<q>")]
)
6350
;; BFloat16 dot product by lane, where the lane source (operand 3) is a
;; V8BF vector.  The output code splits the lane index at a quarter of
;; operand 3's unit count: indices below that are printed against %e3,
;; the rest are rebased and printed against %f3.
(define_insn "neon_vbfdot_lanev8bf<VCVTF:mode>"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (plus:VCVTF
          (match_operand:VCVTF 1 "register_operand" "0")
          (unspec:VCVTF
            [(match_operand:<VSF2BF> 2 "register_operand" "w")
             (match_operand:V8BF 3 "register_operand" "x")
             (match_operand:SI 4 "immediate_operand" "i")]
            UNSPEC_DOT_S)))]
  "TARGET_BF16_SIMD"
  {
    /* Number of lane indices addressed through each of %e3 and %f3.  */
    int split = GET_MODE_NUNITS (GET_MODE (operands[3])) / 4;
    int idx = INTVAL (operands[4]);

    if (idx >= split)
      {
        /* Rebase the index so that it is relative to %f3.  */
        operands[4] = GEN_INT (idx - split);
        return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %f3[%c4]";
      }

    return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %e3[%c4]";
  }
  [(set_attr "type" "neon_dot<q>")]
)
6373
;; Convert a V4SF vector (operand 1) to BFloat16 with VCVT.BF16.F32.  The
;; result has a VBFCVT mode and is printed through the <V_bf_low> modifier.
(define_insn "neon_vbfcvtv4sf<VBFCVT:mode>"
  [(set (match_operand:VBFCVT 0 "register_operand" "=w")
        (unspec:VBFCVT
          [(match_operand:V4SF 1 "register_operand" "w")]
          UNSPEC_BFCVT))]
  "TARGET_BF16_SIMD"
  "vcvt.bf16.f32\\t%<V_bf_low>0, %q1"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)
6382
;; Convert a V4SF vector (operand 2) to BFloat16 and write it to %f0 of the
;; V8BF destination.  Operand 1 is the incoming V8BF value and is tied to
;; the destination ("0").
(define_insn "neon_vbfcvtv4sf_highv8bf"
  [(set (match_operand:V8BF 0 "register_operand" "=w")
        (unspec:V8BF
          [(match_operand:V8BF 1 "register_operand" "0")
           (match_operand:V4SF 2 "register_operand" "w")]
          UNSPEC_BFCVT_HIGH))]
  "TARGET_BF16_SIMD"
  "vcvt.bf16.f32\\t%f0, %q2"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)
6392
;; Scalar SF -> BF conversion with VCVTB.BF16.F32; both operands use the
;; "t" register constraint.
(define_insn "neon_vbfcvtsf"
  [(set (match_operand:BF 0 "register_operand" "=t")
        (unspec:BF
          [(match_operand:SF 1 "register_operand" "t")]
          UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "vcvtb.bf16.f32\\t%0, %1"
  [(set_attr "type" "f_cvt")]
)
6401
;; Convert BFloat16 elements (operand 1, a VBFCVT mode printed through
;; <V_bf_low>) to a V4SF vector with a widening left shift by 16.
;; NOTE(review): VSHLL's data type names the source element size; confirm
;; that ".u32" with #16 gives the intended per-lane BF16 -> F32 widening.
(define_insn "neon_vbfcvt<VBFCVT:mode>"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (unspec:V4SF
          [(match_operand:VBFCVT 1 "register_operand" "w")]
          UNSPEC_BFCVT))]
  "TARGET_BF16_SIMD"
  "vshll.u32\\t%q0, %<V_bf_low>1, #16"
  [(set_attr "type" "neon_shift_imm_q")]
)
6410
;; Convert the BFloat16 elements taken from %f1 of a V8BF vector to a V4SF
;; vector with a widening left shift by 16.
;; NOTE(review): VSHLL's data type names the source element size; confirm
;; that ".u32" with #16 gives the intended per-lane BF16 -> F32 widening.
(define_insn "neon_vbfcvt_highv8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (unspec:V4SF
          [(match_operand:V8BF 1 "register_operand" "w")]
          UNSPEC_BFCVT_HIGH))]
  "TARGET_BF16_SIMD"
  "vshll.u32\\t%q0, %f1, #16"
  [(set_attr "type" "neon_shift_imm_q")]
)
6419
;; Scalar BF -> SF conversion, done with a vector VSHL.
;; VSHL cannot operate on the 32-bit registers in which the BF and SF
;; scalars would be allocated, so the value is routed through V2SI
;; temporaries that live in 64-bit registers:
;;   1. reinterpret the BF input as V2SI,
;;   2. shift the V2SI left by 16,
;;   3. reinterpret the shifted V2SI as the SF result.
(define_expand "neon_vbfcvtbf"
  [(match_operand:SF 0 "register_operand")
   (unspec:SF [(match_operand:BF 1 "register_operand")] UNSPEC_BFCVT)]
  "TARGET_BF16_FP"
{
  rtx bf_as_v2si = gen_reg_rtx (V2SImode);
  rtx shifted = gen_reg_rtx (V2SImode);
  rtx shift_amount = gen_int_mode(16, SImode);

  emit_insn (gen_neon_vbfcvtbf_cvtmodev2si (bf_as_v2si, operands[1]));
  emit_insn (gen_neon_vshl_nv2si (shifted, bf_as_v2si, shift_amount));
  emit_insn (gen_neon_vbfcvtbf_cvtmodesf (operands[0], shifted));
  DONE;
})
6436
;; Mode-changing helpers: BF -> V2SI and V2SI -> SF.
;; The 32-bit operand is placed in the low half of a 64-bit register that is
;; indexed by a 32-bit sub-register number.  Operand 1 is tied to operand 0
;; and the output template is empty, so the pattern itself emits no
;; instruction.  This will generate reloads, but the compiler can optimize
;; the resulting moves away.  The "x" constraint keeps the 32-bit
;; sub-registers within an indexable range, which avoids extra moves.
(define_insn "neon_vbfcvtbf_cvtmode<mode>"
  [(set (match_operand:VBFCVTM 0 "register_operand" "=x")
        (unspec:VBFCVTM
          [(match_operand:<V_bf_cvt_m> 1 "register_operand" "0")]
          UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  ""
)
6450
;; BFloat16 matrix multiply-accumulate:
;; operand 0 = operand 1 + UNSPEC_BFMMLA (operand 2, operand 3).
;; The V4SF accumulator (operand 1) is tied to the destination.
(define_insn "neon_vmmlav8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (unspec:V4SF
            [(match_operand:V8BF 2 "register_operand" "w")
             (match_operand:V8BF 3 "register_operand" "w")]
            UNSPEC_BFMMLA)))]
  "TARGET_BF16_SIMD"
  "vmmla.bf16\\t%q0, %q2, %q3"
  [(set_attr "type" "neon_fp_mla_s_q")]
)
6461
;; BFloat16 widening fused multiply-accumulate, one pattern per BF_MA
;; unspec (mnemonic suffix <bt>):
;; operand 0 = operand 1 + BF_MA (operand 2, operand 3).
;; The V4SF accumulator (operand 1) is tied to the destination.
(define_insn "neon_vfma<bt>v8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus: V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (unspec:V4SF
            [(match_operand:V8BF 2 "register_operand" "w")
             (match_operand:V8BF 3 "register_operand" "w")]
            BF_MA)))]
  "TARGET_BF16_SIMD"
  "vfma<bt>.bf16\\t%q0, %q2, %q3"
  [(set_attr "type" "neon_fp_mla_s_q")]
)
6472
;; By-lane form of the BFloat16 widening fused multiply-accumulate.  The
;; multiplier is lane operand 4 of the V4BF vector in operand 3, which uses
;; the "x" constraint; the lane index is printed unchanged.
(define_insn "neon_vfma<bt>_lanev8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus: V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (unspec:V4SF
            [(match_operand:V8BF 2 "register_operand" "w")
             (match_operand:V4BF 3 "register_operand" "x")
             (match_operand:SI 4 "const_int_operand" "n")]
            BF_MA)))]
  "TARGET_BF16_SIMD"
  "vfma<bt>.bf16\\t%q0, %q2, %P3[%c4]"
  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
)
6484
;; By-lane BFloat16 widening fused multiply-accumulate where the lane source
;; (operand 3) is a full V8BF vector and operand 4 is a lane index in 0..7.
;; neon_vfma<bt>_lanev8bf only accepts a V4BF lane source, so the half of
;; operand 3 that contains the lane is first extracted into a V4BF temporary
;; (low half for lanes 0-3, high half for lanes 4-7, with the index rebased
;; by 4) and that temporary is passed on.  Passing the V8BF register itself
;; would not satisfy the V4BF operand of the lane pattern.
(define_expand "neon_vfma<bt>_laneqv8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
                                  (match_operand:V8BF 3 "register_operand" "x")
                                  (match_operand:SI 4 "const_int_operand" "n")]
                     BF_MA)))]
  "TARGET_BF16_SIMD"
  {
    int lane = INTVAL (operands[4]);
    /* V4BF half of operand 3 that holds the requested lane.  */
    rtx op_half = gen_reg_rtx (V4BFmode);

    gcc_assert (IN_RANGE (lane, 0, 7));
    if (lane < 4)
      emit_insn (gen_neon_vget_lowv8bf (op_half, operands[3]));
    else
      {
        emit_insn (gen_neon_vget_highv8bf (op_half, operands[3]));
        /* Make the index relative to the high half.  */
        operands[4] = GEN_INT (lane - 4);
      }
    emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1],
                                           operands[2], op_half,
                                           operands[4]));
    DONE;
  }
  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
)
6511