;; ARM NEON coprocessor Machine Description
;; Copyright (C) 2006-2022 Free Software Foundation, Inc.
;; Written by CodeSourcery.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.


;; Attribute used to permit string comparisons against <VQH_mnem> in
;; type attribute definitions.
(define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))

;; Store a V8QI register to memory, wrapped in UNSPEC_UNALIGNED_STORE.
;; The assembly text is produced by output_move_neon.
(define_insn "unaligned_storev8qi"
 [(set (match_operand:V8QI 0 "memory_operand" "=Un")
       (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")]
                    UNSPEC_UNALIGNED_STORE))]
 "TARGET_NEON"
 "*
  return output_move_neon (operands);
 "
 [(set_attr "type" "neon_store1_1reg")])

;; Move pattern for the VDXMOV modes.  At least one operand must be a
;; register.  The ten alternatives are handled as follows:
;;   0      vmov between "w" registers
;;   1, 4   delegated to output_move_neon
;;   2, 3   immediates, emitted as vmov.f32 or vmov.i<width>
;;   5, 6   vmov between a "w" register and a core register pair
;;   9      returns "#" (GCC's output-template marker for an insn that
;;          must be split)
;;   7, 8   delegated to output_move_double
;; The "type", "length" and pool-range attributes below are listed in the
;; same alternative order and must be kept in step with the constraints.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VDXMOV 0 "nonimmediate_operand"
          "=w,Un,w, w, w, ?r,?w,?r, ?Us,*r")
        (match_operand:VDXMOV 1 "general_operand"
          " w,w, Dm,Dn,Uni, w, r, Usi,r,*r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  /* Immediate alternatives.  simd_immediate_valid_for_move rewrites
     operands[1] and reports the element width; a width of 0 selects the
     vmov.f32 form.  */
  if (which_alternative == 2 || which_alternative == 3)
    {
      int width, is_valid;
      /* Static because the returned template must outlive this fragment.  */
      static char templ[40];

      is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
        &operands[1], &width);

      gcc_assert (is_valid != 0);

      if (width == 0)
        return "vmov.f32\t%P0, %1 @ <mode>";
      else
        sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);

      return templ;
    }

  switch (which_alternative)
    {
    case 0: return "vmov\t%P0, %P1 @ <mode>";
    case 1: case 4: return output_move_neon (operands);
    /* Already handled by the immediate path above.  */
    case 2: case 3: gcc_unreachable ();
    case 5: return "vmov\t%Q0, %R0, %P1 @ <mode>";
    case 6: return "vmov\t%P0, %Q1, %R1 @ <mode>";
    case 9: return "#";
    default: return output_move_double (operands, true, NULL);
    }
}
 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
                    neon_move<q>,neon_load1_1reg, neon_to_gp<q>,\
                    neon_from_gp<q>,neon_load1_2reg, neon_store1_2reg,\
                    multiple")
  (set_attr "length" "4,4,4,4,4,4,4,8,8,8")
  (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,1020,*,*")
  (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,1018,*,*")
  (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,1004,*,*")])

;; Move pattern for the VQXMOV modes.  Same structure as the VDXMOV
;; pattern, but using %q/%e/%f operand modifiers, two vmov instructions for
;; transfers to or from core registers (alternatives 5 and 6), and
;; output_move_quad for the remaining alternatives (7, 8 and 9).
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
          "=w,Un,w, w, w, ?r,?w,?r,?r, ?Us")
        (match_operand:VQXMOV 1 "general_operand"
          " w,w, Dm,DN,Uni, w, r, r, Usi, r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  /* Immediate alternatives; a reported width of 0 selects vmov.f32.  */
  if (which_alternative == 2 || which_alternative == 3)
    {
      int width, is_valid;
      /* Static because the returned template must outlive this fragment.  */
      static char templ[40];

      is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
        &operands[1], &width);

      gcc_assert (is_valid != 0);

      if (width == 0)
        return "vmov.f32\t%q0, %1 @ <mode>";
      else
        sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);

      return templ;
    }

  switch (which_alternative)
    {
    case 0: return "vmov\t%q0, %q1 @ <mode>";
    case 1: case 4: return output_move_neon (operands);
    /* Already handled by the immediate path above.  */
    case 2: case 3: gcc_unreachable ();
    case 5: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
    case 6: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
    default: return output_move_quad (operands);
    }
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
                     neon_move_q,neon_load2_2reg_q,neon_to_gp_q,\
                     neon_from_gp_q,mov_reg,neon_load1_4reg,neon_store1_4reg")
   (set_attr "length" "4,8,4,4,8,8,8,16,8,16")
   (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*")
   (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*")
   (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")])

/* We define these mov expanders to match the standard mov$a optab to prevent
   the mid-end from trying to do a subreg for these modes which is the most
   inefficient way to expand the move.  Also big-endian subreg's aren't
   allowed for a subset of modes, see TARGET_CAN_CHANGE_MODE_CLASS.
   Without these RTL generation patterns the mid-end would attempt to take a
   sub-reg and may ICE if it can't.  */

;; TImode move expander.  Asserts that both operands satisfy
;; aligned_operand and, while new pseudos may still be created, forces the
;; source into a register whenever the destination is not a REG.
(define_expand "movti"
  [(set (match_operand:TI 0 "nonimmediate_operand")
        (match_operand:TI 1 "general_operand"))]
  "TARGET_NEON"
{
  gcc_checking_assert (aligned_operand (operands[0], TImode));
  gcc_checking_assert (aligned_operand (operands[1], TImode));
  if (can_create_pseudo_p ())
    {
      if (!REG_P (operands[0]))
        operands[1] = force_reg (TImode, operands[1]);
    }
})

;; Move expander for the VSTRUCT modes; same body as "movti" but enabled
;; for either NEON or MVE.
(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
        (match_operand:VSTRUCT 1 "general_operand"))]
  "TARGET_NEON || TARGET_HAVE_MVE"
{
  gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
  gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));
  if (can_create_pseudo_p ())
    {
      if (!REG_P (operands[0]))
        operands[1] = force_reg (<MODE>mode, operands[1]);
    }
})

;; The pattern mov<mode> where mode is v8hf, v4hf, v4bf and v8bf are split into
;; two groups. The pattern movv8hf is common for MVE and NEON, so it is moved
;; into vec-common.md file. Remaining mov expand patterns with half float and
;; bfloats are implemented below.
;; Move expander for the VHFBF_split modes.  Both operands use the
;; s_register_operand predicate.  The body asserts aligned_operand on both
;; operands and, while new pseudos may still be created, forces operand 1
;; into a register whenever operand 0 is not a REG.
(define_expand "mov<mode>"
  [(set (match_operand:VHFBF_split 0 "s_register_operand")
        (match_operand:VHFBF_split 1 "s_register_operand"))]
  "TARGET_NEON"
{
  gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
  gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));
  if (can_create_pseudo_p ())
    {
      if (!REG_P (operands[0]))
        operands[1] = force_reg (<MODE>mode, operands[1]);
    }
})

;; Move insn for the VSTRUCT modes.  Register-to-register moves
;; (alternative 0) return "#", GCC's marker for an insn that must be split;
;; stores and loads (alternatives 1 and 2) are emitted by output_move_neon.
;; The length attribute is computed by arm_attr_length_move_neon.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
        (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
  "(TARGET_NEON || TARGET_HAVE_MVE)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0: return "#";
    case 1: case 2: return output_move_neon (operands);
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
   (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])

;; After reload, split an EI register copy into a TImode copy at the base
;; register number and a DImode copy at base + 4.  The component registers
;; are handed to neon_disambiguate_copy, which fills in the operands used by
;; the replacement sets.
;; NOTE(review): presumably neon_disambiguate_copy orders the pieces so that
;; overlapping source and destination ranges are copied safely -- confirm
;; against its definition.
(define_split
  [(set (match_operand:EI 0 "s_register_operand" "")
        (match_operand:EI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);
  rtx dest[2], src[2];

  dest[0] = gen_rtx_REG (TImode, rdest);
  src[0] = gen_rtx_REG (TImode, rsrc);
  dest[1] = gen_rtx_REG (DImode, rdest + 4);
  src[1] = gen_rtx_REG (DImode, rsrc + 4);

  neon_disambiguate_copy (operands, dest, src, 2);
})

;; After reload, split an OI register copy into two TImode copies at
;; register offsets 0 and 4.
(define_split
  [(set (match_operand:OI 0 "s_register_operand" "")
        (match_operand:OI 1 "s_register_operand" ""))]
  "(TARGET_NEON || TARGET_HAVE_MVE)&& reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);
  rtx dest[2], src[2];

  dest[0] = gen_rtx_REG (TImode, rdest);
  src[0] = gen_rtx_REG (TImode, rsrc);
  dest[1] = gen_rtx_REG (TImode, rdest + 4);
  src[1] = gen_rtx_REG (TImode, rsrc + 4);

  neon_disambiguate_copy (operands, dest, src, 2);
})

;; After reload, split a CI register copy into three TImode copies at
;; register offsets 0, 4 and 8.
(define_split
  [(set (match_operand:CI 0 "s_register_operand" "")
        (match_operand:CI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))]
{
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);
  rtx dest[3], src[3];

  dest[0] = gen_rtx_REG (TImode, rdest);
  src[0] = gen_rtx_REG (TImode, rsrc);
  dest[1] = gen_rtx_REG (TImode, rdest + 4);
  src[1] = gen_rtx_REG (TImode, rsrc + 4);
  dest[2] = gen_rtx_REG (TImode, rdest + 8);
  src[2] = gen_rtx_REG (TImode, rsrc + 8);

  neon_disambiguate_copy (operands, dest, src, 3);
})

;; After reload, split an XI register copy into four TImode copies at
;; register offsets 0, 4, 8 and 12.
(define_split
  [(set (match_operand:XI 0 "s_register_operand" "")
        (match_operand:XI 1 "s_register_operand" ""))]
  "(TARGET_NEON || TARGET_HAVE_MVE) && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))
   (set (match_dup 6) (match_dup 7))]
{
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);
  rtx dest[4], src[4];

  dest[0] = gen_rtx_REG (TImode, rdest);
  src[0] = gen_rtx_REG (TImode, rsrc);
  dest[1] = gen_rtx_REG (TImode, rdest + 4);
  src[1] = gen_rtx_REG (TImode, rsrc + 4);
  dest[2] = gen_rtx_REG (TImode, rdest + 8);
  src[2] = gen_rtx_REG (TImode, rsrc + 8);
  dest[3] = gen_rtx_REG (TImode, rdest + 12);
  src[3] = gen_rtx_REG (TImode, rsrc + 12);

  neon_disambiguate_copy (operands, dest, src, 4);
})

;; Misaligned store of a VDX register using vst1.  Only enabled for
;; little-endian targets with unaligned_access set.
(define_insn "*movmisalign<mode>_neon_store"
  [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
        (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vst1.<V_sz_elem>\t{%P1}, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])

;; Misaligned load into a VDX register using vld1; same enabling
;; condition as the store above.
(define_insn "*movmisalign<mode>_neon_load"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
                                        " Um")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vld1.<V_sz_elem>\t{%P0}, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")])

;; Misaligned store of a VQX register using vst1 (%q operand form).
(define_insn "*movmisalign<mode>_neon_store"
  [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
        (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vst1.<V_sz_elem>\t{%q1}, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])

;; Misaligned load into a VQX register using vld1 (%q operand form).
(define_insn "*movmisalign<mode>_neon_load"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
                                        " Um")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vld1.<V_sz_elem>\t{%q0}, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")])

;; Insert a scalar (from memory or a core register) into one lane of a
;; VD_LANE vector.  Operand 2 is the vec_merge mask; the lane index is the
;; position of its lowest set bit, mirrored on big-endian targets.
(define_insn "@vec_set<mode>_internal"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
        (vec_merge:VD_LANE
          (vec_duplicate:VD_LANE
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VD_LANE 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  /* Convert the merge mask into a lane number.  */
  int elt = ffs ((int) INTVAL (operands[2])) - 1;
  if (BYTES_BIG_ENDIAN)
    elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
  operands[2] = GEN_INT (elt);

  if (which_alternative == 0)
    return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
  else
    return "vmov.<V_sz_elem>\t%P0[%c2], %1";
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])

;; As above for the VQ2 modes.  The lane index is split into a half
;; selector and a lane within that half; operand 0 is then rewritten to the
;; <V_HALF>mode register that holds the lane (register number + 0 or + 2).
(define_insn "@vec_set<mode>_internal"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
        (vec_merge:VQ2
          (vec_duplicate:VQ2
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VQ2 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
  int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
  int elt = elem % half_elts;
  /* Register-number offset of the half containing the lane.  */
  int hi = (elem / half_elts) * 2;
  int regno = REGNO (operands[0]);

  if (BYTES_BIG_ENDIAN)
    elt = half_elts - 1 - elt;

  operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
  operands[2] = GEN_INT (elt);

  if (which_alternative == 0)
    return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
  else
    return "vmov.<V_sz_elem>\t%P0[%c2], %1";
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
)

;; As above for V2DI_ONLY.  The selected element is addressed as the DImode
;; register at register number + 2 * lane, which is then loaded or moved
;; whole.
(define_insn "@vec_set<mode>_internal"
  [(set (match_operand:V2DI_ONLY 0 "s_register_operand" "=w,w")
        (vec_merge:V2DI_ONLY
          (vec_duplicate:V2DI_ONLY
            (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
          (match_operand:V2DI_ONLY 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
  int regno = REGNO (operands[0]) + 2 * elem;

  operands[0] = gen_rtx_REG (DImode, regno);

  if (which_alternative == 0)
    return "vld1.64\t%P0, %A1";
  else
    return "vmov\t%P0, %Q1, %R1";
}
  [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
)

;; Extract one lane of a VD_LANE vector to memory (vst1) or to a core
;; register (vmov).  Operand 2 is the lane index, mirrored on big-endian
;; targets.
(define_insn "vec_extract<mode><V_elem_l>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VD_LANE 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      int elt = INTVAL (operands[2]);
      elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
      operands[2] = GEN_INT (elt);
    }

  if (which_alternative == 0)
    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
  else
    return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)

;; This pattern is renamed from "vec_extract<mode><V_elem_l>" to
;; "neon_vec_extract<mode><V_elem_l>" and this pattern is called
;; by define_expand in vec-common.md file.
;; The lane index is split into a half selector and a lane within that
;; half; operand 1 is rewritten to the <V_HALF>mode register holding it.
(define_insn "neon_vec_extract<mode><V_elem_l>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VQ2 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
  int elt = INTVAL (operands[2]) % half_elts;
  /* Register-number offset of the half containing the lane.  */
  int hi = (INTVAL (operands[2]) / half_elts) * 2;
  int regno = REGNO (operands[1]);

  if (BYTES_BIG_ENDIAN)
    elt = half_elts - 1 - elt;

  operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
  operands[2] = GEN_INT (elt);

  if (which_alternative == 0)
    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
  else
    return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)

;; This pattern is renamed from "vec_extractv2didi" to "neon_vec_extractv2didi"
;; and this pattern is called by define_expand in vec-common.md file.
;; Extract one DImode element of a V2DI vector.  The element is addressed
;; as the DImode register at register number + 2 * lane and is then stored
;; (vst1.64) or moved to a core register pair (vmov).
(define_insn "neon_vec_extractv2didi"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
        (vec_select:DI
          (match_operand:V2DI 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);

  operands[1] = gen_rtx_REG (DImode, regno);

  if (which_alternative == 0)
    return "vst1.64\t{%P1}, %A0 @ v2di";
  else
    return "vmov\t%Q0, %R0, %P1 @ v2di";
}
  [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
)

;; Doubleword and quadword arithmetic.

;; NOTE: some other instructions also support 64-bit integer
;; element size, which we could potentially use for "long long" operations.

;; Vector addition (vadd) for the VDQ modes.  The scheduling type is the
;; FP add/sub class for float modes and the integer add class otherwise.
(define_insn "*add<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                  (match_operand:VDQ 2 "s_register_operand" "w")))]
  "ARM_HAVE_NEON_<MODE>_ARITH"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_addsub_s<q>")
                    (const_string "neon_add<q>")))]
)

;; Vector subtraction (vsub) for the VDQ modes.
(define_insn "*sub<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                   (match_operand:VDQ 2 "s_register_operand" "w")))]
  "ARM_HAVE_NEON_<MODE>_ARITH"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_addsub_s<q>")
                    (const_string "neon_sub<q>")))]
)

;; Vector multiplication (vmul) for the VDQW modes.
(define_insn "*mul<mode>3_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                   (match_operand:VDQW 2 "s_register_operand" "w")))]
  "ARM_HAVE_NEON_<MODE>_ARITH"
  "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_mul_s<q>")
                    (const_string "neon_mul_<V_elem_ch><q>")))]
)

/* Perform division using multiply-by-reciprocal.
   Reciprocal is calculated using Newton-Raphson method.
   Enabled with -funsafe-math-optimizations -freciprocal-math
   and disabled for -Os since it increases code size.  */

(define_expand "div<VCVTF:mode>3"
  [(set (match_operand:VCVTF 0 "s_register_operand")
        (div:VCVTF (match_operand:VCVTF 1 "s_register_operand")
                   (match_operand:VCVTF 2 "s_register_operand")))]
  "ARM_HAVE_NEON_<MODE>_ARITH && !optimize_size
   && flag_reciprocal_math"
  {
    rtx rec = gen_reg_rtx (<MODE>mode);
    rtx vrecps_temp = gen_reg_rtx (<MODE>mode);

    /* Reciprocal estimate.  */
    emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));

    /* Perform 2 iterations of Newton-Raphson method.  Each iteration
       emits a vrecps step and multiplies it back into REC.  */
    for (int i = 0; i < 2; i++)
      {
        emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
        emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
      }

    /* We now have reciprocal in rec, perform operands[0] = operands[1] * rec.  */
    emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec));
    DONE;
  }
)


;; Multiply-accumulate (vmla): operand 0 = operand 1 + operand 2 * operand 3,
;; with the accumulator (operand 1) tied to the destination.
(define_insn "mul<mode>3add<mode>_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
                              (match_operand:VDQW 3 "s_register_operand" "w"))
                   (match_operand:VDQW 1 "s_register_operand" "0")))]
  "ARM_HAVE_NEON_<MODE>_ARITH"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_mla_s<q>")
                    (const_string "neon_mla_<V_elem_ch><q>")))]
)

;; Multiply-accumulate for the VH modes, emitted as vmla.f16.
(define_insn "mul<mode>3add<mode>_neon"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
                          (match_operand:VH 3 "s_register_operand" "w"))
                 (match_operand:VH 1 "s_register_operand" "0")))]
  "ARM_HAVE_NEON_<MODE>_ARITH"
  "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Multiply-subtract (vmls): operand 0 = operand 1 - operand 2 * operand 3,
;; with operand 1 tied to the destination.
(define_insn "mul<mode>3neg<mode>add<mode>_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
                    (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
                               (match_operand:VDQW 3 "s_register_operand" "w"))))]
  "ARM_HAVE_NEON_<MODE>_ARITH"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_mla_s<q>")
                    (const_string "neon_mla_<V_elem_ch><q>")))]
)

;; Fused multiply-accumulate
;; We define each insn twice here:
;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
;; to be able to use when converting to FMA.
;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add (vfma) for the VCVTF modes, gated on
;; ARM_HAVE_NEON_<MODE>_ARITH and TARGET_FMA.  Operand 3 (the addend) is
;; tied to the destination.
(define_insn "fma<VCVTF:mode>4"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Same RTL as above, but gated only on TARGET_NEON && TARGET_FMA.
(define_insn "fma<VCVTF:mode>4_intrinsic"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA"
  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Fused multiply-add for the VH modes.
(define_insn "fma<VH:mode>4"
  [(set (match_operand:VH 0 "register_operand" "=w")
        (fma:VH
          (match_operand:VH 1 "register_operand" "w")
          (match_operand:VH 2 "register_operand" "w")
          (match_operand:VH 3 "register_operand" "0")))]
  "ARM_HAVE_NEON_<MODE>_ARITH"
  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Fused multiply-subtract (vfms): an fma whose first multiplicand is
;; negated.  Gated on ARM_HAVE_NEON_<MODE>_ARITH and TARGET_FMA.
(define_insn "*fmsub<VCVTF:mode>4"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Same RTL as above, but gated only on TARGET_NEON && TARGET_FMA.
(define_insn "fmsub<VCVTF:mode>4_intrinsic"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF
          (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
          (match_operand:VCVTF 2 "register_operand" "w")
          (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA"
  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Fused multiply-subtract for the VH modes, gated on TARGET_NEON_FP16INST.
(define_insn "fmsub<VH:mode>4_intrinsic"
  [(set (match_operand:VH 0 "register_operand" "=w")
        (fma:VH
          (neg:VH (match_operand:VH 1 "register_operand" "w"))
          (match_operand:VH 2 "register_operand" "w")
          (match_operand:VH 3 "register_operand" "0")))]
  "TARGET_NEON_FP16INST"
  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; vrint<variant>.f32 for each member of the NEON_VRINT unspec iterator.
(define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1
                         "s_register_operand" "w")]
                NEON_VRINT))]
  "TARGET_NEON && TARGET_VFP5"
  "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
)

;; vcvt<variant>.<su>32.f32: float-to-integer conversion applied to the
;; NEON_VCVT unspec of operand 1, for both FIXUORS codes.
(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
        (FIXUORS:<V_cmp_result> (unspec:VCVTF
                               [(match_operand:VCVTF 1 "register_operand" "w")]
                               NEON_VCVT)))]
  "TARGET_NEON && TARGET_VFP5"
  "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
   (set_attr "predicable" "no")]
)

;; Bitwise OR.  Alternative 0 is register-register vorr; alternative 1 is
;; an immediate form (operand 1 tied to the destination) whose text is
;; produced by neon_output_logic_immediate.
(define_insn "ior<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
                 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
  "TARGET_NEON"
{
  switch (which_alternative)
    {
    case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
    case 1: return neon_output_logic_immediate ("vorr", &operands[2],
                     <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_logic<q>")]
)

;; The concrete forms of the Neon immediate-logic instructions are vbic and
;; vorr. We support the pseudo-instruction vand instead, because that
;; corresponds to the canonical form the middle-end expects to use for
;; immediate bitwise-ANDs.

(define_insn "and<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
                 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
  "TARGET_NEON"
{
  switch (which_alternative)
    {
    case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
    case 1: return neon_output_logic_immediate ("vand", &operands[2],
                     <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_logic<q>")]
)

;; OR-NOT (vorn): operand 0 = operand 1 | ~operand 2.
(define_insn "orn<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
                 (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)

;; Bit clear (vbic): operand 0 = operand 1 & ~operand 2.
(define_insn "bic<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
                 (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)

;; Bitwise exclusive OR (veor).
(define_insn "xor<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                 (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)

;; Bitwise complement (vmvn).
(define_insn "one_cmpl<mode>2_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmvn\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_move<q>")]
)

;; Absolute value (vabs) for the VDQW modes.
(define_insn "neon_abs<mode>2"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_abs_s<q>")
                    (const_string "neon_abs<q>")))]
)

;; Negation (vneg) for the VDQW modes.
(define_insn "neon_neg<mode>2"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_neg_s<q>")
                    (const_string "neon_neg<q>")))]
)

;; Absolute value and negation for the VH modes, generated from the ABSNEG
;; code iterator.
(define_insn "neon_<absneg_str><mode>2"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST"
  "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_abs<q>")]
)

;; Expander that simply forwards to the neon_<absneg_str><mode>2 insn above.
(define_expand "neon_v<absneg_str><mode>"
  [(set
    (match_operand:VH 0 "s_register_operand")
    (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
  "TARGET_NEON_FP16INST"
{
  emit_insn (gen_neon_<absneg_str><mode>2 (operands[0], operands[1]));
  DONE;
})

;; Rounding instructions for the VH modes, one per FP16_RND unspec.
(define_insn "neon_v<fp16_rnd_str><mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")]
          FP16_RND))]
  "TARGET_NEON_FP16INST"
  "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_round_s<q>")]
)

;; vrsqrte.f16 for the VH modes.
(define_insn "neon_vrsqrte<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")]
          UNSPEC_VRSQRTE))]
  "TARGET_NEON_FP16INST"
  "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
)

;; Unsigned minimum (vmin with the unsigned element suffix).
(define_insn "*umin<mode>3_neon"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
                    (match_operand:VDQIW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Unsigned maximum (vmax with the unsigned element suffix).
(define_insn "*umax<mode>3_neon"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
                    (match_operand:VDQIW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Signed minimum for the VDQW modes; the type attribute distinguishes
;; float from integer modes.
(define_insn "*smin<mode>3_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                   (match_operand:VDQW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_minmax_s<q>")
                    (const_string "neon_minmax<q>")))]
)

;; Signed maximum for the VDQW modes.
(define_insn "*smax<mode>3_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                   (match_operand:VDQW 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_minmax_s<q>")
                    (const_string "neon_minmax<q>")))]
)

; TODO: V2DI shifts are currently disabled because there are bugs in the
; generic vectorizer code. It ends up creating a V2DI constructor with
; SImode elements.

;; Arithmetic shift right by an immediate.  The assembly text is produced
;; by neon_output_shift_immediate with mnemonic "vshr" and sign character
;; 's'.
(define_insn "vashr<mode>3_imm"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
                        (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
  "TARGET_NEON"
  {
    return neon_output_shift_immediate ("vshr", 's', &operands[2],
                                        <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
                                        false);
  }
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; Logical shift right by an immediate; as above but with sign character
;; 'u'.
(define_insn "vlshr<mode>3_imm"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
                        (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
  "TARGET_NEON"
  {
    return neon_output_shift_immediate ("vshr", 'u', &operands[2],
                                        <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
                                        false);
  }
  [(set_attr "type" "neon_shift_imm<q>")]
)

; Used for implementing arithmetic shift-right, which is a left-shift by a
; negative amount, with signed operands. This is essentially the same as
; ashl<mode>3 above, but using an unspec in case GCC tries anything tricky
; with negative shift amounts.

(define_insn "ashl<mode>3_signed"
  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
        (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
                      (match_operand:VDQI 2 "s_register_operand" "w")]
                     UNSPEC_ASHIFT_SIGNED))]
  "TARGET_NEON"
  "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_reg<q>")]
)

; Used for implementing logical shift-right, which is a left-shift by a negative
; amount, with unsigned operands.
;; Register-controlled vector shift printed with the unsigned element suffix
;; (vshl.<V_u_elem>).  The operation is wrapped in UNSPEC_ASHIFT_UNSIGNED
;; rather than being expressed with a generic RTL shift code.

(define_insn "ashl<mode>3_unsigned"
  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
        (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
                      (match_operand:VDQI 2 "s_register_operand" "w")]
                     UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_NEON"
  "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; 64-bit shifts

;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
;; leaving the upper half uninitialized.  This is OK since the shift
;; instruction only looks at the low 8 bits anyway.  To avoid confusing
;; data flow analysis however, we pretend the full register is set
;; using an unspec.
;; Alternative 0 ("Um") fills lane 0 from memory with vld1.32; alternative 1
;; ("r") fills lane 0 from a core register with vmov.32.
(define_insn "neon_load_count"
  [(set (match_operand:DI 0 "s_register_operand" "=w,w")
        (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
                   UNSPEC_LOAD_COUNT))]
  "TARGET_NEON"
  "@
   vld1.32\t{%P0[0]}, %A1
   vmov.32\t%P0[0], %1"
  [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
)

;; Widening operations

;; Signed widening sum for VQI inputs:
;;   operand 0 = operand 2 + sign_extend (operand 1)
;; with operands 0 and 2 in the <V_double_width> mode.  Expanded as a copy of
;; the addend into operand 0 followed by one accumulate insn per half of
;; operand 1.

(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand")
        (plus:<V_double_width>
         (sign_extend:<V_double_width>
          (match_operand:VQI 1 "s_register_operand"))
         (match_operand:<V_double_width> 2 "s_register_operand")))]
  "TARGET_NEON"
  {
    machine_mode mode = GET_MODE (operands[1]);
    rtx p1, p2;

    /* p1 feeds the *_lo insn (vect_par_constant_low) and p2 the *_hi insn
       (vect_par_constant_high).  */
    p1  = arm_simd_vect_par_cnst_half (mode, false);
    p2  = arm_simd_vect_par_cnst_half (mode, true);

    /* The helper insns tie their accumulator input to the output ("0"
       constraint), so seed operand 0 with the addend first.  */
    if (operands[0] != operands[2])
      emit_move_insn (operands[0], operands[2]);

    emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
                                                         operands[1],
                                                         p1,
                                                         operands[0]));
    emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
                                                         operands[1],
                                                         p2,
                                                         operands[0]));
    DONE;
  }
)

;; Accumulate the sign-extended low half of operand 1 (selected by the
;; parallel in operand 2) into operand 3, which is tied to the output.
;; The half is printed with %e1 on little-endian and %f1 on big-endian.
;; NOTE(review): the '%' modifier on operand 1 declares it commutative with
;; operand 2, which is a constant parallel here -- confirm this is intended.

(define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
         (sign_extend:<V_double_width>
          (vec_select:<V_HALF>
           (match_operand:VQI 1 "s_register_operand" "%w")
           (match_operand:VQI 2 "vect_par_constant_low" "")))
         (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
{
  return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
    "vaddw.<V_s_elem>\t%q0, %q3, %e1";
}
  [(set_attr "type" "neon_add_widen")])

;; As vec_sel_widen_ssum_lo, but for the high half of operand 1; the %e1/%f1
;; choice per endianness is therefore reversed.

(define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
         (sign_extend:<V_double_width>
          (vec_select:<V_HALF>
           (match_operand:VQI 1 "s_register_operand" "%w")
           (match_operand:VQI 2 "vect_par_constant_high" "")))
         (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
{
  return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
    "vaddw.<V_s_elem>\t%q0, %q3, %f1";
}
  [(set_attr "type" "neon_add_widen")])

;; Signed widening sum for VW inputs, done by a single vaddw:
;;   operand 0 = operand 2 + sign_extend (operand 1).
;; NOTE(review): operand 1 carries '%' although operands 1 and 2 have
;; different modes -- confirm the commutative marker is intended.

(define_insn "widen_ssum<mode>3"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen>
         (sign_extend:<V_widen>
          (match_operand:VW 1 "s_register_operand" "%w"))
         (match_operand:<V_widen> 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vaddw.<V_s_elem>\t%q0, %q2, %P1"
  [(set_attr "type" "neon_add_widen")]
)

;; Unsigned widening sum for VQI inputs:
;;   operand 0 = operand 2 + zero_extend (operand 1).
;; Same expansion strategy as widen_ssum<mode>3, using the usum helpers.

(define_expand "widen_usum<mode>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand")
        (plus:<V_double_width>
         (zero_extend:<V_double_width>
          (match_operand:VQI 1 "s_register_operand"))
         (match_operand:<V_double_width> 2 "s_register_operand")))]
  "TARGET_NEON"
  {
    machine_mode mode = GET_MODE (operands[1]);
    rtx p1, p2;

    /* p1 feeds the *_lo insn (vect_par_constant_low) and p2 the *_hi insn
       (vect_par_constant_high).  */
    p1  = arm_simd_vect_par_cnst_half (mode, false);
    p2  = arm_simd_vect_par_cnst_half (mode, true);

    /* The helper insns accumulate in place, so seed operand 0 with the
       addend first.  */
    if (operands[0] != operands[2])
      emit_move_insn (operands[0], operands[2]);

    emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
                                                         operands[1],
                                                         p1,
                                                         operands[0]));
    emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
                                                         operands[1],
                                                         p2,
                                                         operands[0]));
    DONE;
  }
)

;; Accumulate the zero-extended low half of operand 1 into operand 3, which
;; is tied to the output.  Printed with %e1 on little-endian, %f1 on
;; big-endian.

(define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
         (zero_extend:<V_double_width>
          (vec_select:<V_HALF>
           (match_operand:VQI 1 "s_register_operand" "%w")
           (match_operand:VQI 2 "vect_par_constant_low" "")))
         (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
{
  return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
    "vaddw.<V_u_elem>\t%q0, %q3, %e1";
}
  [(set_attr "type" "neon_add_widen")])

;; As vec_sel_widen_usum_lo, but for the high half of operand 1.

(define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
        (plus:<V_double_width>
         (zero_extend:<V_double_width>
          (vec_select:<V_HALF>
           (match_operand:VQI 1 "s_register_operand" "%w")
           (match_operand:VQI 2 "vect_par_constant_high" "")))
         (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
  "TARGET_NEON"
{
  return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
    "vaddw.<V_u_elem>\t%q0, %q3, %f1";
}
  [(set_attr "type" "neon_add_widen")])

;; Unsigned widening sum for VW inputs, done by a single vaddw:
;;   operand 0 = operand 2 + zero_extend (operand 1).

(define_insn "widen_usum<mode>3"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (plus:<V_widen> (zero_extend:<V_widen>
                          (match_operand:VW 1 "s_register_operand" "%w"))
                        (match_operand:<V_widen> 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vaddw.<V_u_elem>\t%q0, %q2, %P1"
  [(set_attr "type" "neon_add_widen")]
)

;; Helpers for quad-word reduction operations

; Add (or smin, smax...) the low N/2 elements of the N-element vector
; operand[1] to the high N/2 elements of same.
; Put the result in operand[0], an N/2-element vector.

;; Combine lanes {0,1} of V4SI operand 1 with lanes {2,3} of the same
;; register using the VQH_OPS operation, producing a V2SI result.

(define_insn "quad_halves_<code>v4si"
  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
        (VQH_OPS:V2SI
          (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)]))
          (vec_select:V2SI (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)

;; Floating-point counterpart of the above (V4SF -> V2SF, VQHS_OPS), enabled
;; only when ARM_HAVE_NEON_V4SF_ARITH holds.

(define_insn "quad_halves_<code>v4sf"
  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
        (VQHS_OPS:V2SF
          (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)]))
          (vec_select:V2SF (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)]))))]
  "ARM_HAVE_NEON_V4SF_ARITH"
  "<VQH_mnem>.f32\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
)

;; V8HI -> V4HI variant.  Operand 0 is only written by this insn, so it uses
;; the "=w" output constraint, matching the v4si and v4sf variants.

(define_insn "quad_halves_<code>v8hi"
  [(set (match_operand:V4HI 0 "s_register_operand" "=w")
        (VQH_OPS:V4HI
          (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)
                                      (const_int 2) (const_int 3)]))
          (vec_select:V4HI (match_dup 1)
                           (parallel [(const_int 4) (const_int 5)
                                      (const_int 6) (const_int 7)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)

;; V16QI -> V8QI variant.  Operand 0 is only written by this insn, so it uses
;; the "=w" output constraint, matching the v4si and v4sf variants.

(define_insn "quad_halves_<code>v16qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (VQH_OPS:V8QI
          (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
                           (parallel [(const_int 0) (const_int 1)
                                      (const_int 2) (const_int 3)
                                      (const_int 4) (const_int 5)
                                      (const_int 6) (const_int 7)]))
          (vec_select:V8QI (match_dup 1)
                           (parallel [(const_int 8) (const_int 9)
                                      (const_int 10) (const_int 11)
                                      (const_int 12) (const_int 13)
                                      (const_int 14) (const_int 15)]))))]
  "TARGET_NEON"
  "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
  [(set_attr "vqh_mnem" "<VQH_mnem>")
   (set_attr "type" "neon_reduc_<VQH_type>_q")]
)

;; Store half-width operand 1 into the <V_HALF> subreg of 128-bit operand 0
;; that starts at byte offset GET_MODE_SIZE (<V_HALF>mode).

(define_expand "move_hi_quad_<mode>"
 [(match_operand:ANY128 0 "s_register_operand")
  (match_operand:<V_HALF> 1 "s_register_operand")]
 "TARGET_NEON"
{
  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
                                       GET_MODE_SIZE (<V_HALF>mode)),
                  operands[1]);
  DONE;
})

;; Store half-width operand 1 into the <V_HALF> subreg of 128-bit operand 0
;; that starts at byte offset 0.

(define_expand "move_lo_quad_<mode>"
 [(match_operand:ANY128 0 "s_register_operand")
  (match_operand:<V_HALF> 1 "s_register_operand")]
 "TARGET_NEON"
{
  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
                                       <MODE>mode, 0),
                  operands[1]);
  DONE;
})

;; Reduction operations

;; Sum reduction of a 64-bit vector (VD) to a scalar: neon_pairwise_reduce is
;; given the vpadd generator, then element 0 of the result is extracted.

(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VD 1 "s_register_operand")]
  "ARM_HAVE_NEON_<MODE>_ARITH"
{
  rtx vec = gen_reg_rtx (<MODE>mode);
  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpadd_internal<mode>);
  /* The same result is actually computed into every element.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
  DONE;
})

;; Sum reduction of V2DI to DI: one vadd.i64 of the two halves, then
;; extraction of element 0.  Little-endian only.

(define_expand "reduc_plus_scal_v2di"
  [(match_operand:DI 0 "nonimmediate_operand")
   (match_operand:V2DI 1 "s_register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx vec = gen_reg_rtx (V2DImode);

  emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
  emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));

  DONE;
})

;; Helper for reduc_plus_scal_v2di: adds the two 64-bit halves of operand 1
;; and writes the sum to the %e (first) half of operand 0.

(define_insn "arm_reduc_plus_internal_v2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
                     UNSPEC_VPADD))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vadd.i64\t%e0, %e1, %f1"
  [(set_attr "type" "neon_add_q")]
)

;; Signed-minimum reduction of a 64-bit vector (VD) to a scalar, using the
;; pairwise vpmin generator.

(define_expand "reduc_smin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VD 1 "s_register_operand")]
  "ARM_HAVE_NEON_<MODE>_ARITH"
{
  rtx vec = gen_reg_rtx (<MODE>mode);

  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpsmin<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
  DONE;
})

;; Signed-minimum reduction of a 128-bit vector (VQ): fold the two halves
;; with quad_halves_smin, then reuse the half-width reduction.
;; Little-endian only.

(define_expand "reduc_smin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VQ 1 "s_register_operand")]
  "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
  emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));

  DONE;
})

;; Signed-maximum reduction of a 64-bit vector (VD) to a scalar, using the
;; pairwise vpmax generator.

(define_expand "reduc_smax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VD 1 "s_register_operand")]
  "ARM_HAVE_NEON_<MODE>_ARITH"
{
  rtx vec = gen_reg_rtx (<MODE>mode);
  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpsmax<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
  DONE;
})

;; Signed-maximum reduction of a 128-bit vector (VQ): fold the halves with
;; quad_halves_smax, then reuse the half-width reduction.  Little-endian only.

(define_expand "reduc_smax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VQ 1 "s_register_operand")]
  "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
  emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));

  DONE;
})

;; Unsigned-minimum reduction of a 64-bit integer vector (VDI) to a scalar.

(define_expand "reduc_umin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VDI 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx vec = gen_reg_rtx (<MODE>mode);
  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpumin<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
  DONE;
})

;; Unsigned-minimum reduction of a 128-bit integer vector (VQI): fold the
;; halves with quad_halves_umin, then reuse the half-width reduction.
;; Little-endian only.

(define_expand "reduc_umin_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VQI 1 "s_register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
  emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));

  DONE;
})

;; Unsigned-maximum reduction of a 64-bit integer vector (VDI) to a scalar.

(define_expand "reduc_umax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VDI 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx vec = gen_reg_rtx (<MODE>mode);
  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
                        &gen_neon_vpumax<mode>);
  /* The result is computed into every element of the vector.  */
  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
  DONE;
})

;; Unsigned-maximum reduction of a 128-bit integer vector (VQI): fold the
;; halves with quad_halves_umax, then reuse the half-width reduction.
;; Little-endian only.

(define_expand "reduc_umax_scal_<mode>"
  [(match_operand:<V_elem> 0 "nonimmediate_operand")
   (match_operand:VQI 1 "s_register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx step1 = gen_reg_rtx (<V_HALF>mode);

  emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
  emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));

  DONE;
})

;; Pairwise add of two 64-bit vectors (vpadd), modelled as UNSPEC_VPADD.
;; The scheduling type depends on whether the mode is floating-point.

(define_insn "neon_vpadd_internal<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPADD))]
  "TARGET_NEON"
  "vpadd.<V_if_elem>\t%P0, %P1, %P2"
  ;; Assume this schedules like vadd.
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_reduc_add_s<q>")
                    (const_string "neon_reduc_add<q>")))]
)

;; Pairwise add for V4HF (vpadd.f16); requires TARGET_NEON_FP16INST.

(define_insn "neon_vpaddv4hf"
 [(set
   (match_operand:V4HF 0 "s_register_operand" "=w")
   (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
                 (match_operand:V4HF 2 "s_register_operand" "w")]
    UNSPEC_VPADD))]
 "TARGET_NEON_FP16INST"
 "vpadd.f16\t%P0, %P1, %P2"
 [(set_attr "type" "neon_reduc_add")]
)

;; Pairwise signed minimum of two 64-bit vectors (vpmin.<V_s_elem>).

(define_insn "neon_vpsmin<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPSMIN))]
  "TARGET_NEON"
  "vpmin.<V_s_elem>\t%P0, %P1, %P2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_reduc_minmax_s<q>")
                    (const_string "neon_reduc_minmax<q>")))]
)

;; Pairwise signed maximum of two 64-bit vectors (vpmax.<V_s_elem>).

(define_insn "neon_vpsmax<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPSMAX))]
  "TARGET_NEON"
  "vpmax.<V_s_elem>\t%P0, %P1, %P2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_reduc_minmax_s<q>")
                    (const_string "neon_reduc_minmax<q>")))]
)

;; Pairwise unsigned minimum of two 64-bit integer vectors
;; (vpmin.<V_u_elem>).

(define_insn "neon_vpumin<mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
                     (match_operand:VDI 2 "s_register_operand" "w")]
                   UNSPEC_VPUMIN))]
  "TARGET_NEON"
  "vpmin.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)

;; Pairwise unsigned maximum of two 64-bit integer vectors
;; (vpmax.<V_u_elem>).

(define_insn "neon_vpumax<mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
        (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
                     (match_operand:VDI 2 "s_register_operand" "w")]
                   UNSPEC_VPUMAX))]
  "TARGET_NEON"
  "vpmax.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)

;; Saturating arithmetic

; NOTE: Neon supports many more saturating variants of instructions than the
; following, but these are all GCC currently understands.
; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
; yet either, although these patterns may be used by intrinsics when they're
; added.

;; Signed saturating add of 64-bit vectors (ss_plus -> vqadd.<V_s_elem>).

(define_insn "*ss_add<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
       (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
                   (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqadd.<V_s_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qadd<q>")]
)

;; Unsigned saturating add of 64-bit vectors (us_plus -> vqadd.<V_u_elem>).

(define_insn "*us_add<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
       (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
                   (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqadd.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qadd<q>")]
)

;; Signed saturating subtract of 64-bit vectors
;; (ss_minus -> vqsub.<V_s_elem>).

(define_insn "*ss_sub<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
       (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqsub.<V_s_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qsub<q>")]
)

;; Unsigned saturating subtract of 64-bit vectors
;; (us_minus -> vqsub.<V_u_elem>).

(define_insn "*us_sub<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
       (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqsub.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qsub<q>")]
)

;; Vector comparison producing a <V_cmp_result> mask.  Floating-point modes
;; are only accepted under -funsafe-math-optimizations.  The expansion is
;; delegated to arm_expand_vector_compare.

(define_expand "vec_cmp<mode><v_cmp_result>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
        (match_operator:<V_cmp_result> 1 "comparison_operator"
          [(match_operand:VDQWH 2 "s_register_operand")
           (match_operand:VDQWH 3 "reg_or_zero_operand")]))]
  "TARGET_NEON
   && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
                             operands[2], operands[3], false);
  DONE;
})

;; Unsigned vector comparison for integer modes (VDQIW); the result has the
;; same mode as the inputs.

(define_expand "vec_cmpu<mode><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand")
        (match_operator:VDQIW 1 "comparison_operator"
          [(match_operand:VDQIW 2 "s_register_operand")
           (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
  "TARGET_NEON"
{
  arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
                             operands[2], operands[3], false);
  DONE;
})

;; Select between operands 1 and 2 under control of mask operand 3,
;; implemented with neon_vbsl (the mask is passed as its first input).
;; Floating-point modes are only accepted under -funsafe-math-optimizations.

(define_expand "vcond_mask_<mode><v_cmp_result>"
  [(set (match_operand:VDQWH 0 "s_register_operand")
        (if_then_else:VDQWH
          (match_operand:<V_cmp_result> 3 "s_register_operand")
          (match_operand:VDQWH 1 "s_register_operand")
          (match_operand:VDQWH 2 "s_register_operand")))]
  "TARGET_NEON
   && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], operands[1],
                                  operands[2]));
  DONE;
})

;; Patterns for builtins.

; good for plain vadd, vaddq.
;; vadd builtin for VCVTF modes.  When ARM_HAVE_NEON_<MODE>_ARITH holds, the
;; generic add<mode>3 pattern is emitted; otherwise the unspec form
;; neon_vadd<mode>_unspec is emitted instead.

(define_expand "neon_vadd<mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")]
  "TARGET_NEON"
{
  if (ARM_HAVE_NEON_<MODE>_ARITH)
    emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
  else
    emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
                                           operands[2]));
  DONE;
})

;; vadd builtin for half-precision vector modes (VH); always forwards to
;; add<mode>3.  Requires TARGET_NEON_FP16INST.

(define_expand "neon_vadd<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})

;; vsub builtin for half-precision vector modes (VH); always forwards to
;; sub<mode>3.  Requires TARGET_NEON_FP16INST.

(define_expand "neon_vsub<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})

; Note that NEON operations don't support the full IEEE 754 standard: in
; particular, denormal values are flushed to zero.  This means that GCC cannot
; use those instructions for autovectorization, etc. unless
; -funsafe-math-optimizations is in effect (in which case flush-to-zero
; behavior is permissible).  Intrinsic operations (provided by the arm_neon.h
; header) must work in either case: if -funsafe-math-optimizations is given,
; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
; expand to unspecs (which may potentially limit the extent to which they might
; be optimized by generic code).

; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vector add for VCVTF modes (UNSPEC_VADD), emitted by the
;; neon_vadd<mode> expander when ARM_HAVE_NEON_<MODE>_ARITH is false.  The
;; scheduling type depends on whether the mode is floating-point.

(define_insn "neon_vadd<mode>_unspec"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                      (match_operand:VCVTF 2 "s_register_operand" "w")]
                     UNSPEC_VADD))]
  "TARGET_NEON"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_addsub_s<q>")
                    (const_string "neon_add<q>")))]
)

;; Long add (vaddl): two 64-bit integer vectors (VDI) in, one <V_widen>
;; result out.  The <sup> iterator attribute supplies the type letter that
;; precedes the element size in the mnemonic suffix.

(define_insn "neon_vaddl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
                           (match_operand:VDI 2 "s_register_operand" "w")]
                          VADDL))]
  "TARGET_NEON"
  "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_add_long")]
)

;; Wide add (vaddw): operand 1 is already in the <V_widen> mode and operand 2
;; is a 64-bit integer vector (VDI).

(define_insn "neon_vaddw<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
                           (match_operand:VDI 2 "s_register_operand" "w")]
                          VADDW))]
  "TARGET_NEON"
  "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_add_widen")]
)

; vhadd and vrhadd.
;; Halving add (vhadd / vrhadd, chosen by the <r> attribute of the VHADD
;; iterator) on integer vectors (VDQIW).
;; NOTE(review): the type attribute is "neon_add_halve_q" for every mode,
;; whereas neighbouring patterns use a <q> suffix -- confirm this is intended.

(define_insn "@neon_v<r>hadd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
                       (match_operand:VDQIW 2 "s_register_operand" "w")]
                      VHADD))]
  "TARGET_NEON"
  "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_add_halve_q")]
)

;; Saturating add builtin (vqadd) on VDQIX vectors, modelled with the VQADD
;; unspec iterator.

(define_insn "neon_vqadd<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:VDQIX 2 "s_register_operand" "w")]
                     VQADD))]
  "TARGET_NEON"
  "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qadd<q>")]
)

;; Add with narrowing to the high half (vaddhn / vraddhn, chosen by <r>):
;; two VN inputs printed as Q registers, one <V_narrow> result printed as a
;; D register.

(define_insn "neon_v<r>addhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:VN 2 "s_register_operand" "w")]
                           VADDHN))]
  "TARGET_NEON"
  "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_add_halve_narrow_q")]
)

;; Polynomial and Float multiplication.
;; Multiply for the VPF modes, modelled as UNSPEC_VMUL.  The <pf> attribute
;; supplies the type letter placed before the element size in the mnemonic
;; suffix; the scheduling type depends on whether the mode is floating-point.
(define_insn "neon_vmul<pf><mode>"
  [(set (match_operand:VPF 0 "s_register_operand" "=w")
        (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
                      (match_operand:VPF 2 "s_register_operand" "w")]
                     UNSPEC_VMUL))]
  "TARGET_NEON"
  "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_mul_s<q>")
                    (const_string "neon_mul_<V_elem_ch><q>")))]
)

;; Half-precision vector multiply (vmul.f16), expressed with the generic
;; mult code.  Requires TARGET_NEON_FP16INST.

(define_insn "neon_vmulf<mode>"
 [(set
   (match_operand:VH 0 "s_register_operand" "=w")
   (mult:VH
    (match_operand:VH 1 "s_register_operand" "w")
    (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
)

;; vmla builtin.  When ARM_HAVE_NEON_<MODE>_ARITH holds, the
;; mul<mode>3add<mode>_neon pattern is emitted; otherwise the unspec form
;; neon_vmla<mode>_unspec is emitted instead.

(define_expand "neon_vmla<mode>"
  [(match_operand:VDQW 0 "s_register_operand")
   (match_operand:VDQW 1 "s_register_operand")
   (match_operand:VDQW 2 "s_register_operand")
   (match_operand:VDQW 3 "s_register_operand")]
  "TARGET_NEON"
{
  if (ARM_HAVE_NEON_<MODE>_ARITH)
    emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
                                             operands[2], operands[3]));
  else
    emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
                                           operands[2], operands[3]));
  DONE;
})

;; vfma builtin for VCVTF modes.  The builtin's operand 1 is passed as the
;; last argument of the fma generator, after operands 2 and 3.

(define_expand "neon_vfma<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
                                       operands[1]));
  DONE;
})

;; vfma builtin for half-precision vector modes (VH).  Same operand rotation
;; as the VCVTF variant.

(define_expand "neon_vfma<VH:mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")
   (match_operand:VH 3 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  /* Use the _intrinsic generator, as the other vfma/vfms builtin expanders
     do, so the builtin does not go through the plain fma<mode>4 pattern
     (NOTE(review): presumably that pattern is for autovectorization and
     gated on unsafe math -- its condition is not visible here).  */
  emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
                                       operands[1]));
  DONE;
})

;; vfms builtin for VCVTF modes; forwards to fmsub<mode>4_intrinsic with the
;; builtin's operand 1 passed last.

(define_expand "neon_vfms<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
                                         operands[1]));
  DONE;
})

;; vfms builtin for half-precision vector modes (VH); forwards to
;; fmsub<mode>4_intrinsic with the builtin's operand 1 passed last.

(define_expand "neon_vfms<VH:mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")
   (match_operand:VH 3 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
                                         operands[1]));
  DONE;
})

;; The expand RTL structure here is not important.
;; We use the gen_* functions anyway.
;; We just need something to wrap the iterators around.
1643 1644(define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>" 1645 [(set (match_operand:VCVTF 0 "s_register_operand") 1646 (unspec:VCVTF 1647 [(match_operand:VCVTF 1 "s_register_operand") 1648 (PLUSMINUS:<VFML> 1649 (match_operand:<VFML> 2 "s_register_operand") 1650 (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))] 1651 "TARGET_FP16FML" 1652{ 1653 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>); 1654 emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0], 1655 operands[1], 1656 operands[2], 1657 operands[3], 1658 half, half)); 1659 DONE; 1660}) 1661 1662(define_insn "vfmal_low<mode>_intrinsic" 1663 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") 1664 (fma:VCVTF 1665 (float_extend:VCVTF 1666 (vec_select:<VFMLSEL> 1667 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") 1668 (match_operand:<VFML> 4 "vect_par_constant_low" ""))) 1669 (float_extend:VCVTF 1670 (vec_select:<VFMLSEL> 1671 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>") 1672 (match_operand:<VFML> 5 "vect_par_constant_low" ""))) 1673 (match_operand:VCVTF 1 "s_register_operand" "0")))] 1674 "TARGET_FP16FML" 1675 "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3" 1676 [(set_attr "type" "neon_fp_mla_s<q>")] 1677) 1678 1679(define_insn "vfmsl_high<mode>_intrinsic" 1680 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") 1681 (fma:VCVTF 1682 (float_extend:VCVTF 1683 (neg:<VFMLSEL> 1684 (vec_select:<VFMLSEL> 1685 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") 1686 (match_operand:<VFML> 4 "vect_par_constant_high" "")))) 1687 (float_extend:VCVTF 1688 (vec_select:<VFMLSEL> 1689 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>") 1690 (match_operand:<VFML> 5 "vect_par_constant_high" ""))) 1691 (match_operand:VCVTF 1 "s_register_operand" "0")))] 1692 "TARGET_FP16FML" 1693 "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3" 1694 [(set_attr "type" "neon_fp_mla_s<q>")] 1695) 1696 1697(define_insn 
"vfmal_high<mode>_intrinsic" 1698 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") 1699 (fma:VCVTF 1700 (float_extend:VCVTF 1701 (vec_select:<VFMLSEL> 1702 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") 1703 (match_operand:<VFML> 4 "vect_par_constant_high" ""))) 1704 (float_extend:VCVTF 1705 (vec_select:<VFMLSEL> 1706 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>") 1707 (match_operand:<VFML> 5 "vect_par_constant_high" ""))) 1708 (match_operand:VCVTF 1 "s_register_operand" "0")))] 1709 "TARGET_FP16FML" 1710 "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3" 1711 [(set_attr "type" "neon_fp_mla_s<q>")] 1712) 1713 1714(define_insn "vfmsl_low<mode>_intrinsic" 1715 [(set (match_operand:VCVTF 0 "s_register_operand" "=w") 1716 (fma:VCVTF 1717 (float_extend:VCVTF 1718 (neg:<VFMLSEL> 1719 (vec_select:<VFMLSEL> 1720 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") 1721 (match_operand:<VFML> 4 "vect_par_constant_low" "")))) 1722 (float_extend:VCVTF 1723 (vec_select:<VFMLSEL> 1724 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>") 1725 (match_operand:<VFML> 5 "vect_par_constant_low" ""))) 1726 (match_operand:VCVTF 1 "s_register_operand" "0")))] 1727 "TARGET_FP16FML" 1728 "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3" 1729 [(set_attr "type" "neon_fp_mla_s<q>")] 1730) 1731 1732(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>" 1733 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand") 1734 (unspec:VCVTF 1735 [(match_operand:VCVTF 1 "s_register_operand") 1736 (PLUSMINUS:<VFML> 1737 (match_operand:<VFML> 2 "s_register_operand") 1738 (match_operand:<VFML> 3 "s_register_operand")) 1739 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))] 1740 "TARGET_FP16FML" 1741{ 1742 rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4]))); 1743 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>); 1744 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic 1745 (operands[0], 
operands[1],
				     operands[2], operands[3],
				     half, lane));
  DONE;
})

;; VFMAL by lane.  Operand 2 contributes the half selected by operand 4
;; (a "low" parallel here); operand 3 has the same mode as operand 2 and
;; operand 5 is the lane taken from it.
(define_insn "vfmal_lane_low<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(fma:VCVTF
	  (float_extend:VCVTF
	    (vec_select:<VFMLSEL>
	      (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
	      (match_operand:<VFML> 4 "vect_par_constant_low" "")))
	  (float_extend:VCVTF
	    (vec_duplicate:<VFMLSEL>
	      (vec_select:HF
		(match_operand:<VFML> 3 "s_register_operand" "x")
		(parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
	  (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    const int half_nunits = GET_MODE_NUNITS (<VFMLSEL>mode);
    const int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

    /* A lane beyond the first half is printed relative to the high
       half of operand 3.  */
    if (lane >= half_nunits)
      {
	operands[5] = GEN_INT (lane - half_nunits);
	return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
      }

    operands[5] = GEN_INT (lane);
    return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Expander for the by-lane forms in which operand 3 has a different
;; mode (<VFMLSEL2>) from operand 2.  It adjusts the lane number for
;; endianness, builds the half-selection parallel and hands both to the
;; matching *_intrinsic pattern.
(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
  [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
     (unspec:VCVTF
       [(match_operand:VCVTF 1 "s_register_operand")
	 (PLUSMINUS:<VFML>
	   (match_operand:<VFML> 2 "s_register_operand")
	   (match_operand:<VFMLSEL2> 3 "s_register_operand"))
	 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
  "TARGET_FP16FML"
{
  rtx lane_rtx
    = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
  rtx half_sel
    = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);

  emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
	     (operands[0], operands[1], operands[2], operands[3],
	      half_sel, lane_rtx));
  DONE;
})

;; Used to implement the intrinsics:
;; float32x4_t vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; 'a' and 'b' have different sizes, so the modes of the sub-expressions
;; need care, and the lane must be taken from the right S or D
;; subregister.

(define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(fma:VCVTF
	  (float_extend:VCVTF
	    (vec_select:<VFMLSEL>
	      (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
	      (match_operand:<VFML> 4 "vect_par_constant_low" "")))
	  (float_extend:VCVTF
	    (vec_duplicate:<VFMLSEL>
	      (vec_select:HF
		(match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
		(parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
	  (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    const int lanes_per_subreg = GET_MODE_NUNITS (<VFMLSEL>mode);
    const int endian_lane
      = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));

    operands[5] = GEN_INT (endian_lane % lanes_per_subreg);
    /* Operands 2 and 3 are rebuilt in the halved VFMLSEL mode so that
       print_operand emits the appropriate S or D register prefix.  */
    operands[3] = gen_rtx_REG (<VFMLSEL>mode,
			       REGNO (operands[3])
			       + endian_lane / lanes_per_subreg);
    operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
    return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Used to implement the intrinsics:
;; float32x4_t vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; 'a' and 'b' have different sizes, so the modes of the sub-expressions
;; need care, and the lane must be taken from the right S or D
;; subregister.

(define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(fma:VCVTF
	  (float_extend:VCVTF
	    (vec_select:<VFMLSEL>
	      (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
	      (match_operand:<VFML> 4 "vect_par_constant_high" "")))
	  (float_extend:VCVTF
	    (vec_duplicate:<VFMLSEL>
	      (vec_select:HF
		(match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
		(parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
	  (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    const int lanes_per_subreg = GET_MODE_NUNITS (<VFMLSEL>mode);
    const int endian_lane
      = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));

    operands[5] = GEN_INT (endian_lane % lanes_per_subreg);
    /* Operand 3 is rebuilt in the halved VFMLSEL mode: the half-width
       subreg holding the lane has been computed and it is *that*
       subreg which must be printed.  */
    operands[3] = gen_rtx_REG (<VFMLSEL>mode,
			       REGNO (operands[3])
			       + endian_lane / lanes_per_subreg);
    return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; As vfmal_lane_low<mode>_intrinsic, but operand 4 is a "high" parallel.
(define_insn "vfmal_lane_high<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(fma:VCVTF
	  (float_extend:VCVTF
	    (vec_select:<VFMLSEL>
	      (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
	      (match_operand:<VFML> 4 "vect_par_constant_high" "")))
	  (float_extend:VCVTF
	    (vec_duplicate:<VFMLSEL>
	      (vec_select:HF
		(match_operand:<VFML> 3 "s_register_operand" "x")
		(parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
	  (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    const int half_nunits = GET_MODE_NUNITS (<VFMLSEL>mode);
    const int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

    /* A lane beyond the first half is printed relative to the high
       half of operand 3.  */
    if (lane >= half_nunits)
      {
	operands[5] = GEN_INT (lane - half_nunits);
	return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
      }

    operands[5] = GEN_INT (lane);
    return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; VFMSL by lane: same shape as the VFMAL patterns, with the selected
;; half of operand 2 negated before it is extended.
(define_insn "vfmsl_lane_low<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(fma:VCVTF
	  (float_extend:VCVTF
	    (neg:<VFMLSEL>
	      (vec_select:<VFMLSEL>
		(match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
		(match_operand:<VFML> 4 "vect_par_constant_low" ""))))
	  (float_extend:VCVTF
	    (vec_duplicate:<VFMLSEL>
	      (vec_select:HF
		(match_operand:<VFML> 3 "s_register_operand" "x")
		(parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
	  (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    const int half_nunits = GET_MODE_NUNITS (<VFMLSEL>mode);
    const int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

    /* A lane beyond the first half is printed relative to the high
       half of operand 3.  */
    if (lane >= half_nunits)
      {
	operands[5] = GEN_INT (lane - half_nunits);
	return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
      }

    operands[5] = GEN_INT (lane);
    return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Used to implement the intrinsics:
;; float32x4_t vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; 'a' and 'b' have different sizes, so the modes of the sub-expressions
;; need care, and the lane must be taken from the right S or D
;; subregister.

(define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(fma:VCVTF
	  (float_extend:VCVTF
	    (neg:<VFMLSEL>
	      (vec_select:<VFMLSEL>
		(match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
		(match_operand:<VFML> 4 "vect_par_constant_low" ""))))
	  (float_extend:VCVTF
	    (vec_duplicate:<VFMLSEL>
	      (vec_select:HF
		(match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
		(parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
	  (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    const int lanes_per_subreg = GET_MODE_NUNITS (<VFMLSEL>mode);
    const int endian_lane
      = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));

    operands[5] = GEN_INT (endian_lane % lanes_per_subreg);
    /* Operands 2 and 3 are rebuilt in the halved VFMLSEL mode so that
       print_operand emits the appropriate S or D register prefix.  */
    operands[3] = gen_rtx_REG (<VFMLSEL>mode,
			       REGNO (operands[3])
			       + endian_lane / lanes_per_subreg);
    operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
    return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; Used to implement the intrinsics:
;; float32x4_t vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; 'a' and 'b' have different sizes, so the modes of the sub-expressions
;; need care, and the lane must be taken from the right S or D
;; subregister.

(define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(fma:VCVTF
	  (float_extend:VCVTF
	    (neg:<VFMLSEL>
	      (vec_select:<VFMLSEL>
		(match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
		(match_operand:<VFML> 4 "vect_par_constant_high" ""))))
	  (float_extend:VCVTF
	    (vec_duplicate:<VFMLSEL>
	      (vec_select:HF
		(match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
		(parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
	  (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    const int lanes_per_subreg = GET_MODE_NUNITS (<VFMLSEL>mode);
    const int endian_lane
      = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));

    operands[5] = GEN_INT (endian_lane % lanes_per_subreg);
    /* Operand 3 is rebuilt in the halved VFMLSEL mode: the half-width
       subreg holding the lane has been computed and it is *that*
       subreg which must be printed.  */
    operands[3] = gen_rtx_REG (<VFMLSEL>mode,
			       REGNO (operands[3])
			       + endian_lane / lanes_per_subreg);
    return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

;; As vfmsl_lane_low<mode>_intrinsic, but operand 4 is a "high" parallel.
(define_insn "vfmsl_lane_high<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(fma:VCVTF
	  (float_extend:VCVTF
	    (neg:<VFMLSEL>
	      (vec_select:<VFMLSEL>
		(match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
		(match_operand:<VFML> 4 "vect_par_constant_high" ""))))
	  (float_extend:VCVTF
	    (vec_duplicate:<VFMLSEL>
	      (vec_select:HF
		(match_operand:<VFML> 3 "s_register_operand" "x")
		(parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
	  (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  {
    const int half_nunits = GET_MODE_NUNITS (<VFMLSEL>mode);
    const int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

    /* A lane beyond the first half is printed relative to the high
       half of operand 3.  */
    if (lane >= half_nunits)
      {
	operands[5] = GEN_INT (lane - half_nunits);
	return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
      }

    operands[5] = GEN_INT (lane);
    return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
  }
  [(set_attr "type" "neon_fp_mla_s<q>")]
)

; Used for intrinsics when flag_unsafe_math_optimizations is false.

;; Operand 1 is the accumulator and is tied to the destination.
(define_insn "neon_vmla<mode>_unspec"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
	(unspec:VDQW
	  [(match_operand:VDQW 1 "s_register_operand" "0")
	   (match_operand:VDQW 2 "s_register_operand" "w")
	   (match_operand:VDQW 3 "s_register_operand" "w")]
	  UNSPEC_VMLA))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
	(if_then_else (match_test "<Is_float_mode>")
		      (const_string "neon_fp_mla_s<q>")
		      (const_string "neon_mla_<V_elem_ch><q>")))]
)

;; Widening multiply-accumulate; the accumulator (operand 1) is tied to
;; the widened destination.
(define_insn "neon_vmlal<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen>
	  [(match_operand:<V_widen> 1 "s_register_operand" "0")
	   (match_operand:VW 2 "s_register_operand" "w")
	   (match_operand:VW 3 "s_register_operand" "w")]
	  VMLAL))]
  "TARGET_NEON"
  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)

;; Multiply-subtract entry point.  Emits the generic mul-neg-add pattern
;; when ARM_HAVE_NEON_<MODE>_ARITH holds and the UNSPEC form otherwise.
(define_expand "neon_vmls<mode>"
  [(match_operand:VDQW 0 "s_register_operand")
   (match_operand:VDQW 1 "s_register_operand")
   (match_operand:VDQW 2 "s_register_operand")
   (match_operand:VDQW 3 "s_register_operand")]
  "TARGET_NEON"
{
  if (!(ARM_HAVE_NEON_<MODE>_ARITH))
    emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
					   operands[2], operands[3]));
  else
    emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
						      operands[1],
						      operands[2],
						      operands[3]));
  DONE;
})

; Used for intrinsics when flag_unsafe_math_optimizations is false.

;; Operand 1 is the accumulator and is tied to the destination.
(define_insn "neon_vmls<mode>_unspec"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
	(unspec:VDQW
	  [(match_operand:VDQW 1 "s_register_operand" "0")
	   (match_operand:VDQW 2 "s_register_operand" "w")
	   (match_operand:VDQW 3 "s_register_operand" "w")]
	  UNSPEC_VMLS))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
	(if_then_else (match_test "<Is_float_mode>")
		      (const_string "neon_fp_mla_s<q>")
		      (const_string "neon_mla_<V_elem_ch><q>")))]
)

;; Widening multiply-subtract; the accumulator (operand 1) is tied to
;; the widened destination.
(define_insn "neon_vmlsl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen>
	  [(match_operand:<V_widen> 1 "s_register_operand" "0")
	   (match_operand:VW 2 "s_register_operand" "w")
	   (match_operand:VW 3 "s_register_operand" "w")]
	  VMLSL))]
  "TARGET_NEON"
  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)

;; vqdmulh, vqrdmulh
(define_insn "neon_vq<r>dmulh<mode>"
  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
	(unspec:VMDQI
	  [(match_operand:VMDQI 1 "s_register_operand" "w")
	   (match_operand:VMDQI 2 "s_register_operand" "w")]
	  VQDMULH))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
)

;; vqrdmlah, vqrdmlsh
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
	(unspec:VMDQI
	  [(match_operand:VMDQI 1 "s_register_operand" "0")
	   (match_operand:VMDQI 2 "s_register_operand" "w")
	   (match_operand:VMDQI 3 "s_register_operand" "w")]
	  VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)

;; Operand 1 is the accumulator, tied to the widened destination.
(define_insn "neon_vqdmlal<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen>
	  [(match_operand:<V_widen> 1 "s_register_operand" "0")
	   (match_operand:VMDI 2 "s_register_operand" "w")
	   (match_operand:VMDI 3 "s_register_operand" "w")]
	  UNSPEC_VQDMLAL))]
  "TARGET_NEON"
  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)

;; Operand 1 is the accumulator, tied to the widened destination.
(define_insn "neon_vqdmlsl<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen>
	  [(match_operand:<V_widen> 1 "s_register_operand" "0")
	   (match_operand:VMDI 2 "s_register_operand" "w")
	   (match_operand:VMDI 3 "s_register_operand" "w")]
	  UNSPEC_VQDMLSL))]
  "TARGET_NEON"
  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)

;; Widening multiply of two VW-mode operands.
(define_insn "neon_vmull<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen>
	  [(match_operand:VW 1 "s_register_operand" "w")
	   (match_operand:VW 2 "s_register_operand" "w")]
	  VMULL))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)

;; Widening vqdmull of two VMDI-mode operands.
(define_insn "neon_vqdmull<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen>
	  [(match_operand:VMDI 1 "s_register_operand" "w")
	   (match_operand:VMDI 2 "s_register_operand" "w")]
	  UNSPEC_VQDMULL))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
)

;; Floating-point subtract entry point.  Emits the generic sub<mode>3
;; pattern when ARM_HAVE_NEON_<MODE>_ARITH holds and the UNSPEC form
;; otherwise.
(define_expand "neon_vsub<mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")]
  "TARGET_NEON"
{
  if (!(ARM_HAVE_NEON_<MODE>_ARITH))
    emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
					   operands[2]));
  else
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})

; Used for intrinsics when flag_unsafe_math_optimizations is false.

(define_insn "neon_vsub<mode>_unspec"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
		       (match_operand:VCVTF 2 "s_register_operand" "w")]
		      UNSPEC_VSUB))]
  "TARGET_NEON"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
	(if_then_else (match_test "<Is_float_mode>")
		      (const_string "neon_fp_addsub_s<q>")
		      (const_string "neon_sub<q>")))]
)

;; Subtract of two D-register operands producing a <V_widen> result.
(define_insn "neon_vsubl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
			   (match_operand:VDI 2 "s_register_operand" "w")]
			  VSUBL))]
  "TARGET_NEON"
  "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sub_long")]
)

;; As above, but operand 1 is already in the <V_widen> mode.
(define_insn "neon_vsubw<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
			   (match_operand:VDI 2 "s_register_operand" "w")]
			  VSUBW))]
  "TARGET_NEON"
  "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_sub_widen")]
)

(define_insn "neon_vqsub<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
		       (match_operand:VDQIX 2 "s_register_operand" "w")]
		      VQSUB))]
  "TARGET_NEON"
  "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qsub<q>")]
)

(define_insn "neon_vhsub<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
	(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
		       (match_operand:VDQIW 2 "s_register_operand" "w")]
		      VHSUB))]
  "TARGET_NEON"
  "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sub_halve<q>")]
)

;; Subtract producing a <V_narrow> result in a D register.
(define_insn "neon_v<r>subhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
	(unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
			    (match_operand:VN 2 "s_register_operand" "w")]
			   VSUBHN))]
  "TARGET_NEON"
  "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_sub_halve_narrow_q")]
)

;; These may expand to an UNSPEC pattern when a floating point mode is used
;; without unsafe math optimizations.
(define_expand "@neon_vc<cmp_op><mode>"
  [(match_operand:<V_cmp_result> 0 "s_register_operand")
     (neg:<V_cmp_result>
       (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand")
			 (match_operand:VDQW 2 "reg_or_zero_operand")))]
  "TARGET_NEON"
  {
    /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
       are enabled.  */
    if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
	&& !flag_unsafe_math_optimizations)
      {
	/* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
	   we define gen_neon_vceq<mode>_insn_unspec only for float modes
	   whereas this expander iterates over the integer modes as well,
	   but we will never expand to UNSPECs for the integer comparisons.  */
	switch (<MODE>mode)
	  {
	    case E_V2SFmode:
	      emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
							      operands[1],
							      operands[2]));
	      break;
	    case E_V4SFmode:
	      emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
							      operands[1],
							      operands[2]));
	      break;
	    default:
	      gcc_unreachable ();
	  }
      }
    else
      emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
						 operands[1],
						 operands[2]));
    DONE;
  }
)

;; Comparison as real RTL.  The condition excludes float modes unless
;; -funsafe-math-optimizations is on.  Alternative 1 compares against
;; zero and prints "#0" instead of a register.
(define_insn "@neon_vc<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
	(neg:<V_cmp_result>
	  (COMPARISONS:<V_cmp_result>
	    (match_operand:VDQW 1 "s_register_operand" "w,w")
	    (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
  "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
		    && !flag_unsafe_math_optimizations)"
  {
    char pattern[100];
    sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
		      " %%<V_reg>1, %s",
		       GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
			 ? "f" : "<cmp_type>",
		       which_alternative == 0
			 ? "%<V_reg>2" : "#0");
    output_asm_insn (pattern, operands);
    return "";
  }
  [(set (attr "type")
	(if_then_else (match_operand 2 "zero_operand")
		      (const_string "neon_compare_zero<q>")
		      (const_string "neon_compare<q>")))]
)

;; UNSPEC form of the float comparison.  Alternative 1 compares against
;; zero and prints "#0" instead of a register.
(define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
	(unspec:<V_cmp_result>
	  [(match_operand:VCVTF 1 "s_register_operand" "w,w")
	   (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
	  NEON_VCMP))]
  "TARGET_NEON"
  {
    char pattern[100];
    sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
		      " %%<V_reg>1, %s",
		       which_alternative == 0
			 ? "%<V_reg>2" : "#0");
    output_asm_insn (pattern, operands);
    return "";
}
  [(set_attr "type" "neon_fp_compare_s<q>")]
)

;; Half-precision (VH) counterpart of the comparison expander above.
(define_expand "@neon_vc<cmp_op><mode>"
 [(match_operand:<V_cmp_result> 0 "s_register_operand")
  (neg:<V_cmp_result>
   (COMPARISONS:VH
    (match_operand:VH 1 "s_register_operand")
    (match_operand:VH 2 "reg_or_zero_operand")))]
 "TARGET_NEON_FP16INST"
{
  /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
     are enabled.  */
  if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
      && !flag_unsafe_math_optimizations)
    emit_insn
      (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
       (operands[0], operands[1], operands[2]));
  else
    emit_insn
      (gen_neon_vc<cmp_op><mode>_fp16insn
       (operands[0], operands[1], operands[2]));
  DONE;
})

(define_insn "neon_vc<cmp_op><mode>_fp16insn"
 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
   (neg:<V_cmp_result>
    (COMPARISONS:<V_cmp_result>
     (match_operand:VH 1 "s_register_operand" "w,w")
     (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
 "TARGET_NEON_FP16INST
  && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
  && !flag_unsafe_math_optimizations)"
{
  char pattern[100];
  sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
	   " %%<V_reg>1, %s",
	   GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
	   ? "f" : "<cmp_type>",
	   which_alternative == 0
	   ? "%<V_reg>2" : "#0");
  output_asm_insn (pattern, operands);
  return "";
}
 [(set (attr "type")
   (if_then_else (match_operand 2 "zero_operand")
    (const_string "neon_compare_zero<q>")
    (const_string "neon_compare<q>")))])

(define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
 [(set
   (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
   (unspec:<V_cmp_result>
    [(match_operand:VH 1 "s_register_operand" "w,w")
     (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
    NEON_VCMP))]
 "TARGET_NEON_FP16INST"
{
  char pattern[100];
  sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
	   " %%<V_reg>1, %s",
	   which_alternative == 0
	   ? "%<V_reg>2" : "#0");
  output_asm_insn (pattern, operands);
  return "";
}
 [(set_attr "type" "neon_fp_compare_s<q>")])

;; Integer comparisons over the GTUGEU codes; register operands only.
(define_insn "@neon_vc<code><mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
	(neg:<V_cmp_result>
	  (GTUGEU:<V_cmp_result>
	    (match_operand:VDQIW 1 "s_register_operand" "w")
	    (match_operand:VDQIW 2 "s_register_operand" "w"))))]
  "TARGET_NEON"
  "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_compare<q>")]
)

;; Absolute-value comparisons.  The RTL form is emitted only under
;; -funsafe-math-optimizations; otherwise the UNSPEC form is used.
(define_expand "neon_vca<cmp_op><mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
	(neg:<V_cmp_result>
	  (GLTE:<V_cmp_result>
	    (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
	    (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
  "TARGET_NEON"
  {
    if (flag_unsafe_math_optimizations)
      emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
						  operands[2]));
    else
      emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
							 operands[1],
							 operands[2]));
    DONE;
  }
)

(define_insn "neon_vca<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
	(neg:<V_cmp_result>
	  (GLTE:<V_cmp_result>
	    (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
	    (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)

(define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
	(unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
				(match_operand:VCVTF 2 "s_register_operand" "w")]
			       NEON_VAGLTE))]
  "TARGET_NEON"
  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)

;; Half-precision (VH) counterpart of the absolute-value comparison
;; expander above.
(define_expand "neon_vca<cmp_op><mode>"
  [(set
    (match_operand:<V_cmp_result> 0 "s_register_operand")
    (neg:<V_cmp_result>
     (GLTE:<V_cmp_result>
      (abs:VH (match_operand:VH 1 "s_register_operand"))
      (abs:VH (match_operand:VH 2 "s_register_operand")))))]
 "TARGET_NEON_FP16INST"
{
  if (flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
	       (operands[0], operands[1], operands[2]));
  else
    emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
	       (operands[0], operands[1], operands[2]));
  DONE;
})

(define_insn "neon_vca<cmp_op><mode>_fp16insn"
  [(set
    (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
    (neg:<V_cmp_result>
     (GLTE:<V_cmp_result>
      (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
      (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_compare_s<q>")]
)

;; Operates on the half-precision VH modes, so it is gated on
;; TARGET_NEON_FP16INST, matching the VH expander that emits it and the
;; non-unspec neon_vca<cmp_op><mode>_fp16insn pattern.
(define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
   (unspec:<V_cmp_result>
    [(match_operand:VH 1 "s_register_operand" "w")
     (match_operand:VH 2 "s_register_operand" "w")]
    NEON_VAGLTE))]
 "TARGET_NEON_FP16INST"
 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_compare_s<q>")]
)

;; Compare-against-zero entry point for the VH modes: forwards to the VH
;; comparison expander with CONST0_RTX as the second operand.
(define_expand "neon_vc<cmp_op>z<mode>"
 [(set
   (match_operand:<V_cmp_result> 0 "s_register_operand")
   (COMPARISONS:<V_cmp_result>
    (match_operand:VH 1 "s_register_operand")
    (const_int 0)))]
 "TARGET_NEON_FP16INST"
 {
  emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
					CONST0_RTX (<MODE>mode)));
  DONE;
})

;; Matches (op1 & op2) == 0 plus all-ones, which is emitted as vtst.
(define_insn "neon_vtst_combine<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
	(plus:VDQIW
	  (eq:VDQIW
	    (and:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
		       (match_operand:VDQIW 2 "s_register_operand" "w"))
	    (match_operand:VDQIW 3 "zero_operand" "i"))
	  (match_operand:VDQIW 4 "minus_one_operand" "i")))]
  "TARGET_NEON"
  "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_tst<q>")]
)

(define_insn "neon_vabd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
	(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
		       (match_operand:VDQIW 2 "s_register_operand" "w")]
		      VABD))]
  "TARGET_NEON"
  "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_abd<q>")]
)

;; Half-precision floating-point absolute difference.  Uses the same
;; floating-point scheduling type as neon_vabdf<mode> below rather than
;; the integer neon_abd type.
(define_insn "neon_vabd<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
    (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
		(match_operand:VH 2 "s_register_operand" "w")]
     UNSPEC_VABD_F))]
 "TARGET_NEON_FP16INST"
 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)

(define_insn "neon_vabdf<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
		       (match_operand:VCVTF 2 "s_register_operand" "w")]
		      UNSPEC_VABD_F))]
  "TARGET_NEON"
  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)

(define_insn "neon_vabdl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
			   (match_operand:VW 2 "s_register_operand" "w")]
			  VABDL))]
  "TARGET_NEON"
  "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_abd_long")]
)

;; Absolute difference of operands 2 and 3 added to the accumulator
;; (operand 1, tied to the destination).
(define_insn "neon_vaba<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
	(plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
				   (match_operand:VDQIW 3 "s_register_operand" "w")]
				  VABD)
		    (match_operand:VDQIW 1 "s_register_operand" "0")))]
  "TARGET_NEON"
  "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "neon_vabal<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
					   (match_operand:VW 3 "s_register_operand" "w")]
					  VABDL)
			(match_operand:<V_widen> 1 "s_register_operand" "0")))]
  "TARGET_NEON"
  "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_arith_acc<q>")]
)

;; Emitted sequence: vabdl on the low halves of operands 1 and 2, vabal
;; of the high halves into the same V8HI temporary, vpadal of that
;; temporary into operand 3, then a move of operand 3 to operand 0.
(define_expand "<sup>sadv16qi"
  [(use (match_operand:V4SI 0 "register_operand"))
   (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
		  (use (match_operand:V16QI 2 "register_operand"))] VABAL)
   (use (match_operand:V4SI 3 "register_operand"))]
  "TARGET_NEON"
  {
    rtx reduc = gen_reg_rtx (V8HImode);
    rtx op1_highpart = gen_reg_rtx (V8QImode);
    rtx op2_highpart = gen_reg_rtx (V8QImode);

    emit_insn (gen_neon_vabdl<sup>v8qi (reduc,
					gen_lowpart (V8QImode, operands[1]),
					gen_lowpart (V8QImode, operands[2])));

    emit_insn (gen_neon_vget_highv16qi (op1_highpart, operands[1]));
    emit_insn (gen_neon_vget_highv16qi (op2_highpart, operands[2]));
    emit_insn (gen_neon_vabal<sup>v8qi (reduc, reduc,
					op1_highpart, op2_highpart));
    emit_insn (gen_neon_vpadal<sup>v8hi (operands[3], operands[3], reduc));

    emit_move_insn (operands[0], operands[3]);
    DONE;
  }
)

(define_insn "neon_v<maxmin><sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
	(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
		       (match_operand:VDQIW 2 "s_register_operand" "w")]
		      VMAXMIN))]
  "TARGET_NEON"
  "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_minmax<q>")]
)

(define_insn "neon_v<maxmin>f<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
		       (match_operand:VCVTF 2 "s_register_operand" "w")]
		      VMAXMINF))]
  "TARGET_NEON"
  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)

(define_insn "neon_v<maxmin>f<mode>"
 [(set (match_operand:VH 0 "s_register_operand" "=w")
   (unspec:VH
    [(match_operand:VH 1 "s_register_operand" "w")
     (match_operand:VH 2 "s_register_operand" "w")]
    VMAXMINF))]
 "TARGET_NEON_FP16INST"
 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_minmax_s<q>")]
)

(define_insn "neon_vp<maxmin>fv4hf"
 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
   (unspec:V4HF
    [(match_operand:V4HF 1 "s_register_operand" "w")
     (match_operand:V4HF 2 "s_register_operand" "w")]
    VPMAXMINF))]
 "TARGET_NEON_FP16INST"
 "vp<maxmin>.f16\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax")]
)

(define_insn "neon_<fmaxmin_op><mode>"
 [(set
   (match_operand:VH 0 "s_register_operand" "=w")
   (unspec:VH
    [(match_operand:VH 1 "s_register_operand" "w")
     (match_operand:VH 2 "s_register_operand" "w")]
    VMAXMINFNM))]
 "TARGET_NEON_FP16INST"
 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_minmax_s<q>")]
)

;; v<maxmin>nm intrinsics.
(define_insn "neon_<fmaxmin_op><mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
		       (match_operand:VCVTF 2 "s_register_operand" "w")]
		       VMAXMINFNM))]
  "TARGET_NEON && TARGET_VFP5"
  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)

;; Vector forms for the IEEE-754 fmax()/fmin() functions
(define_insn "<fmaxmin><mode>3"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
		       (match_operand:VCVTF 2 "s_register_operand" "w")]
		       VMAXMINFNM))]
  "TARGET_NEON && TARGET_VFP5"
  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_minmax_s<q>")]
)

;; Thin wrapper that forwards to neon_vpadd_internal<mode>.
(define_expand "neon_vpadd<mode>"
  [(match_operand:VD 0 "s_register_operand")
   (match_operand:VD 1 "s_register_operand")
   (match_operand:VD 2 "s_register_operand")]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
					    operands[2]));
  DONE;
})

(define_insn "neon_vpaddl<sup><mode>"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
	(unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
				 VPADDL))]
  "TARGET_NEON"
  "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_reduc_add_long")]
)

;; Operand 1 is the accumulator and is tied to the destination.
(define_insn "neon_vpadal<sup><mode>"
  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
	(unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
				  (match_operand:VDQIW 2 "s_register_operand" "w")]
				 VPADAL))]
  "TARGET_NEON"
  "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_reduc_add_acc")]
)

(define_insn "neon_vp<maxmin><sup><mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
	(unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
		     (match_operand:VDI 2 "s_register_operand" "w")]
		    VPMAXMIN))]
  "TARGET_NEON"
  "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)

(define_insn "neon_vp<maxmin>f<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
		       (match_operand:VCVTF 2 "s_register_operand" "w")]
		      VPMAXMINF))]
  "TARGET_NEON"
  "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
)

(define_insn "neon_vrecps<mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
		       (match_operand:VCVTF 2 "s_register_operand" "w")]
		      UNSPEC_VRECPS))]
  "TARGET_NEON"
  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_recps_s<q>")]
)

(define_insn "neon_vrecps<mode>"
  [(set
    (match_operand:VH 0 "s_register_operand" "=w")
    (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
		(match_operand:VH 2 "s_register_operand" "w")]
     UNSPEC_VRECPS))]
  "TARGET_NEON_FP16INST"
  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_recps_s<q>")]
)

(define_insn "neon_vrsqrts<mode>"
  [(set (match_operand:VCVTF 0
"s_register_operand" "=w") 2761 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") 2762 (match_operand:VCVTF 2 "s_register_operand" "w")] 2763 UNSPEC_VRSQRTS))] 2764 "TARGET_NEON" 2765 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 2766 [(set_attr "type" "neon_fp_rsqrts_s<q>")] 2767) 2768 2769(define_insn "neon_vrsqrts<mode>" 2770 [(set 2771 (match_operand:VH 0 "s_register_operand" "=w") 2772 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w") 2773 (match_operand:VH 2 "s_register_operand" "w")] 2774 UNSPEC_VRSQRTS))] 2775 "TARGET_NEON_FP16INST" 2776 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 2777 [(set_attr "type" "neon_fp_rsqrts_s<q>")] 2778) 2779 2780(define_expand "neon_vabs<mode>" 2781 [(match_operand:VDQW 0 "s_register_operand") 2782 (match_operand:VDQW 1 "s_register_operand")] 2783 "TARGET_NEON" 2784{ 2785 emit_insn (gen_abs<mode>2 (operands[0], operands[1])); 2786 DONE; 2787}) 2788 2789(define_insn "neon_vqabs<mode>" 2790 [(set (match_operand:VDQIW 0 "s_register_operand" "=w") 2791 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")] 2792 UNSPEC_VQABS))] 2793 "TARGET_NEON" 2794 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1" 2795 [(set_attr "type" "neon_qabs<q>")] 2796) 2797 2798(define_insn "neon_bswap<mode>" 2799 [(set (match_operand:VDQHSD 0 "register_operand" "=w") 2800 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))] 2801 "TARGET_NEON" 2802 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1" 2803 [(set_attr "type" "neon_rev<q>")] 2804) 2805 2806(define_expand "neon_vneg<mode>" 2807 [(match_operand:VDQW 0 "s_register_operand") 2808 (match_operand:VDQW 1 "s_register_operand")] 2809 "TARGET_NEON" 2810{ 2811 emit_insn (gen_neon_neg<mode>2 (operands[0], operands[1])); 2812 DONE; 2813}) 2814 2815 2816;; The vcadd and vcmla patterns are made UNSPEC for the explicitly due to the 2817;; fact that their usage need to guarantee that the source vectors are 2818;; contiguous. 
It would be wrong to describe the operation without being able 2819;; to describe the permute that is also required, but even if that is done 2820;; the permute would have been created as a LOAD_LANES which means the values 2821;; in the registers are in the wrong order. 2822(define_insn "neon_vcadd<rot><mode>" 2823 [(set (match_operand:VF 0 "register_operand" "=w") 2824 (unspec:VF [(match_operand:VF 1 "register_operand" "w") 2825 (match_operand:VF 2 "register_operand" "w")] 2826 VCADD))] 2827 "TARGET_COMPLEX" 2828 "vcadd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, #<rot>" 2829 [(set_attr "type" "neon_fcadd")] 2830) 2831 2832(define_insn "neon_vcmla<rot><mode>" 2833 [(set (match_operand:VF 0 "register_operand" "=w") 2834 (plus:VF (match_operand:VF 1 "register_operand" "0") 2835 (unspec:VF [(match_operand:VF 2 "register_operand" "w") 2836 (match_operand:VF 3 "register_operand" "w")] 2837 VCMLA)))] 2838 "TARGET_COMPLEX" 2839 "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3, #<rot>" 2840 [(set_attr "type" "neon_fcmla")] 2841) 2842 2843(define_insn "neon_vcmla_lane<rot><mode>" 2844 [(set (match_operand:VF 0 "s_register_operand" "=w") 2845 (plus:VF (match_operand:VF 1 "s_register_operand" "0") 2846 (unspec:VF [(match_operand:VF 2 "s_register_operand" "w") 2847 (match_operand:VF 3 "s_register_operand" "<VF_constraint>") 2848 (match_operand:SI 4 "const_int_operand" "n")] 2849 VCMLA)))] 2850 "TARGET_COMPLEX" 2851 { 2852 operands = neon_vcmla_lane_prepare_operands (operands); 2853 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>"; 2854 } 2855 [(set_attr "type" "neon_fcmla")] 2856) 2857 2858(define_insn "neon_vcmla_laneq<rot><mode>" 2859 [(set (match_operand:VDF 0 "s_register_operand" "=w") 2860 (plus:VDF (match_operand:VDF 1 "s_register_operand" "0") 2861 (unspec:VDF [(match_operand:VDF 2 "s_register_operand" "w") 2862 (match_operand:<V_DOUBLE> 3 "s_register_operand" "<VF_constraint>") 2863 (match_operand:SI 4 "const_int_operand" "n")] 2864 VCMLA)))] 
2865 "TARGET_COMPLEX" 2866 { 2867 operands = neon_vcmla_lane_prepare_operands (operands); 2868 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>"; 2869 } 2870 [(set_attr "type" "neon_fcmla")] 2871) 2872 2873(define_insn "neon_vcmlaq_lane<rot><mode>" 2874 [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w") 2875 (plus:VQ_HSF (match_operand:VQ_HSF 1 "s_register_operand" "0") 2876 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "s_register_operand" "w") 2877 (match_operand:<V_HALF> 3 "s_register_operand" "<VF_constraint>") 2878 (match_operand:SI 4 "const_int_operand" "n")] 2879 VCMLA)))] 2880 "TARGET_COMPLEX" 2881 { 2882 operands = neon_vcmla_lane_prepare_operands (operands); 2883 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>"; 2884 } 2885 [(set_attr "type" "neon_fcmla")] 2886) 2887 2888;; The complex mul operations always need to expand to two instructions. 2889;; The first operation does half the computation and the second does the 2890;; remainder. Because of this, expand early. 2891(define_expand "cmul<conj_op><mode>3" 2892 [(set (match_operand:VDF 0 "register_operand") 2893 (unspec:VDF [(match_operand:VDF 1 "register_operand") 2894 (match_operand:VDF 2 "register_operand")] 2895 VCMUL_OP))] 2896 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" 2897{ 2898 rtx res1 = gen_reg_rtx (<MODE>mode); 2899 rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode)); 2900 emit_insn (gen_neon_vcmla<rotsplit1><mode> (res1, tmp, 2901 operands[2], operands[1])); 2902 emit_insn (gen_neon_vcmla<rotsplit2><mode> (operands[0], res1, 2903 operands[2], operands[1])); 2904 DONE; 2905}) 2906 2907 2908;; These map to the auto-vectorizer Dot Product optab. 2909;; The auto-vectorizer expects a dot product builtin that also does an 2910;; accumulation into the provided register. 
2911;; Given the following pattern 2912;; 2913;; for (i=0; i<len; i++) { 2914;; c = a[i] * b[i]; 2915;; r += c; 2916;; } 2917;; return result; 2918;; 2919;; This can be auto-vectorized to 2920;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3]; 2921;; 2922;; given enough iterations. However the vectorizer can keep unrolling the loop 2923;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7]; 2924;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11]; 2925;; ... 2926;; 2927;; and so the vectorizer provides r, in which the result has to be accumulated. 2928(define_insn "<sup>dot_prod<vsi2qi>" 2929 [(set (match_operand:VCVTI 0 "register_operand" "=w") 2930 (plus:VCVTI 2931 (unspec:VCVTI [(match_operand:<VSI2QI> 1 "register_operand" "w") 2932 (match_operand:<VSI2QI> 2 "register_operand" "w")] 2933 DOTPROD) 2934 (match_operand:VCVTI 3 "register_operand" "0")))] 2935 "TARGET_DOTPROD" 2936 "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" 2937 [(set_attr "type" "neon_dot<q>")] 2938) 2939 2940;; These instructions map to the __builtins for the Dot Product operations 2941(define_expand "neon_<sup>dot<vsi2qi>" 2942 [(set (match_operand:VCVTI 0 "register_operand" "=w") 2943 (plus:VCVTI 2944 (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand") 2945 (match_operand:<VSI2QI> 3 "register_operand")] 2946 DOTPROD) 2947 (match_operand:VCVTI 1 "register_operand")))] 2948 "TARGET_DOTPROD" 2949) 2950 2951;; These instructions map to the __builtins for the Dot Product operations. 
;; Mixed-sign form (UNSPEC_DOT_US), available under TARGET_I8MM.  Operand 1 is
;; the accumulator and is tied to the destination (constraint "0"); operands
;; 2 and 3 are the <VSI2QI>-mode multiplicands.  Emits vusdot.s8.
(define_insn "neon_usdot<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand" "=w")
        (plus:VCVTI
          (unspec:VCVTI
            [(match_operand:<VSI2QI> 2 "register_operand" "w")
             (match_operand:<VSI2QI> 3 "register_operand" "w")]
            UNSPEC_DOT_US)
          (match_operand:VCVTI 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "vusdot.s8\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
;; Operand 1 is the accumulator (tied to the destination), operand 2 the
;; vector multiplicand, operand 3 a V8QI register (constraint "t") that is
;; indexed by the immediate lane number in operand 4.
(define_insn "neon_<sup>dot_lane<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand" "=w")
        (plus:VCVTI
          (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand" "w")
                         (match_operand:V8QI 3 "register_operand" "t")
                         (match_operand:SI 4 "immediate_operand" "i")]
                DOTPROD)
          (match_operand:VCVTI 1 "register_operand" "0")))]
  "TARGET_DOTPROD"
  "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
;; Here the indexed operand 3 is a full V16QI register.  The output code
;; rebases the lane number so that it indexes within one half of that
;; register: lanes at or above the V2SI element count are printed against
;; the %f3 view, all others against the %e3 view.
(define_insn "neon_<sup>dot_laneq<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand" "=w")
        (plus:VCVTI
          (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand" "w")
                         (match_operand:V16QI 3 "register_operand" "t")
                         (match_operand:SI 4 "immediate_operand" "i")]
                DOTPROD)
          (match_operand:VCVTI 1 "register_operand" "0")))]
  "TARGET_DOTPROD"
  {
    int lane = INTVAL (operands[4]);
    if (lane > GET_MODE_NUNITS (V2SImode) - 1)
      {
        /* Lane lives in the %f3 half: make the index relative to it.  */
        operands[4] = GEN_INT (lane - GET_MODE_NUNITS (V2SImode));
        return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %f3[%c4]";
      }
    else
      {
        /* Lane lives in the %e3 half: the index is used unchanged.  */
        operands[4] = GEN_INT (lane);
        return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %e3[%c4]";
      }
  }
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the Dot Product
;; indexed operations in the v8.6 I8MM extension.
;; Same operand layout as the TARGET_DOTPROD lane pattern, but iterating
;; over DOTPROD_I8MM and gated on TARGET_I8MM.
(define_insn "neon_<sup>dot_lane<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand" "=w")
        (plus:VCVTI
          (unspec:VCVTI
            [(match_operand:<VSI2QI> 2 "register_operand" "w")
             (match_operand:V8QI 3 "register_operand" "t")
             (match_operand:SI 4 "immediate_operand" "i")]
            DOTPROD_I8MM)
          (match_operand:VCVTI 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the Dot Product
;; indexed operations in the v8.6 I8MM extension.
;; Operand 3 is a full V16QI register.  The lane number is rebased so that
;; it indexes within one half of that register: lanes at or above the V2SI
;; element count are printed against %f3, all others against %e3.
(define_insn "neon_<sup>dot_laneq<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand" "=w")
        (plus:VCVTI
          (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand" "w")
                         (match_operand:V16QI 3 "register_operand" "t")
                         (match_operand:SI 4 "immediate_operand" "i")]
                DOTPROD_I8MM)
          (match_operand:VCVTI 1 "register_operand" "0")))]
  "TARGET_I8MM"
  {
    int lane = INTVAL (operands[4]);
    if (lane > GET_MODE_NUNITS (V2SImode) - 1)
      {
        /* Lane lives in the %f3 half: make the index relative to it.  */
        operands[4] = GEN_INT (lane - GET_MODE_NUNITS (V2SImode));
        return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %f3[%c4]";
      }
    else
      {
        /* Lane lives in the %e3 half: the index is used unchanged.  */
        operands[4] = GEN_INT (lane);
        return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %e3[%c4]";
      }
  }
  [(set_attr "type" "neon_dot<q>")]
)

;; Auto-vectorizer pattern for usdot
;; Operands 1 and 2 are the multiplicands and operand 3 the accumulator;
;; the expander has no preparation code and simply emits the RTL below.
(define_expand "usdot_prod<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand")
        (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
                                                        "register_operand")
                                   (match_operand:<VSI2QI> 2
                                                        "register_operand")]
                     UNSPEC_DOT_US)
                    (match_operand:VCVTI 3 "register_operand")))]
  "TARGET_I8MM"
)

;; Vector copysign.  Builds a vector whose every SImode element is
;; 0x80000000, copies operand 2 into the destination, and then issues a
;; vbsl with that mask, the destination and operand 1.
(define_expand "neon_copysignf<mode>"
  [(match_operand:VCVTF 0 "register_operand")
   (match_operand:VCVTF 1 "register_operand")
   (match_operand:VCVTF 2 "register_operand")]
  "TARGET_NEON"
{
  rtx v_bitmask_cast;
  rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
  rtx c = gen_int_mode (0x80000000, SImode);

  emit_move_insn (v_bitmask,
                  gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
  emit_move_insn (operands[0], operands[2]);
  /* View the integer mask in the floating-point vector mode expected by
     the vbsl pattern.  */
  v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
                                        <VCVTF:V_cmp_result>mode, 0);
  emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
                                  operands[1]));

  DONE;
})

;; UNSPEC_VQNEG on integer vectors; emits vqneg.
(define_insn "neon_vqneg<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
                      UNSPEC_VQNEG))]
  "TARGET_NEON"
  "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qneg<q>")]
)

;; UNSPEC_VCLS on integer vectors; emits vcls.
(define_insn "neon_vcls<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
                      UNSPEC_VCLS))]
  "TARGET_NEON"
  "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cls<q>")]
)

;; Count leading zeros, expressed with the generic clz RTL code; emits vclz.
(define_insn "neon_vclz<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)

;; Population count, expressed with the generic popcount RTL code; emits
;; vcnt.
(define_insn "popcount<mode>2"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)

;; Builtin entry point for vcnt; forwards to popcount<mode>2.
(define_expand "neon_vcnt<mode>"
  [(match_operand:VE 0 "s_register_operand")
   (match_operand:VE 1 "s_register_operand")]
  "TARGET_NEON"
{
  emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
  DONE;
})

;; UNSPEC_VRECPE on the VH modes; requires TARGET_NEON_FP16INST and emits
;; vrecpe.f16.
(define_insn "neon_vrecpe<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
                   UNSPEC_VRECPE))]
  "TARGET_NEON_FP16INST"
  "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_recpe_s<q>")]
)

;; UNSPEC_VRECPE on the V32 modes; emits vrecpe.
(define_insn "neon_vrecpe<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
                    UNSPEC_VRECPE))]
  "TARGET_NEON"
  "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_recpe_s<q>")]
)

;; UNSPEC_VRSQRTE on the V32 modes; emits vrsqrte.
(define_insn "neon_vrsqrte<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
                    UNSPEC_VRSQRTE))]
  "TARGET_NEON"
  "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
)

;; Builtin entry point for vmvn; forwards to one_cmpl<mode>2_neon.
(define_expand "neon_vmvn<mode>"
  [(match_operand:VDQIW 0 "s_register_operand")
   (match_operand:VDQIW 1 "s_register_operand")]
  "TARGET_NEON"
{
  emit_insn (gen_one_cmpl<mode>2_neon (operands[0], operands[1]));
  DONE;
})

;; Extract one lane of a VD-mode vector into a core register with sign
;; extension.  On big-endian targets the lane number is mirrored across
;; the vector before being printed.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      int elt = INTVAL (operands[2]);
      elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
      operands[2] = GEN_INT (elt);
    }
  return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)

;; As above, but with zero extension.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      int elt = INTVAL (operands[2]);
      elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
      operands[2] = GEN_INT (elt);
    }
  return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)

;; Sign-extending lane extract from a VQ2-mode vector.  The instruction is
;; printed against a <V_HALF>-mode register: the register number is
;; advanced by two when the lane falls in the second half, and the lane
;; number is reduced modulo the half-vector element count (and mirrored
;; within the half on big-endian targets).
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  rtx ops[3];
  int regno = REGNO (operands[1]);
  unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
  unsigned int elt = INTVAL (operands[2]);
  unsigned int elt_adj = elt % halfelts;

  if (BYTES_BIG_ENDIAN)
    elt_adj = halfelts - 1 - elt_adj;

  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
  ops[2] = GEN_INT (elt_adj);
  output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);

  /* The instruction has already been written by output_asm_insn.  */
  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)

;; As above, but with zero extension.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  rtx ops[3];
  int regno = REGNO (operands[1]);
  unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
  unsigned int elt = INTVAL (operands[2]);
  unsigned int elt_adj = elt % halfelts;

  if (BYTES_BIG_ENDIAN)
    elt_adj = halfelts - 1 - elt_adj;

  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
  ops[2] = GEN_INT (elt_adj);
  output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);

  /* The instruction has already been written by output_asm_insn.  */
  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)

;; Builtin entry point for vget_lane.  Elements of 32 bits go through
;; vec_extract; all other sizes use the sign-extending internal pattern.
(define_expand "neon_vget_lane<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand")
   (match_operand:VDQW 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics are defined in terms of a model where the
         element ordering in memory is vldm order, whereas the generic
         RTL is defined in terms of a model where the element ordering
         in memory is array order.  Convert the lane number to conform
         to this model.  */
      unsigned int elt = INTVAL (operands[2]);
      unsigned int reg_nelts
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      elt ^= reg_nelts - 1;
      operands[2] = GEN_INT (elt);
    }

  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
                                                operands[2]));
  else
    emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  DONE;
})

;; Unsigned counterpart of neon_vget_lane<mode>: sizes other than 32 bits
;; use the zero-extending internal pattern.
(define_expand "neon_vget_laneu<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand")
   (match_operand:VDQIW 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics are defined in terms of a model where the
         element ordering in memory is vldm order, whereas the generic
         RTL is defined in terms of a model where the element ordering
         in memory is array order.  Convert the lane number to conform
         to this model.  */
      unsigned int elt = INTVAL (operands[2]);
      unsigned int reg_nelts
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      elt ^= reg_nelts - 1;
      operands[2] = GEN_INT (elt);
    }

  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
                                                operands[2]));
  else
    emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  DONE;
})

; The lane number (operand 2) is ignored: the DImode source is moved to the
; destination as a whole.
(define_expand "neon_vget_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})

;; Extract one DImode half of a V2DI vector: lane 0 is taken with
;; gen_lowpart and lane 1 with gen_highpart; any other lane number trips
;; the assertion.
(define_expand "neon_vget_lanev2di"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:V2DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  int lane;

  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics are defined in terms of a model where the
         element ordering in memory is vldm order, whereas the generic
         RTL is defined in terms of a model where the element ordering
         in memory is array order.  Convert the lane number to conform
         to this model.  */
      unsigned int elt = INTVAL (operands[2]);
      unsigned int reg_nelts = 2;
      elt ^= reg_nelts - 1;
      operands[2] = GEN_INT (elt);
    }

  lane = INTVAL (operands[2]);
  gcc_assert ((lane == 0) || (lane == 1));
  emit_move_insn (operands[0], lane == 0
                                ? gen_lowpart (DImode, operands[1])
                                : gen_highpart (DImode, operands[1]));
  DONE;
})

;; Builtin entry point for vset_lane.  Operand 1 is the new element,
;; operand 2 the vector being updated and operand 3 the lane number, which
;; is passed to vec_set<mode>_internal as a one-hot mask (1 << lane).
(define_expand "neon_vset_lane<mode>"
  [(match_operand:VDQ 0 "s_register_operand")
   (match_operand:<V_elem> 1 "s_register_operand")
   (match_operand:VDQ 2 "s_register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_NEON"
{
  unsigned int elt = INTVAL (operands[3]);

  if (BYTES_BIG_ENDIAN)
    {
      /* Same lane-number conversion as in the vget_lane expanders.  */
      unsigned int reg_nelts
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      elt ^= reg_nelts - 1;
    }

  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         GEN_INT (1 << elt), operands[2]));
  DONE;
})

; Operands 2 and 3 (the old DImode value and the lane number) are ignored:
; the new value in operand 1 is moved to the destination as a whole.

(define_expand "neon_vset_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:DI 2 "s_register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})

;; Builtin entry point for vcreate: reinterpret the DImode operand in the
;; vector mode with gen_lowpart and move it to the destination.
(define_expand "neon_vcreate<mode>"
  [(match_operand:VD_RE 0 "s_register_operand")
   (match_operand:DI 1 "general_operand")]
  "TARGET_NEON"
{
  rtx src = gen_lowpart (<MODE>mode, operands[1]);
  emit_move_insn (operands[0], src);
  DONE;
})

;; Duplicate a scalar held in a core register ("r") across every element
;; of a VX-mode vector; emits vdup.
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.<V_sz_elem>\t%<V_reg>0, %1"
  [(set_attr "type" "neon_from_gp<q>")]
)

;; HFmode scalar duplicated into a V4HF vector; emits vdup.16.
(define_insn "neon_vdup_nv4hf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%P0, %1"
  [(set_attr "type" "neon_from_gp")]
)

;; HFmode scalar duplicated into a V8HF vector; emits vdup.16.
(define_insn "neon_vdup_nv8hf"
  [(set (match_operand:V8HF 0 "s_register_operand" "=w")
        (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%q0, %1"
  [(set_attr "type" "neon_from_gp_q")]
)

;; BFmode scalar duplicated into a V4BF vector; emits vdup.16.
(define_insn "neon_vdup_nv4bf"
  [(set (match_operand:V4BF 0 "s_register_operand" "=w")
        (vec_duplicate:V4BF (match_operand:BF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%P0, %1"
  [(set_attr "type" "neon_from_gp")]
)

;; BFmode scalar duplicated into a V8BF vector; emits vdup.16.
(define_insn "neon_vdup_nv8bf"
  [(set (match_operand:V8BF 0 "s_register_operand" "=w")
        (vec_duplicate:V8BF (match_operand:BF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%q0, %1"
  [(set_attr "type" "neon_from_gp_q")]
)

;; V32-mode duplicate with two alternatives: the scalar may be in a core
;; register ("r") or in a register matching "t", in which case it is
;; printed with the %y modifier.
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w,w")
        (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
  "TARGET_NEON"
  "@
  vdup.<V_sz_elem>\t%<V_reg>0, %1
  vdup.<V_sz_elem>\t%<V_reg>0, %y1"
  [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
)

;; DImode "duplicate" is a plain move of operand 1 to operand 0.
(define_expand "neon_vdup_ndi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
}
)

;; Duplicate a DImode value into both halves of a V2DI vector.  Each
;; alternative emits two vmov instructions (hence length 8), one writing
;; %e0 and one writing %f0.
(define_insn "neon_vdup_nv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
        (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
  "TARGET_NEON"
  "@
  vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
  vmov\t%e0, %P1\;vmov\t%f0, %P1"
  [(set_attr "length" "8")
   (set_attr "type" "multiple")]
)

;; Duplicate one lane of operand 1 across a VDQW-mode destination.  On
;; big-endian targets the lane number is mirrored across the source
;; vector; <Is_d_reg> selects between the %P0 and %q0 forms of the
;; destination.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (vec_duplicate:VDQW
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      int elt = INTVAL (operands[2]);
      elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
      operands[2] = GEN_INT (elt);
    }
  if (<Is_d_reg>)
    return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
  else
    return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
}
  [(set_attr "type" "neon_dup<q>")]
)

;; As above for the VHFBF modes, additionally requiring TARGET_FP16 or
;; TARGET_BF16_SIMD.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VHFBF 0 "s_register_operand" "=w")
        (vec_duplicate:VHFBF
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
{
  if (BYTES_BIG_ENDIAN)
    {
      int elt = INTVAL (operands[2]);
      elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
      operands[2] = GEN_INT (elt);
    }
  if (<Is_d_reg>)
    return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
  else
    return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
}
  [(set_attr "type" "neon_dup<q>")]
)

;; Builtin entry point for vdup_lane on VDQW modes: converts the lane
;; number on big-endian targets, then forwards to the internal pattern.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VDQW 0 "s_register_operand")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      unsigned int elt = INTVAL (operands[2]);
      unsigned int reg_nelts
        = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
      elt ^= reg_nelts - 1;
      operands[2] = GEN_INT (elt);
    }
  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
                                                operands[2]));
  DONE;
})

;; As above for the VHFBF modes, additionally requiring TARGET_FP16 or
;; TARGET_BF16_SIMD.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VHFBF 0 "s_register_operand")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
{
  if (BYTES_BIG_ENDIAN)
    {
      unsigned int elt = INTVAL (operands[2]);
      unsigned int reg_nelts
        = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
      elt ^= reg_nelts - 1;
      operands[2] = GEN_INT (elt);
    }
  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
                                                operands[2]));
  DONE;
})

; Scalar index is ignored, since only zero is valid here.
(define_expand "neon_vdup_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})

; Likewise for v2di, as the DImode second operand has only a single element.
(define_expand "neon_vdup_lanev2di"
  [(match_operand:V2DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
  DONE;
})

; Disabled before reload because we don't want combine doing something silly,
; but used by the post-reload expansion of neon_vcombine.
; Both operands are read and written ("+w"): the two sets exchange them.
(define_insn "*neon_vswp<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "+w")
        (match_operand:VDQX 1 "s_register_operand" "+w"))
   (set (match_dup 1) (match_dup 0))]
  "TARGET_NEON && reload_completed"
  "vswp\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_permute<q>")]
)

;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; dest vector.
;; FIXME: A different implementation of this builtin could make it much
;; more likely that we wouldn't actually need to output anything (we could make
;; it so that the reg allocator puts things in the right places magically
;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; Always output as "#"; the real instructions are produced after reload by
;; neon_split_vcombine.
(define_insn_and_split "neon_vcombine<mode>"
  [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
        (vec_concat:<V_DOUBLE>
          (match_operand:VDX 1 "s_register_operand" "w")
          (match_operand:VDX 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  neon_split_vcombine (operands);
  DONE;
}
[(set_attr "type" "multiple")]
)

;; Extract the high half of operand 1: a move from the subreg that starts
;; GET_MODE_SIZE (<V_HALF>mode) bytes into the full-width register.
(define_expand "neon_vget_high<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQXBF 1 "s_register_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0],
                  simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                                       GET_MODE_SIZE (<V_HALF>mode)));
  DONE;
})

;; Extract the low half of operand 1: a move from the subreg at byte offset 0.
;; Iterates over VQXBF so that it is available for exactly the same modes as
;; neon_vget_high<mode> above.
(define_expand "neon_vget_low<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQXBF 1 "s_register_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0],
                  simplify_gen_subreg (<V_HALF>mode, operands[1],
                                       <MODE>mode, 0));
  DONE;
})

;; Standard-named integer <-> float vector conversions.  The int-to-float
;; patterns are disabled under -frounding-math (!flag_rounding_math).
(define_insn "float<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)

(define_insn "floatuns<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_float:<V_CVTTO>
          (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)

(define_insn "fix_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)

(define_insn "fixuns_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_fix:<V_CVTTO>
          (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)

;; Builtin (unspec) forms of the conversions.  <sup> selects the signed or
;; unsigned integer type in the mnemonic.

;; f32 -> 32-bit integer.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
                          VCVT_US))]
  "TARGET_NEON"
  "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)

;; 32-bit integer -> f32.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
                          VCVT_US))]
  "TARGET_NEON"
  "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)

;; f16 -> f32 (D register in, Q register out).
(define_insn "neon_vcvtv4sfv4hf"
  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
        (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
                     UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f32.f16\t%q0, %P1"
  [(set_attr "type" "neon_fp_cvt_widen_h")]
)

;; f32 -> f16 (Q register in, D register out).
(define_insn "neon_vcvtv4hfv4sf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
                     UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f16.f32\t%P0, %q1"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

;; 16-bit integer -> f16.
(define_insn "neon_vcvt<sup><mode>"
  [(set
     (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
     (unspec:<VH_CVTTO>
       [(match_operand:VCVTHI 1 "s_register_operand" "w")]
       VCVT_US))]
  "TARGET_NEON_FP16INST"
  "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
)

;; f16 -> 16-bit integer.
(define_insn "neon_vcvt<sup><mode>"
  [(set
     (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
     (unspec:<VH_CVTTO>
       [(match_operand:VH 1 "s_register_operand" "w")]
       VCVT_US))]
  "TARGET_NEON_FP16INST"
  "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)

;; Conversions taking an immediate operand 2, which is range-checked with
;; arm_const_bounds before being printed as the last assembler operand.
;; NOTE(review): the 32-bit variants check against a lower bound of 1 while the
;; FP16 variants use 0 -- confirm that the difference is intended.

;; f32 -> 32-bit integer, with immediate.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
                           (match_operand:SI 2 "immediate_operand" "i")]
                          VCVT_US_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, 33);
  return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)

;; f16 -> 16-bit integer, with immediate.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON_FP16INST"
{
  arm_const_bounds (operands[2], 0, 17);
  return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)

;; 32-bit integer -> f32, with immediate.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
                           (match_operand:SI 2 "immediate_operand" "i")]
                          VCVT_US_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, 33);
  return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)

;; 16-bit integer -> f16, with immediate.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VCVTHI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON_FP16INST"
{
  arm_const_bounds (operands[2], 0, 17);
  return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
)

;; f16 -> 16-bit integer; <vcvth_op> supplies the suffix appended to "vcvt".
(define_insn "neon_vcvt<vcvth_op><sup><mode>"
  [(set
     (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
     (unspec:<VH_CVTTO>
       [(match_operand:VH 1 "s_register_operand" "w")]
       VCVT_HF_US))]
  "TARGET_NEON_FP16INST"
  "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)

;; Narrowing moves: Q-register source (%q1), D-register destination (%P0).
(define_insn "neon_vmovn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
                           UNSPEC_VMOVN))]
  "TARGET_NEON"
  "vmovn.<V_if_elem>\t%P0, %q1"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "neon_vqmovn<sup><mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
                           VQMOVN))]
  "TARGET_NEON"
  "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

(define_insn "neon_vqmovun<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
                           UNSPEC_VQMOVUN))]
  "TARGET_NEON"
  "vqmovun.<V_s_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; Widening move: D-register source (%P1), Q-register destination (%q0).
(define_insn "neon_vmovl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
                          VMOVL))]
  "TARGET_NEON"
  "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Multiply by scalar ("_lane") patterns.  The scalar is addressed as
;; <D register>[<lane>], with the lane index taken from the trailing SImode
;; immediate operand and printed with %c.  The register holding the scalar
;; uses <scalar_mul_constraint>.

(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
                     (match_operand:VMD 2 "s_register_operand"
                                          "<scalar_mul_constraint>")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
  "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mul_s_scalar<q>")
                   (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)

(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
                     (match_operand:<V_HALF> 2 "s_register_operand"
                                               "<scalar_mul_constraint>")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
  "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mul_s_scalar<q>")
                   (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)

(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
                    (match_operand:V4HF 2 "s_register_operand"
                                          "<scalar_mul_constraint>")
                    (match_operand:SI 3 "immediate_operand" "i")]
                   UNSPEC_VMUL_LANE))]
  "TARGET_NEON_FP16INST"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
  [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
)

(define_insn "neon_vmull<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
                           (match_operand:VMDI 2 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 3 "immediate_operand" "i")]
                          VMULL_LANE))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
)

(define_insn "neon_vqdmull_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
                           (match_operand:VMDI 2 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 3 "immediate_operand" "i")]
                          UNSPEC_VQDMULL_LANE))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
)

;; vqdmulh_lane, vqrdmulh_lane: Q-register form.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
                      (match_operand:<V_HALF> 2 "s_register_operand"
                                                "<scalar_mul_constraint>")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     VQDMULH_LANE))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)

;; vqdmulh_lane, vqrdmulh_lane: D-register form.  This operates on D
;; registers only, so it uses the non-"_q" scheduling type.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
                      (match_operand:VMDI 2 "s_register_operand"
                                            "<scalar_mul_constraint>")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     VQDMULH_LANE))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar")]
)

;; vqrdmlah_lane, vqrdmlsh_lane
;; Operand 1 is the accumulator and is tied to the destination ("0").
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
                      (match_operand:VMQI 2 "s_register_operand" "w")
                      (match_operand:<V_HALF> 3 "s_register_operand"
                                                "<scalar_mul_constraint>")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
)

(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
                      (match_operand:VMDI 2 "s_register_operand" "w")
                      (match_operand:VMDI 3 "s_register_operand"
                                            "<scalar_mul_constraint>")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
)

;; Multiply-accumulate / multiply-subtract by scalar.  In all of the patterns
;; below operand 1 is the accumulator, tied to the destination ("0"), so it
;; does not appear in the assembler template.

(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
                     (match_operand:VMD 2 "s_register_operand" "w")
                     (match_operand:VMD 3 "s_register_operand"
                                          "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)

(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
                     (match_operand:VMQ 2 "s_register_operand" "w")
                     (match_operand:<V_HALF> 3 "s_register_operand"
                                               "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)

(define_insn "neon_vmlal<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          VMLAL_LANE))]
  "TARGET_NEON"
  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)

(define_insn "neon_vqdmlal_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          UNSPEC_VQDMLAL_LANE))]
  "TARGET_NEON"
  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)

(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
                     (match_operand:VMD 2 "s_register_operand" "w")
                     (match_operand:VMD 3 "s_register_operand"
                                          "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)

(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
                     (match_operand:VMQ 2 "s_register_operand" "w")
                     (match_operand:<V_HALF> 3 "s_register_operand"
                                               "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)

(define_insn "neon_vmlsl<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          VMLSL_LANE))]
  "TARGET_NEON"
  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)

(define_insn "neon_vqdmlsl_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
                           (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
                                                 "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          UNSPEC_VQDMLSL_LANE))]
  "TARGET_NEON"
  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)

; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
; core register into a temp register, then use a scalar taken from that.  This
; isn't an optimal solution if e.g. the scalar has just been read from memory
; or extracted from another vector.
; In the latter case it's currently better to
; use the "_lane" variant, and the former case can probably be implemented
; using vld1_lane, but that hasn't been done yet.

;; Each "_n" expander below places the scalar operand in lane 0 of a fresh
;; pseudo with neon_vset_lane and then emits the matching "_lane" pattern
;; with a lane index of zero.

;; D-register multiply by scalar.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx));
  DONE;
})

;; Q-register multiply by scalar; the scalar lives in a half-width vector.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx));
  DONE;
})

;; FP16 multiply by scalar; the scalar always lives in a V4HF temporary.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx scalar_vec = gen_reg_rtx (V4HFmode);
  emit_insn (gen_neon_vset_lanev4hf (scalar_vec, operands[2], scalar_vec,
                                     const0_rtx));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
                                       const0_rtx));
  DONE;
})

;; Signed widening multiply by scalar.
(define_expand "neon_vmulls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], scalar_vec,
                                         const0_rtx));
  DONE;
})

;; "_n" expanders, continued: each one inserts the scalar operand into lane 0
;; of a fresh pseudo with neon_vset_lane and then emits the corresponding
;; "_lane" pattern with lane index zero.

;; Unsigned widening multiply by scalar.
(define_expand "neon_vmullu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], scalar_vec,
                                         const0_rtx));
  DONE;
})

(define_expand "neon_vqdmull_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], scalar_vec,
                                          const0_rtx));
  DONE;
})

(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], scalar_vec,
                                          const0_rtx));
  DONE;
})

(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, const0_rtx));
  DONE;
})

;; Q-register forms: the scalar is held in a half-width (<V_HALF>) vector.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand")
   (match_operand:VMQI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], scalar_vec,
                                          const0_rtx));
  DONE;
})

(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand")
   (match_operand:VMQI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, const0_rtx));
  DONE;
})

;; Multiply-accumulate by scalar.  Operands 1 and 2 are passed through to the
;; "_lane" pattern unchanged; operand 3 is the scalar.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:VMD 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, const0_rtx));
  DONE;
})

(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:VMQ 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, const0_rtx));
  DONE;
})

(define_expand "neon_vmlals_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
                                         scalar_vec, const0_rtx));
  DONE;
})

(define_expand "neon_vmlalu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
                                         scalar_vec, const0_rtx));
  DONE;
})

(define_expand "neon_vqdmlal_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec,
                                          const0_rtx));
  DONE;
})

;; Multiply-subtract by scalar; same operand layout as the vmla_n expanders.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:VMD 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, const0_rtx));
  DONE;
})

(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:VMQ 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
                                         const0_rtx));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
                                       scalar_vec, const0_rtx));
  DONE;
})

(define_expand "neon_vmlsls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
                                         scalar_vec, const0_rtx));
  DONE;
})

(define_expand "neon_vmlslu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
                                         scalar_vec, const0_rtx));
  DONE;
})

(define_expand "neon_vqdmlsl_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
                                       const0_rtx));
  emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec,
                                          const0_rtx));
  DONE;
})

;; vext: operand 3 is an immediate checked against
;; [0, GET_MODE_NUNITS (<MODE>mode)) before the instruction is printed.
(define_insn "@neon_vext<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
                      (match_operand:VDQX 2 "s_register_operand" "w")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     UNSPEC_VEXT))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
  return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_ext<q>")]
)

;; vrev64 / vrev32 / vrev16: single-source element reversal instructions.
(define_insn "@neon_vrev64<mode>"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
                    UNSPEC_VREV64))]
  "TARGET_NEON"
  "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)

(define_insn "@neon_vrev32<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
                   UNSPEC_VREV32))]
  "TARGET_NEON"
  "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)

(define_insn "@neon_vrev16<mode>"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
                   UNSPEC_VREV16))]
  "TARGET_NEON"
  "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)

; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
; allocation.
; For an intrinsic of form:
;   rD = vbsl_* (rS, rN, rM)
; We can use any of:
;   vbsl rS, rN, rM  (if D = S)
;   vbit rD, rN, rS  (if D = M, so 1-bits in rS choose bits from rN, else rM)
;   vbif rD, rM, rS  (if D = N, so 0-bits in rS choose bits from rM, else rN)

;; The three alternatives tie the destination to operand 1, 3 and 2
;; respectively (the "0" constraint) and select vbsl, vbit and vbif.
(define_insn "neon_vbsl<mode>_internal"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
                      (match_operand:VDQX 2 "s_register_operand" " w,w,0")
                      (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
                     UNSPEC_VBSL))]
  "TARGET_NEON"
  "@
  vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
  vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
  vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
  [(set_attr "type" "neon_bsl<q>")]
)

;; Builtin entry point for vbsl.  Operand 1 arrives in <V_cmp_result> mode
;; and is converted to <MODE>mode with gen_lowpart, so that all three inputs
;; share one mode.
(define_expand "@neon_vbsl<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand")
                      (match_operand:VDQX 2 "s_register_operand")
                      (match_operand:VDQX 3 "s_register_operand")]
                     UNSPEC_VBSL))]
  "TARGET_NEON"
{
  /* We can't alias operands together if they have different modes.  */
  operands[1] = gen_lowpart (<MODE>mode, operands[1]);
})

;; vshl, vrshl
;; Shift by a per-element amount held in register operand 2.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:VDQIX 2 "s_register_operand" "w")]
                      VSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; vqshl, vqrshl
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:VDQIX 2 "s_register_operand" "w")]
                      VQSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)

;; In the immediate-shift patterns below the shift count is range-checked
;; with arm_const_bounds before the instruction is printed.  Going by its
;; uses in this file, the second argument is the lowest accepted value and
;; the third is one more than the highest accepted value.

;; vshr_n, vrshr_n
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      VSHR_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; vshrn_n, vrshrn_n
;; Narrowing: the upper bound is derived from half the source element width.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:SI 2 "immediate_operand" "i")]
                           VSHRN_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
  return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

;; vqshrn_n, vqrshrn_n
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:SI 2 "immediate_operand" "i")]
                           VQSHRN_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; vqshrun_n, vqrshrun_n
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:SI 2 "immediate_operand" "i")]
                           VQSHRUN_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
  return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; Left shifts by immediate: the bounds passed are 0 and the element width.
(define_insn "neon_vshl_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      UNSPEC_VSHL_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
  return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "neon_vqshl_<sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      VQSHL_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
  return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)

(define_insn "neon_vqshlu_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:SI 2 "immediate_operand" "i")]
                      UNSPEC_VQSHLU_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
  return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)

;; Widening left shift: D-register source (%P1), Q-register result (%q0).
(define_insn "neon_vshll<sup>_n<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
                           (match_operand:SI 2 "immediate_operand" "i")]
                          VSHLL_N))]
  "TARGET_NEON"
{
  /* The bounds passed below are 0 and size + 1, i.e. 0 <= imm <= size if
     the upper bound is exclusive as elsewhere in this file.
     NOTE(review): this comment previously read "0 < imm <= size", which
     does not match the lower bound of 0 -- confirm which is intended.  */
  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
  return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
}
  [(set_attr "type" "neon_shift_imm_long")]
)

;; vsra_n, vrsra_n
;; Shift right and accumulate: operand 1 is the accumulator, tied to the
;; destination ("0"), so only operand 2 and the shift count are printed.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
                       (match_operand:VDQIX 2 "s_register_operand" "w")
                       (match_operand:SI 3 "immediate_operand" "i")]
                      VSRA_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_acc<q>")]
)

;; Shift right and insert: operand 1 is tied to the destination ("0").
(define_insn "neon_vsri_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
                       (match_operand:VDQIX 2 "s_register_operand" "w")
                       (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VSRI))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
  return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "neon_vsli_n<mode>"
  [(set (match_operand:VDQIX 0
"s_register_operand" "=w") 4601 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") 4602 (match_operand:VDQIX 2 "s_register_operand" "w") 4603 (match_operand:SI 3 "immediate_operand" "i")] 4604 UNSPEC_VSLI))] 4605 "TARGET_NEON" 4606{ 4607 arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode)); 4608 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"; 4609} 4610 [(set_attr "type" "neon_shift_reg<q>")] 4611) 4612 4613(define_insn "neon_vtbl1v8qi" 4614 [(set (match_operand:V8QI 0 "s_register_operand" "=w") 4615 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w") 4616 (match_operand:V8QI 2 "s_register_operand" "w")] 4617 UNSPEC_VTBL))] 4618 "TARGET_NEON" 4619 "vtbl.8\t%P0, {%P1}, %P2" 4620 [(set_attr "type" "neon_tbl1")] 4621) 4622 4623(define_insn "neon_vtbl2v8qi" 4624 [(set (match_operand:V8QI 0 "s_register_operand" "=w") 4625 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w") 4626 (match_operand:V8QI 2 "s_register_operand" "w")] 4627 UNSPEC_VTBL))] 4628 "TARGET_NEON" 4629{ 4630 rtx ops[4]; 4631 int tabbase = REGNO (operands[1]); 4632 4633 ops[0] = operands[0]; 4634 ops[1] = gen_rtx_REG (V8QImode, tabbase); 4635 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); 4636 ops[3] = operands[2]; 4637 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops); 4638 4639 return ""; 4640} 4641 [(set_attr "type" "neon_tbl2")] 4642) 4643 4644(define_insn "neon_vtbl3v8qi" 4645 [(set (match_operand:V8QI 0 "s_register_operand" "=w") 4646 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w") 4647 (match_operand:V8QI 2 "s_register_operand" "w")] 4648 UNSPEC_VTBL))] 4649 "TARGET_NEON" 4650{ 4651 rtx ops[5]; 4652 int tabbase = REGNO (operands[1]); 4653 4654 ops[0] = operands[0]; 4655 ops[1] = gen_rtx_REG (V8QImode, tabbase); 4656 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); 4657 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); 4658 ops[4] = operands[2]; 4659 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops); 4660 4661 return ""; 4662} 4663 
[(set_attr "type" "neon_tbl3")] 4664) 4665 4666(define_insn "neon_vtbl4v8qi" 4667 [(set (match_operand:V8QI 0 "s_register_operand" "=w") 4668 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w") 4669 (match_operand:V8QI 2 "s_register_operand" "w")] 4670 UNSPEC_VTBL))] 4671 "TARGET_NEON" 4672{ 4673 rtx ops[6]; 4674 int tabbase = REGNO (operands[1]); 4675 4676 ops[0] = operands[0]; 4677 ops[1] = gen_rtx_REG (V8QImode, tabbase); 4678 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); 4679 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); 4680 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6); 4681 ops[5] = operands[2]; 4682 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops); 4683 4684 return ""; 4685} 4686 [(set_attr "type" "neon_tbl4")] 4687) 4688 4689;; These three are used by the vec_perm infrastructure for V16QImode. 4690(define_insn_and_split "neon_vtbl1v16qi" 4691 [(set (match_operand:V16QI 0 "s_register_operand" "=&w") 4692 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w") 4693 (match_operand:V16QI 2 "s_register_operand" "w")] 4694 UNSPEC_VTBL))] 4695 "TARGET_NEON" 4696 "#" 4697 "&& reload_completed" 4698 [(const_int 0)] 4699{ 4700 rtx op0, op1, op2, part0, part2; 4701 unsigned ofs; 4702 4703 op0 = operands[0]; 4704 op1 = gen_lowpart (TImode, operands[1]); 4705 op2 = operands[2]; 4706 4707 ofs = subreg_lowpart_offset (V8QImode, V16QImode); 4708 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); 4709 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); 4710 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); 4711 4712 ofs = subreg_highpart_offset (V8QImode, V16QImode); 4713 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); 4714 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); 4715 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); 4716 DONE; 4717} 4718 [(set_attr "type" "multiple")] 4719) 4720 4721(define_insn_and_split "neon_vtbl2v16qi" 4722 [(set (match_operand:V16QI 0 "s_register_operand" "=&w") 4723 (unspec:V16QI 
[(match_operand:OI 1 "s_register_operand" "w") 4724 (match_operand:V16QI 2 "s_register_operand" "w")] 4725 UNSPEC_VTBL))] 4726 "TARGET_NEON" 4727 "#" 4728 "&& reload_completed" 4729 [(const_int 0)] 4730{ 4731 rtx op0, op1, op2, part0, part2; 4732 unsigned ofs; 4733 4734 op0 = operands[0]; 4735 op1 = operands[1]; 4736 op2 = operands[2]; 4737 4738 ofs = subreg_lowpart_offset (V8QImode, V16QImode); 4739 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); 4740 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); 4741 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); 4742 4743 ofs = subreg_highpart_offset (V8QImode, V16QImode); 4744 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); 4745 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); 4746 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); 4747 DONE; 4748} 4749 [(set_attr "type" "multiple")] 4750) 4751 4752;; ??? Logically we should extend the regular neon_vcombine pattern to 4753;; handle quad-word input modes, producing octa-word output modes. But 4754;; that requires us to add support for octa-word vector modes in moves. 4755;; That seems overkill for this one use in vec_perm. 
;; Combine two V16QI registers into one OImode value.  Emitted as "#" and
;; split after reload by neon_split_vcombine.
(define_insn_and_split "neon_vcombinev16qi"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
                    (match_operand:V16QI 2 "s_register_operand" "w")]
                   UNSPEC_VCONCAT))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)

;; Table extension with a one-register table.  Operand 1 is tied to the
;; destination, so it does not appear in the assembly template.
(define_insn "neon_vtbx1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:V8QI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
  "vtbx.8\t%P0, {%P2}, %P3"
  [(set_attr "type" "neon_tbl1")]
)

;; Table extension with a TImode table, printed as two V8QI registers.
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:TI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* View the table as V8QI registers at BASE and BASE + 2.  */
  const int table_regno = REGNO (operands[2]);
  rtx xops[4];

  xops[0] = operands[0];
  xops[3] = operands[3];
  xops[1] = gen_rtx_REG (V8QImode, table_regno);
  xops[2] = gen_rtx_REG (V8QImode, table_regno + 2);
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", xops);

  return "";
}
  [(set_attr "type" "neon_tbl2")]
)

;; Table extension with an EImode table, printed as three V8QI registers.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:EI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  const int table_regno = REGNO (operands[2]);
  rtx xops[5];

  xops[0] = operands[0];
  xops[4] = operands[3];
  xops[1] = gen_rtx_REG (V8QImode, table_regno);
  xops[2] = gen_rtx_REG (V8QImode, table_regno + 2);
  xops[3] = gen_rtx_REG (V8QImode, table_regno + 4);
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", xops);

  return "";
}
  [(set_attr "type" "neon_tbl3")]
)

;; Table extension with an OImode table, printed as four V8QI registers.
(define_insn "neon_vtbx4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:OI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  const int table_regno = REGNO (operands[2]);
  rtx xops[6];

  xops[0] = operands[0];
  xops[5] = operands[3];
  xops[1] = gen_rtx_REG (V8QImode, table_regno);
  xops[2] = gen_rtx_REG (V8QImode, table_regno + 2);
  xops[3] = gen_rtx_REG (V8QImode, table_regno + 4);
  xops[4] = gen_rtx_REG (V8QImode, table_regno + 6);
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", xops);

  return "";
}
  [(set_attr "type" "neon_tbl4")]
)

;; Expander for vtrn: produces both results (operands 0 and 3) from the
;; same pair of inputs (operands 1 and 2) in one parallel.
(define_expand "@neon_vtrn<mode>_internal"
  [(parallel
    [(set (match_operand:VDQWH 0 "s_register_operand")
          (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
                         (match_operand:VDQWH 2 "s_register_operand")]
           UNSPEC_VTRN1))
     (set (match_operand:VDQWH 3 "s_register_operand")
          (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
  "TARGET_NEON"
  ""
)

;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for the vtrn expander.  Inputs 1 and 3 are tied to outputs
;; 0 and 2 respectively (constraints "0" and "2"), so the assembly names
;; only the two output registers.
(define_insn "*neon_vtrn<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
                       (match_operand:VDQWH 3 "s_register_operand" "2")]
         UNSPEC_VTRN1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)]
         UNSPEC_VTRN2))]
  "TARGET_NEON"
  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_permute<q>")]
)

;; Expander for vzip: produces both results (operands 0 and 3) from the
;; same pair of inputs (operands 1 and 2) in one parallel.
(define_expand "@neon_vzip<mode>_internal"
  [(parallel
    [(set (match_operand:VDQWH 0 "s_register_operand")
          (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
                         (match_operand:VDQWH 2 "s_register_operand")]
           UNSPEC_VZIP1))
     (set (match_operand:VDQWH 3 "s_register_operand")
          (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
  "TARGET_NEON"
  ""
)

;; Note: Different operand numbering to handle tied registers correctly.
;; Inputs 1 and 3 are tied to outputs 0 and 2 via the "0" and "2"
;; constraints.
(define_insn "*neon_vzip<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
                       (match_operand:VDQWH 3 "s_register_operand" "2")]
         UNSPEC_VZIP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)]
         UNSPEC_VZIP2))]
  "TARGET_NEON"
  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)

;; Expander for vuzp: produces both results (operands 0 and 3) from the
;; same pair of inputs (operands 1 and 2) in one parallel.
(define_expand "@neon_vuzp<mode>_internal"
  [(parallel
    [(set (match_operand:VDQWH 0 "s_register_operand")
          (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
                         (match_operand:VDQWH 2 "s_register_operand")]
           UNSPEC_VUZP1))
     (set (match_operand:VDQWH 3 "s_register_operand")
          (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
  "TARGET_NEON"
  ""
)

;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for the vuzp expander.  Inputs 1 and 3 are tied to outputs
;; 0 and 2 via the "0" and "2" constraints.
(define_insn "*neon_vuzp<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
                       (match_operand:VDQWH 3 "s_register_operand" "2")]
         UNSPEC_VUZP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)]
         UNSPEC_VUZP2))]
  "TARGET_NEON"
  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)

;; Standard-name expander; generates the same UNSPEC_VLD1 RTL as
;; neon_vld1<mode> below.
(define_expand "vec_load_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
                     UNSPEC_VLD1))]
  "TARGET_NEON")

;; Whole-vector vld1.
(define_insn "neon_vld1<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
                     UNSPEC_VLD1))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)

;; The lane numbers in the RTL are in GCC lane order, having been flipped
;; in arm_expand_neon_args.  The lane numbers are restored to architectural
;; lane order here.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VDX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));

  operands[3] = GEN_INT (arch_lane);
  /* A single-element vector has no lane to index; load the register.  */
  if (nunits != 1)
    return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
  return "vld1.<V_sz_elem>\t%P0, %A1";
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Quad-register lane load.  The lane is mapped onto one <V_HALF>mode half
;; of the destination: lanes in the upper half use the register at
;; REGNO + 2 with the lane index reduced by half the element count.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VQX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int half_regno = REGNO (operands[0]);

  if (arch_lane >= half)
    {
      arch_lane -= half;
      half_regno += 2;
    }
  operands[3] = GEN_INT (arch_lane);
  operands[0] = gen_rtx_REG (<V_HALF>mode, half_regno);

  /* With two elements each half holds one element: no lane index.  */
  if (nunits != 2)
    return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
  return "vld1.<V_sz_elem>\t%P0, %A1";
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)

;; Load one element and duplicate it into the destination register, using
;; the "[]" register syntax.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
        (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%P0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)

;; Special case for DImode.  Treat it exactly like a simple load.
;; Generates plain UNSPEC_VLD1 RTL on DImode.
(define_expand "neon_vld1_dupdi"
  [(set (match_operand:DI 0 "s_register_operand")
        (unspec:DI [(match_operand:DI 1 "neon_struct_operand")]
                   UNSPEC_VLD1))]
  "TARGET_NEON"
  ""
)

;; Quad-register duplicating load: both halves of the destination (%e0 and
;; %f0) are written by a single vld1.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w")
        (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)

;; V2DI duplicating load.  Split after reload into a DImode load of the low
;; half followed by a move of that value into the high half.
(define_insn_and_split "neon_vld1_dupv2di"
   [(set (match_operand:V2DI 0 "s_register_operand" "=w")
    (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    rtx low_half = gen_lowpart (DImode, operands[0]);
    rtx high_half;

    emit_insn (gen_neon_vld1_dupdi (low_half, operands[1]));
    high_half = gen_highpart (DImode, operands[0]);
    emit_move_insn (high_half, low_half);
    DONE;
  }
  [(set_attr "length" "8")
   (set_attr "type" "neon_load1_all_lanes_q")]
)

;; Standard-name expander; generates the same UNSPEC_VST1 RTL as
;; neon_vst1<mode> below.
(define_expand "vec_store_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
                     UNSPEC_VST1))]
  "TARGET_NEON")

;; Whole-vector vst1.
(define_insn "neon_vst1<mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
                     UNSPEC_VST1))]
  "TARGET_NEON"
  "vst1.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Store one lane of a double-word vector.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VDX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));

  operands[2] = GEN_INT (arch_lane);
  /* A single-element vector has no lane to index.  */
  if (nunits != 1)
    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
  return "vst1.<V_sz_elem>\t{%P1}, %A0";
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Quad-word variant: lanes in the upper half are addressed through the
;; <V_HALF>mode register at REGNO + 2.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VQX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int half_regno = REGNO (operands[1]);

  if (arch_lane >= half)
    {
      arch_lane -= half;
      half_regno += 2;
    }
  operands[1] = gen_rtx_REG (<V_HALF>mode, half_regno);
  operands[2] = GEN_INT (arch_lane);

  if (nunits != 2)
    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
  return "vst1.<V_sz_elem>\t{%P1}, %A0";
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)

;; Standard-name expander generating UNSPEC_VLD2 RTL on TImode.
(define_expand "vec_load_lanesti<mode>"
  [(set (match_operand:TI 0 "s_register_operand")
        (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON")

;; Two-register structure load.  64-bit elements are loaded with vld1.64
;; instead of vld2.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vld2.<V_sz_elem>\t%h0, %A1";
  return "vld1.64\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_2reg<q>")
                    (const_string "neon_load2_2reg<q>")))]
)

;; Quad-word variant of the two-register structure load (OImode result).
(define_insn "neon_vld2<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON"
  "vld2.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load2_2reg_q")])

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; The two destination registers are printed as DImode registers at REGNO
;; and REGNO + 2.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:TI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  const int base_regno = REGNO (operands[0]);
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  rtx xops[4];

  xops[0] = gen_rtx_REG (DImode, base_regno);
  xops[1] = gen_rtx_REG (DImode, base_regno + 2);
  xops[2] = operands[1];
  xops[3] = GEN_INT (arch_lane);
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", xops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Quad-word variant: the two registers printed are REGNO and REGNO + 4,
;; both advanced by 2 when the lane falls in the upper half.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int base_regno = REGNO (operands[0]);
  rtx xops[4];

  if (arch_lane >= half)
    {
      arch_lane -= half;
      base_regno += 2;
    }
  xops[0] = gen_rtx_REG (DImode, base_regno);
  xops[1] = gen_rtx_REG (DImode, base_regno + 4);
  xops[2] = operands[1];
  xops[3] = GEN_INT (arch_lane);
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", xops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)

;; Two-element load duplicated to all lanes.  Single-element modes fall
;; back to a plain vld1 of the register pair.
(define_insn "neon_vld2_dup<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";
  return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load2_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))]
)

;; V8BF variant of the two-element duplicating load.
;; NOTE(review): unlike neon_vld2_dup<mode>, the template below lists four
;; registers without the "[]" all-lanes syntax -- confirm this is intended.
(define_insn "neon_vld2_dupv8bf"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_DUP))]
  "TARGET_BF16_SIMD"
  {
    const int base_regno = REGNO (operands[0]);
    rtx xops[5];

    xops[0] = gen_rtx_REG (V4BFmode, base_regno);
    xops[1] = gen_rtx_REG (V4BFmode, base_regno + 2);
    xops[2] = gen_rtx_REG (V4BFmode, base_regno + 4);
    xops[3] = gen_rtx_REG (V4BFmode, base_regno + 6);
    xops[4] = operands[1];
    output_asm_insn ("vld2.16\t{%P0, %P1, %P2, %P3}, %A4", xops);
    return "";
  }
  [(set_attr "type" "neon_load2_all_lanes_q")]
)

;; Standard-name expander generating UNSPEC_VST2 RTL on TImode.
(define_expand "vec_store_lanesti<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand")
        (unspec:TI [(match_operand:TI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON")

;; Two-register structure store.  64-bit elements are stored with vst1.64
;; instead of vst2.
;; NOTE(review): the non-64-bit "type" is a one-lane store type although
;; this stores whole registers (the matching load uses neon_load2_2reg<q>)
;; -- confirm.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vst2.<V_sz_elem>\t%h1, %A0";
  return "vst1.64\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_2reg<q>")
                    (const_string "neon_store2_one_lane<q>")))]
)

;; Quad-word variant of the two-register structure store (OImode source).
(define_insn "neon_vst2<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
  "vst2.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store2_4reg<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Store one lane from each of two registers (REGNO and REGNO + 2).
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  const int base_regno = REGNO (operands[1]);
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  rtx xops[4];

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (DImode, base_regno);
  xops[2] = gen_rtx_REG (DImode, base_regno + 2);
  xops[3] = GEN_INT (arch_lane);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Quad-word variant: registers REGNO and REGNO + 4, both advanced by 2 when
;; the lane falls in the upper half.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
           [(match_operand:OI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int base_regno = REGNO (operands[1]);
  rtx xops[4];

  if (arch_lane >= half)
    {
      arch_lane -= half;
      base_regno += 2;
    }
  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (DImode, base_regno);
  xops[2] = gen_rtx_REG (DImode, base_regno + 4);
  xops[3] = GEN_INT (arch_lane);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)

;; Standard-name expander generating UNSPEC_VLD3 RTL on EImode.
(define_expand "vec_load_lanesei<mode>"
  [(set (match_operand:EI 0 "s_register_operand")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON")

;; Three-register structure load.  64-bit elements are loaded with vld1.64
;; instead of vld3.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vld3.<V_sz_elem>\t%h0, %A1";
  return "vld1.64\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_3reg<q>")
                    (const_string "neon_load3_3reg<q>")))]
)

;; Standard-name expander; forwards to the neon_vld3<mode> expander.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
  DONE;
})

;; Quad-word vld3, emitted as two EImode loads: neon_vld3qa from the start
;; of the memory operand and neon_vld3qb from GET_MODE_SIZE (EImode) bytes
;; further on.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_mem = adjust_address (operands[1], EImode, 0);
  rtx second_mem;

  emit_insn (gen_neon_vld3qa<mode> (operands[0], first_mem));
  second_mem = adjust_address (first_mem, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], second_mem, operands[0]));
  DONE;
})

;; First half of a quad-word vld3: writes registers REGNO, REGNO + 4 and
;; REGNO + 8.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  const int base_regno = REGNO (operands[0]);
  rtx xops[4];

  xops[0] = gen_rtx_REG (DImode, base_regno);
  xops[1] = gen_rtx_REG (DImode, base_regno + 4);
  xops[2] = gen_rtx_REG (DImode, base_regno + 8);
  xops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; Second half of a quad-word vld3: writes registers REGNO + 2, REGNO + 6
;; and REGNO + 10.  Operand 2 ties the partially loaded value to the
;; destination.
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  const int base_regno = REGNO (operands[0]);
  rtx xops[4];

  xops[0] = gen_rtx_REG (DImode, base_regno + 2);
  xops[1] = gen_rtx_REG (DImode, base_regno + 6);
  xops[2] = gen_rtx_REG (DImode, base_regno + 10);
  xops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Loads one lane into each of three registers (REGNO, REGNO + 2,
;; REGNO + 4).
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:EI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  const int base_regno = REGNO (operands[0]);
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  rtx xops[5];

  xops[0] = gen_rtx_REG (DImode, base_regno);
  xops[1] = gen_rtx_REG (DImode, base_regno + 2);
  xops[2] = gen_rtx_REG (DImode, base_regno + 4);
  xops[3] = operands[1];
  xops[4] = GEN_INT (arch_lane);
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Q-register form of the vld3 single-lane load.  Operand 2 is tied to the
;; destination ("0"), and operand 3 is the lane number.  Lanes in the upper
;; half of the vector are rebased by max / 2 and the register base is advanced
;; by 2; the three registers named in the instruction are 4 register numbers
;; apart.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[0]);
  rtx ops[5];
  /* Upper-half lanes: rebase the lane index and move the register base.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = gen_rtx_REG (DImode, regno + 8);
  ops[3] = operands[1];
  ops[4] = GEN_INT (lane);
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)

;; Load one three-element structure and replicate it to all lanes of three
;; registers.  Modes with a single element per vector fall back to a plain
;; vld1 of the whole register list.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    {
      int regno = REGNO (operands[0]);
      rtx ops[4];
      ops[0] = gen_rtx_REG (DImode, regno);
      ops[1] = gen_rtx_REG (DImode, regno + 2);
      ops[2] = gen_rtx_REG (DImode, regno + 4);
      ops[3] = operands[1];
      output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
      return "";
    }
  else
    return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load3_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))])

;; V8BF variant of the vld3 all-lanes load.  It is tagged with
;; UNSPEC_VLD3_DUP so the RTL names the operation actually emitted (vld3);
;; the CI destination mode keeps it distinct from neon_vld3_dup<mode>.
;; NOTE(review): only the registers at tabbase, tabbase + 2 and tabbase + 4
;; are named in the instruction, the same spacing the D-register pattern
;; above uses -- confirm this is the intended layout for a CI result.
(define_insn "neon_vld3_dupv8bf"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_BF16_SIMD"
  {
    rtx ops[4];
    int tabbase = REGNO (operands[0]);

    ops[3] = operands[1];
    ops[0] = gen_rtx_REG (V4BFmode, tabbase);
    ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
    ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
    output_asm_insn ("vld3.16\t{%P0[], %P1[], %P2[]}, %A3", ops);
    return "";
  }
  [(set_attr "type" "neon_load3_all_lanes_q")]
)

;; Standard-name expander for a three-vector store-lanes of D-register
;; vectors; it has no preparation code and simply emits the UNSPEC_VST3 set.
(define_expand "vec_store_lanesei<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand")
        (unspec:EI [(match_operand:EI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON")

;; Store three D registers as interleaved structures.  64-bit elements use a
;; vst1.64 of the register list instead.  The non-64-bit case is typed as a
;; three-register store, matching neon_vst3qa/neon_vst3qb, rather than as a
;; single-lane store.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst3.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_3reg<q>")
                    (const_string "neon_store3_3reg<q>")))])

;; Standard-name expander for a three-vector store-lanes of Q-register
;; vectors; forwards to the neon_vst3<mode> expander.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
  DONE;
})

;; Q-register vst3: split into two EI-sized stores (neon_vst3qa then
;; neon_vst3qb), the second at an offset of GET_MODE_SIZE (EImode) from the
;; first.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem;

  mem = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
  mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
  DONE;
})

;; First half of a Q-register vst3: stores the registers at regno, regno + 4
;; and regno + 8 of the CI source.
(define_insn "neon_vst3qa<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3A))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]);
  rtx ops[4];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; Second half of a Q-register vst3: stores the registers at regno + 2,
;; regno + 6 and regno + 10 of the CI source.
(define_insn "neon_vst3qb<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3B))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]);
  rtx ops[4];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = gen_rtx_REG (DImode, regno + 6);
  ops[3] = gen_rtx_REG (DImode, regno + 10);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; D-register vst3 single-lane store: writes lane operand 2 of the three
;; registers at regno, regno + 2 and regno + 4 of the EI source.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
           [(match_operand:EI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int base = REGNO (operands[1]);
  rtx ops[5];

  ops[0] = operands[0];
  /* Source registers are two register numbers apart.  */
  for (int i = 0; i < 3; i++)
    ops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  ops[4] = GEN_INT (lane);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Q-register form: lanes in the upper half of the vector are rebased and the
;; register base is advanced by 2; the registers used are 4 numbers apart.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
           [(match_operand:CI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  int base = REGNO (operands[1]);
  rtx ops[5];

  if (lane >= nunits / 2)
    {
      lane -= nunits / 2;
      base += 2;
    }

  ops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    ops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  ops[4] = GEN_INT (lane);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

;; Standard-name expander for a four-vector load-lanes of D-register vectors;
;; it has no preparation code and simply emits the UNSPEC_VLD4 set.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON")

;; Load four D registers of interleaved structures.  64-bit elements use a
;; vld1.64 of the register list instead.
(define_insn "neon_vld4<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vld1.64\t%h0, %A1";

  return "vld4.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_4reg<q>")
                    (const_string "neon_load4_4reg<q>")))]
)

;; Q-register vld4: split into two OI-sized loads (neon_vld4qa then
;; neon_vld4qb), the second at an offset of GET_MODE_SIZE (OImode) from the
;; first and taking the partially loaded destination as an input.
(define_expand "neon_vld4<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_half = adjust_address (operands[1], OImode, 0);
  emit_insn (gen_neon_vld4qa<mode> (operands[0], first_half));

  rtx second_half = adjust_address (first_half, OImode,
                                    GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vld4qb<mode> (operands[0], second_half, operands[0]));
  DONE;
})

;; First half of a Q-register vld4: loads the registers at regno, regno + 4,
;; regno + 8 and regno + 12 of the XI destination.
(define_insn "neon_vld4qa<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4A))]
  "TARGET_NEON"
{
  int base = REGNO (operands[0]);
  rtx ops[5];

  for (int i = 0; i < 4; i++)
    ops[i] = gen_rtx_REG (DImode, base + 4 * i);
  ops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; Second half of a Q-register vld4: loads the registers at regno + 2,
;; regno + 6, regno + 10 and regno + 14.  Operand 2 is tied to the
;; destination ("0").
(define_insn "neon_vld4qb<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4B))]
  "TARGET_NEON"
{
  int base = REGNO (operands[0]) + 2;
  rtx ops[5];

  for (int i = 0; i < 4; i++)
    ops[i] = gen_rtx_REG (DImode, base + 4 * i);
  ops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Q-register form of the vld4 single-lane load.  Operand 2 is tied to the
;; destination ("0"), and operand 3 is the lane number.  Lanes in the upper
;; half of the vector are rebased by max / 2 and the register base is advanced
;; by 2; the four registers named in the instruction are 4 register numbers
;; apart.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[0]);
  rtx ops[6];
  /* Upper-half lanes: rebase the lane index and move the register base.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = gen_rtx_REG (DImode, regno + 8);
  ops[3] = gen_rtx_REG (DImode, regno + 12);
  ops[4] = operands[1];
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)

;; Load one four-element structure and replicate it to all lanes of four
;; registers.  Modes with a single element per vector fall back to a plain
;; vld1 of the whole register list.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    {
      int regno = REGNO (operands[0]);
      rtx ops[5];
      ops[0] = gen_rtx_REG (DImode, regno);
      ops[1] = gen_rtx_REG (DImode, regno + 2);
      ops[2] = gen_rtx_REG (DImode, regno + 4);
      ops[3] = gen_rtx_REG (DImode, regno + 6);
      ops[4] = operands[1];
      output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
                       ops);
      return "";
    }
  else
    return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load4_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))]
)

;; V8BF variant of the vld4 all-lanes load.  It is tagged with
;; UNSPEC_VLD4_DUP so the RTL names the operation actually emitted (vld4);
;; the XI destination mode keeps it distinct from neon_vld4_dup<mode>.
;; NOTE(review): the registers named are tabbase, +2, +4 and +6, the same
;; spacing the D-register pattern above uses -- confirm this is the intended
;; layout for an XI result.
(define_insn "neon_vld4_dupv8bf"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
  "TARGET_BF16_SIMD"
  {
    rtx ops[5];
    int tabbase = REGNO (operands[0]);

    ops[4] = operands[1];
    ops[0] = gen_rtx_REG (V4BFmode, tabbase);
    ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
    ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
    ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
    output_asm_insn ("vld4.16\t{%P0[], %P1[], %P2[], %P3[]}, %A4", ops);
    return "";
  }
  [(set_attr "type" "neon_load4_all_lanes_q")]
)

;; Standard-name expander for a four-vector store-lanes of D-register
;; vectors; it has no preparation code and simply emits the UNSPEC_VST4 set.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI [(match_operand:OI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON")

;; Store four D registers as interleaved structures.  64-bit elements use a
;; vst1.64 of the register list instead.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst4.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_4reg<q>")
                    (const_string "neon_store4_4reg<q>")))]
)

;; Q-register vst4: split into two OI-sized stores (neon_vst4qa then
;; neon_vst4qb), the second at an offset of GET_MODE_SIZE (OImode) from the
;; first.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem;

  mem = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
  mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
  DONE;
})

;; First half of a Q-register vst4: stores the registers at regno, regno + 4,
;; regno + 8 and regno + 12 of the XI source.
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4A))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]);
  rtx ops[5];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)

;; Second half of a Q-register vst4: stores the registers at regno + 2,
;; regno + 6, regno + 10 and regno + 14 of the XI source.
(define_insn "neon_vst4qb<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4B))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]);
  rtx ops[5];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = gen_rtx_REG (DImode, regno + 6);
  ops[3] = gen_rtx_REG (DImode, regno + 10);
  ops[4] = gen_rtx_REG (DImode, regno + 14);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; D-register vst4 single-lane store: writes lane operand 2 of the four
;; registers at regno, regno + 2, regno + 4 and regno + 6 of the OI source.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
           [(match_operand:OI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int base = REGNO (operands[1]);
  rtx ops[6];

  ops[0] = operands[0];
  /* Source registers are two register numbers apart.  */
  for (int i = 0; i < 4; i++)
    ops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)

;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;; Q-register form of the vst4 single-lane store.  Lanes in the upper half of
;; the vector are rebased by max / 2 and the register base is advanced by 2;
;; the four registers named are 4 register numbers apart.  Typed as a
;; single-lane store, like the other lane-store patterns.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
           [(match_operand:XI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[6];
  /* Upper-half lanes: rebase the lane index and move the register base.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)

;; Extend the low half (selected by the PARALLEL in operand 2) of a Q-register
;; vector with vmovl.  Little-endian only.
(define_insn "neon_vec_unpack<US>_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack> (vec_select:<V_HALF>
                          (match_operand:VU 1 "register_operand" "w")
                          (match_operand:VU 2 "vect_par_constant_low" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %e1"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Extend the high half (selected by the PARALLEL in operand 2) of a
;; Q-register vector with vmovl.  Little-endian only.
(define_insn "neon_vec_unpack<US>_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack> (vec_select:<V_HALF>
                          (match_operand:VU 1 "register_operand" "w")
                          (match_operand:VU 2 "vect_par_constant_high" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %f1"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Standard-name expander: builds the PARALLEL of indices nunits/2 ..
;; nunits-1 and emits the _hi_ unpack insn above.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    rtvec v = rtvec_alloc (<V_mode_nunits>/2);
    rtx t1;
    int i;
    for (i = 0; i < (<V_mode_nunits>/2); i++)
      RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);

    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
    emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
                                                  operands[1],
                                                  t1));
    DONE;
  }
)

;; Standard-name expander: builds the PARALLEL of indices 0 .. nunits/2-1 and
;; emits the _lo_ unpack insn above.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    rtvec v = rtvec_alloc (<V_mode_nunits>/2);
    rtx t1;
    int i;
    for (i = 0; i < (<V_mode_nunits>/2); i++)
      RTVEC_ELT (v, i) = GEN_INT (i);
    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
    emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
                                                  operands[1],
                                                  t1));
    DONE;
  }
)

;; Widening multiply of the low halves of two Q-register vectors (vmull on
;; the %e halves).  Both inputs are selected with the same PARALLEL.
(define_insn "neon_vec_<US>mult_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
                            (match_operand:VU 1 "register_operand" "w")
                            (match_operand:VU 2 "vect_par_constant_low" "")))
                         (SE:<V_unpack> (vec_select:<V_HALF>
                            (match_operand:VU 3 "register_operand" "w")
                            (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %e1, %e3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)

;; Standard-name expander for the low-half widening multiply: builds the
;; low-index PARALLEL and emits the insn above.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    rtvec v = rtvec_alloc (<V_mode_nunits>/2);
    rtx t1;
    int i;
    for (i = 0; i < (<V_mode_nunits>/2); i++)
      RTVEC_ELT (v, i) = GEN_INT (i);
    t1 = gen_rtx_PARALLEL (<MODE>mode, v);

    emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
                                                operands[1],
                                                t1,
                                                operands[2]));
    DONE;
  }
)

;; Widening multiply of the high halves of two Q-register vectors (vmull on
;; the %f halves).
(define_insn "neon_vec_<US>mult_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
                            (match_operand:VU 1 "register_operand" "w")
                            (match_operand:VU 2 "vect_par_constant_high" "")))
                         (SE:<V_unpack> (vec_select:<V_HALF>
                            (match_operand:VU 3 "register_operand" "w")
                            (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %f1, %f3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)

;; Standard-name expander for the high-half widening multiply: builds the
;; high-index PARALLEL and emits the insn above.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    rtvec v = rtvec_alloc (<V_mode_nunits>/2);
    rtx t1;
    int i;
    for (i = 0; i < (<V_mode_nunits>/2); i++)
      RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
    t1 = gen_rtx_PARALLEL (<MODE>mode, v);

    emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
                                                operands[1],
                                                t1,
                                                operands[2]));
    DONE;
  }
)

;; Widening shift left of a D-register vector by an immediate (vshll).
(define_insn "neon_vec_<US>shiftl_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
        (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
  "TARGET_NEON"
{
  return "vshll.<US><V_sz_elem> %q0, %P1, %2";
}
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Widening shift of the low half of a Q-register vector: takes the subreg at
;; byte offset 0 and emits the half-mode vshll pattern.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
                 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
                 operands[2]));
    DONE;
  }
)

;; Widening shift of the high half of a Q-register vector: takes the subreg
;; at byte offset GET_MODE_SIZE (<V_HALF>mode) and emits the half-mode vshll
;; pattern.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
                 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                                      GET_MODE_SIZE (<V_HALF>mode)),
                 operands[2]));
    DONE;
  }
)

;; Vectorize for non-neon-quad case
(define_insn "neon_unpack<US>_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
  "TARGET_NEON"
  "vmovl.<US><V_sz_elem> %q0, %P1"
  [(set_attr "type" "neon_move")]
)

;; D-register unpack, low part: widen the whole input into a temporary, then
;; take the low half of the temporary.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx tmpreg = gen_reg_rtx (<V_widen>mode);
  emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));

  DONE;
}
)

;; D-register unpack, high part: widen the whole input into a temporary, then
;; take the high half of the temporary.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx tmpreg = gen_reg_rtx (<V_widen>mode);
  emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));

  DONE;
}
)

;; Widening multiply of two D-register vectors (vmull).
(define_insn "neon_vec_<US>mult_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (mult:<V_widen> (SE:<V_widen>
                           (match_operand:VDI 1 "register_operand" "w"))
                        (SE:<V_widen>
                           (match_operand:VDI 2 "register_operand" "w"))))]
  "TARGET_NEON"
  "vmull.<US><V_sz_elem> %q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)

;; D-register widening multiply, high part: full vmull into a temporary,
;; then take the high half.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
  "TARGET_NEON"
  {
    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
    emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));

    DONE;
  }
)

;; D-register widening multiply, low part: full vmull into a temporary, then
;; take the low half.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
  "TARGET_NEON"
  {
    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
    emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));

    DONE;
  }
)

;; D-register widening shift, high part: full vshll into a temporary, then
;; take the high half.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
  {
    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
    emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));

    DONE;
  }
)

;; D-register widening shift, low part: full vshll into a temporary, then
;; take the low half.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
  {
    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
    emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));

    DONE;
  }
)

; FIXME: These instruction patterns can't be used safely in big-endian mode
; because the ordering of vector elements in Q registers is different from what
; the semantics of the instructions require.

;; Narrow two Q-register vectors into the two halves of one Q register with a
;; pair of vmovn instructions.  The output is earlyclobber ("=&w").
(define_insn "vec_pack_trunc_<mode>"
  [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
        (vec_concat:<V_narrow_pack>
           (truncate:<V_narrow>
              (match_operand:VN 1 "register_operand" "w"))
           (truncate:<V_narrow>
              (match_operand:VN 2 "register_operand" "w"))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)

;; For the non-quad case.
(define_insn "neon_vec_pack_trunc_<mode>"
  [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
        (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovn.i<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_move_narrow_q")]
)

;; Pack two D-register vectors: move them into the low and high halves of a
;; double-width temporary, then narrow the temporary with a single vmovn.
(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<V_narrow_pack> 0 "register_operand")
   (match_operand:VSHFT 1 "register_operand")
   (match_operand:VSHFT 2 "register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);

  emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
  emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
  emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
  DONE;
})

;; Floating-point absolute difference expressed as abs (minus ...).
(define_insn "neon_vabd<mode>_2"
  [(set (match_operand:VF 0 "s_register_operand" "=w")
        (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
                          (match_operand:VF 2 "s_register_operand" "w"))))]
  "ARM_HAVE_NEON_<MODE>_ARITH"
  "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)

;; Floating-point absolute difference expressed as abs of an UNSPEC_VSUB.
(define_insn "neon_vabd<mode>_3"
  [(set (match_operand:VF 0 "s_register_operand" "=w")
        (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
                            (match_operand:VF 2 "s_register_operand" "w")]
                 UNSPEC_VSUB)))]
  "ARM_HAVE_NEON_<MODE>_ARITH"
  "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)

;; Integer matrix multiply-accumulate on V16QI inputs; the accumulator
;; (operand 1) is tied to the destination.
(define_insn "neon_<sup>mmlav16qi"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (plus:V4SI
         (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
                       (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
         (match_operand:V4SI 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "v<sup>mmla.<mmla_sfx>\t%q0, %q2, %q3"
  [(set_attr "type" "neon_mla_s_q")]
)

;; BFloat16 dot product accumulated into operand 1 (tied to the destination).
(define_insn "neon_vbfdot<VCVTF:mode>"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
                    (unspec:VCVTF [
                        (match_operand:<VSF2BF> 2 "register_operand" "w")
                        (match_operand:<VSF2BF> 3 "register_operand" "w")]
                     UNSPEC_DOT_S)))]
  "TARGET_BF16_SIMD"
  "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_dot<q>")]
)

;; BFloat16 dot product by lane, with the lane taken from a V4BF register.
(define_insn "neon_vbfdot_lanev4bf<VCVTF:mode>"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
                    (unspec:VCVTF [
                        (match_operand:<VSF2BF> 2 "register_operand" "w")
                        (match_operand:V4BF 3 "register_operand" "x")
                        (match_operand:SI 4 "immediate_operand" "i")]
                     UNSPEC_DOT_S)))]
  "TARGET_BF16_SIMD"
  "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
  [(set_attr "type" "neon_dot<q>")]
)

;; BFloat16 dot product by lane, with the lane taken from a V8BF register.
;; Lane numbers below nunits / 4 index the %e half of operand 3; higher lane
;; numbers are rebased and index the %f half.
(define_insn "neon_vbfdot_lanev8bf<VCVTF:mode>"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
                    (unspec:VCVTF [
                        (match_operand:<VSF2BF> 2 "register_operand" "w")
                        (match_operand:V8BF 3 "register_operand" "x")
                        (match_operand:SI 4 "immediate_operand" "i")]
                     UNSPEC_DOT_S)))]
  "TARGET_BF16_SIMD"
  {
    int lane = INTVAL (operands[4]);
    int half = GET_MODE_NUNITS (GET_MODE (operands[3])) / 4;
    if (lane < half)
      return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %e3[%c4]";
    else
      {
        operands[4] = GEN_INT (lane - half);
        return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %f3[%c4]";
      }
  }
  [(set_attr "type" "neon_dot<q>")]
)

;; Convert V4SF to BFloat16, writing the <V_bf_low> part of the destination.
(define_insn "neon_vbfcvtv4sf<VBFCVT:mode>"
  [(set (match_operand:VBFCVT 0 "register_operand" "=w")
        (unspec:VBFCVT [(match_operand:V4SF 1 "register_operand" "w")]
                       UNSPEC_BFCVT))]
  "TARGET_BF16_SIMD"
  "vcvt.bf16.f32\\t%<V_bf_low>0, %q1"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

;; Convert V4SF to BFloat16, writing the %f half of a V8BF destination whose
;; previous value (operand 1) is tied to the output.
(define_insn "neon_vbfcvtv4sf_highv8bf"
  [(set (match_operand:V8BF 0 "register_operand" "=w")
        (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "register_operand" "w")]
                     UNSPEC_BFCVT_HIGH))]
  "TARGET_BF16_SIMD"
  "vcvt.bf16.f32\\t%f0, %q2"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

;; Scalar SF to BF conversion (vcvtb).
(define_insn "neon_vbfcvtsf"
  [(set (match_operand:BF 0 "register_operand" "=t")
        (unspec:BF [(match_operand:SF 1 "register_operand" "t")]
                   UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "vcvtb.bf16.f32\\t%0, %1"
  [(set_attr "type" "f_cvt")]
)

;; Convert BFloat16 (the <V_bf_low> part of operand 1) to V4SF with a widening
;; shift left by 16.
(define_insn "neon_vbfcvt<VBFCVT:mode>"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (unspec:V4SF [(match_operand:VBFCVT 1 "register_operand" "w")]
                     UNSPEC_BFCVT))]
  "TARGET_BF16_SIMD"
  "vshll.u32\\t%q0, %<V_bf_low>1, #16"
  [(set_attr "type" "neon_shift_imm_q")]
)

;; Convert the %f half of a V8BF register to V4SF with a widening shift left
;; by 16.
(define_insn "neon_vbfcvt_highv8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
                     UNSPEC_BFCVT_HIGH))]
  "TARGET_BF16_SIMD"
  "vshll.u32\\t%q0, %f1, #16"
  [(set_attr "type" "neon_shift_imm_q")]
)

;; Convert a BF scalar operand to SF via VSHL.
;; VSHL doesn't accept 32-bit registers where the BF and SF scalar operands
;; would be allocated, therefore the operands must be converted to intermediate
;; vectors (i.e. V2SI) in order to apply 64-bit registers.
(define_expand "neon_vbfcvtbf"
  [(match_operand:SF 0 "register_operand")
   (unspec:SF [(match_operand:BF 1 "register_operand")] UNSPEC_BFCVT)]
  "TARGET_BF16_FP"
{
  rtx op0 = gen_reg_rtx (V2SImode);
  rtx op1 = gen_reg_rtx (V2SImode);
  emit_insn (gen_neon_vbfcvtbf_cvtmodev2si (op1, operands[1]));
  emit_insn (gen_neon_vshl_nv2si (op0, op1, gen_int_mode (16, SImode)));
  emit_insn (gen_neon_vbfcvtbf_cvtmodesf (operands[0], op0));
  DONE;
})

;; Convert BF mode to V2SI and V2SI to SF.
;; Implement this by allocating a 32-bit operand in the low half of a 64-bit
;; register indexed by a 32-bit sub-register number.
;; This will generate reloads but the compiler can optimize out the moves.
;; Use the 'x' constraint to guarantee the 32-bit sub-registers are in an
;; indexable range, so as to avoid extra moves.
;; Mode-changing pseudo conversion: the output template is empty and
;; operand 1 is tied to operand 0, so no instruction text is emitted.
(define_insn "neon_vbfcvtbf_cvtmode<mode>"
  [(set (match_operand:VBFCVTM 0 "register_operand" "=x")
        (unspec:VBFCVTM
          [(match_operand:<V_bf_cvt_m> 1 "register_operand" "0")]
          UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  ""
)

;; BF16 matrix multiply-accumulate into a V4SF accumulator.  Operand 1 is
;; tied to the destination.
(define_insn "neon_vmmlav8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (unspec:V4SF
            [(match_operand:V8BF 2 "register_operand" "w")
             (match_operand:V8BF 3 "register_operand" "w")]
            UNSPEC_BFMMLA)))]
  "TARGET_BF16_SIMD"
  "vmmla.bf16\\t%q0, %q2, %q3"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

;; BF16 fused multiply-add (one pattern per BF_MA unspec) accumulating
;; into the V4SF operand 1, which is tied to the destination.
(define_insn "neon_vfma<bt>v8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (unspec:V4SF
            [(match_operand:V8BF 2 "register_operand" "w")
             (match_operand:V8BF 3 "register_operand" "w")]
            BF_MA)))]
  "TARGET_BF16_SIMD"
  "vfma<bt>.bf16\\t%q0, %q2, %q3"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

;; By-element form: the multiplier element comes from the V4BF operand 3
;; and is indexed by the constant operand 4.
(define_insn "neon_vfma<bt>_lanev8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (unspec:V4SF
            [(match_operand:V8BF 2 "register_operand" "w")
             (match_operand:V4BF 3 "register_operand" "x")
             (match_operand:SI 4 "const_int_operand" "n")]
            BF_MA)))]
  "TARGET_BF16_SIMD"
  "vfma<bt>.bf16\\t%q0, %q2, %P3[%c4]"
  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
)

;; By-element form taking the element from a V8BF operand 3 (lanes 0-7).
;; Lanes 4-7 are handled by extracting the high half into a fresh V4BF
;; register and rebasing the lane; lanes 0-3 pass operand 3 straight
;; through to neon_vfma<bt>_lanev8bf.
(define_expand "neon_vfma<bt>_laneqv8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (unspec:V4SF
            [(match_operand:V8BF 2 "register_operand" "w")
             (match_operand:V8BF 3 "register_operand" "x")
             (match_operand:SI 4 "const_int_operand" "n")]
            BF_MA)))]
  "TARGET_BF16_SIMD"
  {
    int lane = INTVAL (operands[4]);
    rtx lane_vec = operands[3];

    gcc_assert (IN_RANGE (lane, 0, 7));

    /* NOTE(review): for lane < 4 the V8BF operands[3] is handed to a
       pattern whose operand 3 is declared V4BF -- confirm that this is
       intended and recognized.  */
    if (lane >= 4)
      {
        /* Select from the high half and rebase the index to 0-3.  */
        lane_vec = gen_reg_rtx (V4BFmode);
        emit_insn (gen_neon_vget_highv8bf (lane_vec, operands[3]));
        operands[4] = GEN_INT (lane - 4);
      }

    emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1],
                                           operands[2], lane_vec,
                                           operands[4]));
    DONE;
  }
  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
)