1 /* Decompose multiword subregs.
2    Copyright (C) 2007-2022 Free Software Foundation, Inc.
3    Contributed by Richard Henderson <rth@redhat.com>
4                       Ian Lance Taylor <iant@google.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "cfghooks.h"
29 #include "df.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "insn-config.h"
34 #include "emit-rtl.h"
35 #include "recog.h"
36 #include "cfgrtl.h"
37 #include "cfgbuild.h"
38 #include "dce.h"
39 #include "expr.h"
40 #include "tree-pass.h"
41 #include "lower-subreg.h"
42 #include "rtl-iter.h"
43 #include "target.h"
44 
45 
46 /* Decompose multi-word pseudo-registers into individual
47    pseudo-registers when possible and profitable.  This is possible
48    when all the uses of a multi-word register are via SUBREG, or are
49    copies of the register to another location.  Breaking apart the
50    register permits more CSE and permits better register allocation.
51    This is profitable if the machine does not have move instructions
52    to do this.
53 
54    This pass only splits moves with modes that are wider than
55    word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
56    integer modes that are twice the width of word_mode.  The latter
57    could be generalized if there was a need to do this, but the trend in
58    architectures is to not need this.
59 
60    There are two useful preprocessor defines for use by maintainers:
61 
62    #define LOG_COSTS 1
63 
64    if you wish to see the actual cost estimates that are being used
65    for each mode wider than word mode and the cost estimates for zero
66    extension and the shifts.   This can be useful when port maintainers
67    are tuning insn rtx costs.
68 
69    #define FORCE_LOWERING 1
70 
71    if you wish to test the pass with all the transformation forced on.
72    This can be useful for finding bugs in the transformations.  */
73 
/* Maintainer switch: when nonzero, the cost computations in this file
   print the estimates they compare to stderr.  */
#define LOG_COSTS 0

/* Maintainer switch: when nonzero, every cost comparison in this file
   is treated as favouring the split, so all lowerings are enabled.  */
#define FORCE_LOWERING 0
76 
77 /* Bit N in this bitmap is set if regno N is used in a context in
78    which we can decompose it.  */
79 static bitmap decomposable_context;
80 
81 /* Bit N in this bitmap is set if regno N is used in a context in
82    which it cannot be decomposed.  */
83 static bitmap non_decomposable_context;
84 
85 /* Bit N in this bitmap is set if regno N is used in a subreg
86    which changes the mode but not the size.  This typically happens
87    when the register accessed as a floating-point value; we want to
88    avoid generating accesses to its subwords in integer modes.  */
89 static bitmap subreg_context;
90 
91 /* Bit N in the bitmap in element M of this array is set if there is a
92    copy from reg M to reg N.  */
93 static vec<bitmap> reg_copy_graph;
94 
95 struct target_lower_subreg default_target_lower_subreg;
96 #if SWITCHABLE_TARGET
97 struct target_lower_subreg *this_target_lower_subreg
98   = &default_target_lower_subreg;
99 #endif
100 
101 #define twice_word_mode \
102   this_target_lower_subreg->x_twice_word_mode
103 #define choices \
104   this_target_lower_subreg->x_choices
105 
106 /* Return true if MODE is a mode we know how to lower.  When returning true,
107    store its byte size in *BYTES and its word size in *WORDS.  */
108 
109 static inline bool
interesting_mode_p(machine_mode mode,unsigned int * bytes,unsigned int * words)110 interesting_mode_p (machine_mode mode, unsigned int *bytes,
111                         unsigned int *words)
112 {
113   if (!GET_MODE_SIZE (mode).is_constant (bytes))
114     return false;
115   *words = CEIL (*bytes, UNITS_PER_WORD);
116   return true;
117 }
118 
119 /* RTXes used while computing costs.  */
120 struct cost_rtxes {
121   /* Source and target registers.  */
122   rtx source;
123   rtx target;
124 
125   /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
126   rtx zext;
127 
128   /* A shift of SOURCE.  */
129   rtx shift;
130 
131   /* A SET of TARGET.  */
132   rtx set;
133 };
134 
135 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
136    rtxes in RTXES.  SPEED_P selects between the speed and size cost.  */
137 
138 static int
shift_cost(bool speed_p,struct cost_rtxes * rtxes,enum rtx_code code,machine_mode mode,int op1)139 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
140               machine_mode mode, int op1)
141 {
142   PUT_CODE (rtxes->shift, code);
143   PUT_MODE (rtxes->shift, mode);
144   PUT_MODE (rtxes->source, mode);
145   XEXP (rtxes->shift, 1) = gen_int_shift_amount (mode, op1);
146   return set_src_cost (rtxes->shift, mode, speed_p);
147 }
148 
149 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
150    to true if it is profitable to split a double-word CODE shift
151    of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
152    for speed or size profitability.
153 
154    Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
155    the cost of moving zero into a word-mode register.  WORD_MOVE_COST
156    is the cost of moving between word registers.  */
157 
158 static void
compute_splitting_shift(bool speed_p,struct cost_rtxes * rtxes,bool * splitting,enum rtx_code code,int word_move_zero_cost,int word_move_cost)159 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
160                                bool *splitting, enum rtx_code code,
161                                int word_move_zero_cost, int word_move_cost)
162 {
163   int wide_cost, narrow_cost, upper_cost, i;
164 
165   for (i = 0; i < BITS_PER_WORD; i++)
166     {
167       wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
168                                     i + BITS_PER_WORD);
169       if (i == 0)
170           narrow_cost = word_move_cost;
171       else
172           narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
173 
174       if (code != ASHIFTRT)
175           upper_cost = word_move_zero_cost;
176       else if (i == BITS_PER_WORD - 1)
177           upper_cost = word_move_cost;
178       else
179           upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
180                                          BITS_PER_WORD - 1);
181 
182       if (LOG_COSTS)
183           fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
184                      GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
185                      i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
186 
187       if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
188           splitting[i] = true;
189     }
190 }
191 
192 /* Compute what we should do when optimizing for speed or size; SPEED_P
193    selects which.  Use RTXES for computing costs.  */
194 
195 static void
compute_costs(bool speed_p,struct cost_rtxes * rtxes)196 compute_costs (bool speed_p, struct cost_rtxes *rtxes)
197 {
198   unsigned int i;
199   int word_move_zero_cost, word_move_cost;
200 
201   PUT_MODE (rtxes->target, word_mode);
202   SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
203   word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
204 
205   SET_SRC (rtxes->set) = rtxes->source;
206   word_move_cost = set_rtx_cost (rtxes->set, speed_p);
207 
208   if (LOG_COSTS)
209     fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
210                GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
211 
212   for (i = 0; i < MAX_MACHINE_MODE; i++)
213     {
214       machine_mode mode = (machine_mode) i;
215       unsigned int size, factor;
216       if (interesting_mode_p (mode, &size, &factor) && factor > 1)
217           {
218             unsigned int mode_move_cost;
219 
220             PUT_MODE (rtxes->target, mode);
221             PUT_MODE (rtxes->source, mode);
222             mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
223 
224             if (LOG_COSTS)
225               fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
226                          GET_MODE_NAME (mode), mode_move_cost,
227                          word_move_cost, factor);
228 
229             if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
230               {
231                 choices[speed_p].move_modes_to_split[i] = true;
232                 choices[speed_p].something_to_do = true;
233               }
234           }
235     }
236 
237   /* For the moves and shifts, the only case that is checked is one
238      where the mode of the target is an integer mode twice the width
239      of the word_mode.
240 
241      If it is not profitable to split a double word move then do not
242      even consider the shifts or the zero extension.  */
243   if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
244     {
245       int zext_cost;
246 
247       /* The only case here to check to see if moving the upper part with a
248            zero is cheaper than doing the zext itself.  */
249       PUT_MODE (rtxes->source, word_mode);
250       zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);
251 
252       if (LOG_COSTS)
253           fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
254                      GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
255                      zext_cost, word_move_cost, word_move_zero_cost);
256 
257       if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
258           choices[speed_p].splitting_zext = true;
259 
260       compute_splitting_shift (speed_p, rtxes,
261                                      choices[speed_p].splitting_ashift, ASHIFT,
262                                      word_move_zero_cost, word_move_cost);
263       compute_splitting_shift (speed_p, rtxes,
264                                      choices[speed_p].splitting_lshiftrt, LSHIFTRT,
265                                      word_move_zero_cost, word_move_cost);
266       compute_splitting_shift (speed_p, rtxes,
267                                      choices[speed_p].splitting_ashiftrt, ASHIFTRT,
268                                      word_move_zero_cost, word_move_cost);
269     }
270 }
271 
272 /* Do one-per-target initialisation.  This involves determining
273    which operations on the machine are profitable.  If none are found,
274    then the pass just returns when called.  */
275 
276 void
init_lower_subreg(void)277 init_lower_subreg (void)
278 {
279   struct cost_rtxes rtxes;
280 
281   memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
282 
283   twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require ();
284 
285   rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
286   rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
287   rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
288   rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
289   rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
290 
291   if (LOG_COSTS)
292     fprintf (stderr, "\nSize costs\n==========\n\n");
293   compute_costs (false, &rtxes);
294 
295   if (LOG_COSTS)
296     fprintf (stderr, "\nSpeed costs\n===========\n\n");
297   compute_costs (true, &rtxes);
298 }
299 
300 static bool
simple_move_operand(rtx x)301 simple_move_operand (rtx x)
302 {
303   if (GET_CODE (x) == SUBREG)
304     x = SUBREG_REG (x);
305 
306   if (!OBJECT_P (x))
307     return false;
308 
309   if (GET_CODE (x) == LABEL_REF
310       || GET_CODE (x) == SYMBOL_REF
311       || GET_CODE (x) == HIGH
312       || GET_CODE (x) == CONST)
313     return false;
314 
315   if (MEM_P (x)
316       && (MEM_VOLATILE_P (x)
317             || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
318     return false;
319 
320   return true;
321 }
322 
323 /* If X is an operator that can be treated as a simple move that we
324    can split, then return the operand that is operated on.  */
325 
326 static rtx
operand_for_swap_move_operator(rtx x)327 operand_for_swap_move_operator (rtx x)
328 {
329   /* A word sized rotate of a register pair is equivalent to swapping
330      the registers in the register pair.  */
331   if (GET_CODE (x) == ROTATE
332       && GET_MODE (x) == twice_word_mode
333       && simple_move_operand (XEXP (x, 0))
334       && CONST_INT_P (XEXP (x, 1))
335       && INTVAL (XEXP (x, 1)) == BITS_PER_WORD)
336     return XEXP (x, 0);
337 
338   return NULL_RTX;
339 }
340 
341 /* If INSN is a single set between two objects that we want to split,
342    return the single set.  SPEED_P says whether we are optimizing
343    INSN for speed or size.
344 
345    INSN should have been passed to recog and extract_insn before this
346    is called.  */
347 
348 static rtx
simple_move(rtx_insn * insn,bool speed_p)349 simple_move (rtx_insn *insn, bool speed_p)
350 {
351   rtx x, op;
352   rtx set;
353   machine_mode mode;
354 
355   if (recog_data.n_operands != 2)
356     return NULL_RTX;
357 
358   set = single_set (insn);
359   if (!set)
360     return NULL_RTX;
361 
362   x = SET_DEST (set);
363   if (x != recog_data.operand[0] && x != recog_data.operand[1])
364     return NULL_RTX;
365   if (!simple_move_operand (x))
366     return NULL_RTX;
367 
368   x = SET_SRC (set);
369   if ((op = operand_for_swap_move_operator (x)) != NULL_RTX)
370     x = op;
371 
372   if (x != recog_data.operand[0] && x != recog_data.operand[1])
373     return NULL_RTX;
374   /* For the src we can handle ASM_OPERANDS, and it is beneficial for
375      things like x86 rdtsc which returns a DImode value.  */
376   if (GET_CODE (x) != ASM_OPERANDS
377       && !simple_move_operand (x))
378     return NULL_RTX;
379 
380   /* We try to decompose in integer modes, to avoid generating
381      inefficient code copying between integer and floating point
382      registers.  That means that we can't decompose if this is a
383      non-integer mode for which there is no integer mode of the same
384      size.  */
385   mode = GET_MODE (SET_DEST (set));
386   scalar_int_mode int_mode;
387   if (!SCALAR_INT_MODE_P (mode)
388       && (!int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists (&int_mode)
389             || !targetm.modes_tieable_p (mode, int_mode)))
390     return NULL_RTX;
391 
392   /* Reject PARTIAL_INT modes.  They are used for processor specific
393      purposes and it's probably best not to tamper with them.  */
394   if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
395     return NULL_RTX;
396 
397   if (!choices[speed_p].move_modes_to_split[(int) mode])
398     return NULL_RTX;
399 
400   return set;
401 }
402 
403 /* If SET is a copy from one multi-word pseudo-register to another,
404    record that in reg_copy_graph.  Return whether it is such a
405    copy.  */
406 
407 static bool
find_pseudo_copy(rtx set)408 find_pseudo_copy (rtx set)
409 {
410   rtx dest = SET_DEST (set);
411   rtx src = SET_SRC (set);
412   rtx op;
413   unsigned int rd, rs;
414   bitmap b;
415 
416   if ((op = operand_for_swap_move_operator (src)) != NULL_RTX)
417     src = op;
418 
419   if (!REG_P (dest) || !REG_P (src))
420     return false;
421 
422   rd = REGNO (dest);
423   rs = REGNO (src);
424   if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
425     return false;
426 
427   b = reg_copy_graph[rs];
428   if (b == NULL)
429     {
430       b = BITMAP_ALLOC (NULL);
431       reg_copy_graph[rs] = b;
432     }
433 
434   bitmap_set_bit (b, rd);
435 
436   return true;
437 }
438 
439 /* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
440    where they are copied to another register, add the register to
441    which they are copied to DECOMPOSABLE_CONTEXT.  Use
442    NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
443    copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */
444 
445 static void
propagate_pseudo_copies(void)446 propagate_pseudo_copies (void)
447 {
448   auto_bitmap queue, propagate;
449 
450   bitmap_copy (queue, decomposable_context);
451   do
452     {
453       bitmap_iterator iter;
454       unsigned int i;
455 
456       bitmap_clear (propagate);
457 
458       EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
459           {
460             bitmap b = reg_copy_graph[i];
461             if (b)
462               bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
463           }
464 
465       bitmap_and_compl (queue, propagate, decomposable_context);
466       bitmap_ior_into (decomposable_context, propagate);
467     }
468   while (!bitmap_empty_p (queue));
469 }
470 
/* Classification of the insn being scanned.  A pointer to one of
   these values is passed to find_decomposable_subregs, which uses it
   to decide what a direct reference to a multi-word pseudo means.  */

enum classify_move_insn
{
  /* The insn is not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* The insn is a simple move that we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* The insn is any other simple move.  */
  SIMPLE_MOVE
};
483 
484 /* If we find a SUBREG in *LOC which we could use to decompose a
485    pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
486    unadorned register which is not a simple pseudo-register copy,
487    DATA will point at the type of move, and we set a bit in
488    DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */
489 
490 static void
find_decomposable_subregs(rtx * loc,enum classify_move_insn * pcmi)491 find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
492 {
493   subrtx_var_iterator::array_type array;
494   FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
495     {
496       rtx x = *iter;
497       if (GET_CODE (x) == SUBREG)
498           {
499             rtx inner = SUBREG_REG (x);
500             unsigned int regno, outer_size, inner_size, outer_words, inner_words;
501 
502             if (!REG_P (inner))
503               continue;
504 
505             regno = REGNO (inner);
506             if (HARD_REGISTER_NUM_P (regno))
507               {
508                 iter.skip_subrtxes ();
509                 continue;
510               }
511 
512             if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words)
513                 || !interesting_mode_p (GET_MODE (inner), &inner_size,
514                                               &inner_words))
515               continue;
516 
517             /* We only try to decompose single word subregs of multi-word
518                registers.  When we find one, we return -1 to avoid iterating
519                over the inner register.
520 
521                ??? This doesn't allow, e.g., DImode subregs of TImode values
522                on 32-bit targets.  We would need to record the way the
523                pseudo-register was used, and only decompose if all the uses
524                were the same number and size of pieces.  Hopefully this
525                doesn't happen much.  */
526 
527             if (outer_words == 1
528                 && inner_words > 1
529                 /* Don't allow to decompose floating point subregs of
530                      multi-word pseudos if the floating point mode does
531                      not have word size, because otherwise we'd generate
532                      a subreg with that floating mode from a different
533                      sized integral pseudo which is not allowed by
534                      validate_subreg.  */
535                 && (!FLOAT_MODE_P (GET_MODE (x))
536                       || outer_size == UNITS_PER_WORD))
537               {
538                 bitmap_set_bit (decomposable_context, regno);
539                 iter.skip_subrtxes ();
540                 continue;
541               }
542 
543             /* If this is a cast from one mode to another, where the modes
544                have the same size, and they are not tieable, then mark this
545                register as non-decomposable.  If we decompose it we are
546                likely to mess up whatever the backend is trying to do.  */
547             if (outer_words > 1
548                 && outer_size == inner_size
549                 && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner)))
550               {
551                 bitmap_set_bit (non_decomposable_context, regno);
552                 bitmap_set_bit (subreg_context, regno);
553                 iter.skip_subrtxes ();
554                 continue;
555               }
556           }
557       else if (REG_P (x))
558           {
559             unsigned int regno, size, words;
560 
561             /* We will see an outer SUBREG before we see the inner REG, so
562                when we see a plain REG here it means a direct reference to
563                the register.
564 
565                If this is not a simple copy from one location to another,
566                then we cannot decompose this register.  If this is a simple
567                copy we want to decompose, and the mode is right,
568                then we mark the register as decomposable.
569                Otherwise we don't say anything about this register --
570                it could be decomposed, but whether that would be
571                profitable depends upon how it is used elsewhere.
572 
573                We only set bits in the bitmap for multi-word
574                pseudo-registers, since those are the only ones we care about
575                and it keeps the size of the bitmaps down.  */
576 
577             regno = REGNO (x);
578             if (!HARD_REGISTER_NUM_P (regno)
579                 && interesting_mode_p (GET_MODE (x), &size, &words)
580                 && words > 1)
581               {
582                 switch (*pcmi)
583                     {
584                     case NOT_SIMPLE_MOVE:
585                       bitmap_set_bit (non_decomposable_context, regno);
586                       break;
587                     case DECOMPOSABLE_SIMPLE_MOVE:
588                       if (targetm.modes_tieable_p (GET_MODE (x), word_mode))
589                         bitmap_set_bit (decomposable_context, regno);
590                       break;
591                     case SIMPLE_MOVE:
592                       break;
593                     default:
594                       gcc_unreachable ();
595                     }
596               }
597           }
598       else if (MEM_P (x))
599           {
600             enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
601 
602             /* Any registers used in a MEM do not participate in a
603                SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
604                here, and return -1 to block the parent's recursion.  */
605             find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
606             iter.skip_subrtxes ();
607           }
608     }
609 }
610 
611 /* Decompose REGNO into word-sized components.  We smash the REG node
612    in place.  This ensures that (1) something goes wrong quickly if we
613    fail to make some replacement, and (2) the debug information inside
614    the symbol table is automatically kept up to date.  */
615 
616 static void
decompose_register(unsigned int regno)617 decompose_register (unsigned int regno)
618 {
619   rtx reg;
620   unsigned int size, words, i;
621   rtvec v;
622 
623   reg = regno_reg_rtx[regno];
624 
625   regno_reg_rtx[regno] = NULL_RTX;
626 
627   if (!interesting_mode_p (GET_MODE (reg), &size, &words))
628     gcc_unreachable ();
629 
630   v = rtvec_alloc (words);
631   for (i = 0; i < words; ++i)
632     RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
633 
634   PUT_CODE (reg, CONCATN);
635   XVEC (reg, 0) = v;
636 
637   if (dump_file)
638     {
639       fprintf (dump_file, "; Splitting reg %u ->", regno);
640       for (i = 0; i < words; ++i)
641           fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
642       fputc ('\n', dump_file);
643     }
644 }
645 
646 /* Get a SUBREG of a CONCATN.  */
647 
648 static rtx
simplify_subreg_concatn(machine_mode outermode,rtx op,poly_uint64 orig_byte)649 simplify_subreg_concatn (machine_mode outermode, rtx op, poly_uint64 orig_byte)
650 {
651   unsigned int outer_size, outer_words, inner_size, inner_words;
652   machine_mode innermode, partmode;
653   rtx part;
654   unsigned int final_offset;
655   unsigned int byte;
656 
657   innermode = GET_MODE (op);
658   if (!interesting_mode_p (outermode, &outer_size, &outer_words)
659       || !interesting_mode_p (innermode, &inner_size, &inner_words))
660     gcc_unreachable ();
661 
662   /* Must be constant if interesting_mode_p passes.  */
663   byte = orig_byte.to_constant ();
664   gcc_assert (GET_CODE (op) == CONCATN);
665   gcc_assert (byte % outer_size == 0);
666 
667   gcc_assert (byte < inner_size);
668   if (outer_size > inner_size)
669     return NULL_RTX;
670 
671   inner_size /= XVECLEN (op, 0);
672   part = XVECEXP (op, 0, byte / inner_size);
673   partmode = GET_MODE (part);
674 
675   final_offset = byte % inner_size;
676   if (final_offset + outer_size > inner_size)
677     return NULL_RTX;
678 
679   /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
680      regular CONST_VECTORs.  They have vector or integer modes, depending
681      on the capabilities of the target.  Cope with them.  */
682   if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
683     partmode = GET_MODE_INNER (innermode);
684   else if (partmode == VOIDmode)
685     partmode = mode_for_size (inner_size * BITS_PER_UNIT,
686                                     GET_MODE_CLASS (innermode), 0).require ();
687 
688   return simplify_gen_subreg (outermode, part, partmode, final_offset);
689 }
690 
691 /* Wrapper around simplify_gen_subreg which handles CONCATN.  */
692 
693 static rtx
simplify_gen_subreg_concatn(machine_mode outermode,rtx op,machine_mode innermode,unsigned int byte)694 simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
695                                    machine_mode innermode, unsigned int byte)
696 {
697   rtx ret;
698 
699   /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
700      If OP is a SUBREG of a CONCATN, then it must be a simple mode
701      change with the same size and offset 0, or it must extract a
702      part.  We shouldn't see anything else here.  */
703   if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
704     {
705       rtx op2;
706 
707       if (known_eq (GET_MODE_SIZE (GET_MODE (op)),
708                         GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
709             && known_eq (SUBREG_BYTE (op), 0))
710           return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
711                                                       GET_MODE (SUBREG_REG (op)), byte);
712 
713       op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
714                                              SUBREG_BYTE (op));
715       if (op2 == NULL_RTX)
716           {
717             /* We don't handle paradoxical subregs here.  */
718             gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op)));
719             gcc_assert (!paradoxical_subreg_p (op));
720             op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
721                                                    byte + SUBREG_BYTE (op));
722             gcc_assert (op2 != NULL_RTX);
723             return op2;
724           }
725 
726       op = op2;
727       gcc_assert (op != NULL_RTX);
728       gcc_assert (innermode == GET_MODE (op));
729     }
730 
731   if (GET_CODE (op) == CONCATN)
732     return simplify_subreg_concatn (outermode, op, byte);
733 
734   ret = simplify_gen_subreg (outermode, op, innermode, byte);
735 
736   /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
737      resolve_simple_move will ask for the high part of the paradoxical
738      subreg, which does not have a value.  Just return a zero.  */
739   if (ret == NULL_RTX
740       && paradoxical_subreg_p (op))
741     return CONST0_RTX (outermode);
742 
743   gcc_assert (ret != NULL_RTX);
744   return ret;
745 }
746 
747 /* Return whether we should resolve X into the registers into which it
748    was decomposed.  */
749 
750 static bool
resolve_reg_p(rtx x)751 resolve_reg_p (rtx x)
752 {
753   return GET_CODE (x) == CONCATN;
754 }
755 
756 /* Return whether X is a SUBREG of a register which we need to
757    resolve.  */
758 
759 static bool
resolve_subreg_p(rtx x)760 resolve_subreg_p (rtx x)
761 {
762   if (GET_CODE (x) != SUBREG)
763     return false;
764   return resolve_reg_p (SUBREG_REG (x));
765 }
766 
767 /* Look for SUBREGs in *LOC which need to be decomposed.  */
768 
769 static bool
resolve_subreg_use(rtx * loc,rtx insn)770 resolve_subreg_use (rtx *loc, rtx insn)
771 {
772   subrtx_ptr_iterator::array_type array;
773   FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
774     {
775       rtx *loc = *iter;
776       rtx x = *loc;
777       if (resolve_subreg_p (x))
778           {
779             x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
780                                                SUBREG_BYTE (x));
781 
782             /* It is possible for a note to contain a reference which we can
783                decompose.  In this case, return 1 to the caller to indicate
784                that the note must be removed.  */
785             if (!x)
786               {
787                 gcc_assert (!insn);
788                 return true;
789               }
790 
791             validate_change (insn, loc, x, 1);
792             iter.skip_subrtxes ();
793           }
794       else if (resolve_reg_p (x))
795           /* Return 1 to the caller to indicate that we found a direct
796              reference to a register which is being decomposed.  This can
797              happen inside notes, multiword shift or zero-extend
798              instructions.  */
799           return true;
800     }
801 
802   return false;
803 }
804 
805 /* Resolve any decomposed registers which appear in register notes on
806    INSN.  */
807 
808 static void
resolve_reg_notes(rtx_insn * insn)809 resolve_reg_notes (rtx_insn *insn)
810 {
811   rtx *pnote, note;
812 
813   note = find_reg_equal_equiv_note (insn);
814   if (note)
815     {
816       int old_count = num_validated_changes ();
817       if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
818           remove_note (insn, note);
819       else
820           if (old_count != num_validated_changes ())
821             df_notes_rescan (insn);
822     }
823 
824   pnote = &REG_NOTES (insn);
825   while (*pnote != NULL_RTX)
826     {
827       bool del = false;
828 
829       note = *pnote;
830       switch (REG_NOTE_KIND (note))
831           {
832           case REG_DEAD:
833           case REG_UNUSED:
834             if (resolve_reg_p (XEXP (note, 0)))
835               del = true;
836             break;
837 
838           default:
839             break;
840           }
841 
842       if (del)
843           *pnote = XEXP (note, 1);
844       else
845           pnote = &XEXP (note, 1);
846     }
847 }
848 
849 /* Return whether X can be decomposed into subwords.  */
850 
851 static bool
can_decompose_p(rtx x)852 can_decompose_p (rtx x)
853 {
854   if (REG_P (x))
855     {
856       unsigned int regno = REGNO (x);
857 
858       if (HARD_REGISTER_NUM_P (regno))
859           {
860             unsigned int byte, num_bytes, num_words;
861 
862             if (!interesting_mode_p (GET_MODE (x), &num_bytes, &num_words))
863               return false;
864             for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
865               if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
866                 return false;
867             return true;
868           }
869       else
870           return !bitmap_bit_p (subreg_context, regno);
871     }
872 
873   return true;
874 }
875 
876 /* OPND is a concatn operand this is used with a simple move operator.
877    Return a new rtx with the concatn's operands swapped.  */
878 
879 static rtx
resolve_operand_for_swap_move_operator(rtx opnd)880 resolve_operand_for_swap_move_operator (rtx opnd)
881 {
882   gcc_assert (GET_CODE (opnd) == CONCATN);
883   rtx concatn = copy_rtx (opnd);
884   rtx op0 = XVECEXP (concatn, 0, 0);
885   rtx op1 = XVECEXP (concatn, 0, 1);
886   XVECEXP (concatn, 0, 0) = op1;
887   XVECEXP (concatn, 0, 1) = op0;
888   return concatn;
889 }
890 
891 /* Decompose the registers used in a simple move SET within INSN.  If
892    we don't change anything, return INSN, otherwise return the start
893    of the sequence of moves.  */
894 
895 static rtx_insn *
resolve_simple_move(rtx set,rtx_insn * insn)896 resolve_simple_move (rtx set, rtx_insn *insn)
897 {
898   rtx src, dest, real_dest, src_op;
899   rtx_insn *insns;
900   machine_mode orig_mode, dest_mode;
901   unsigned int orig_size, words;
902   bool pushing;
903 
904   src = SET_SRC (set);
905   dest = SET_DEST (set);
906   orig_mode = GET_MODE (dest);
907 
908   if (!interesting_mode_p (orig_mode, &orig_size, &words))
909     gcc_unreachable ();
910   gcc_assert (words > 1);
911 
912   start_sequence ();
913 
914   /* We have to handle copying from a SUBREG of a decomposed reg where
915      the SUBREG is larger than word size.  Rather than assume that we
916      can take a word_mode SUBREG of the destination, we copy to a new
917      register and then copy that to the destination.  */
918 
919   real_dest = NULL_RTX;
920 
921   if ((src_op = operand_for_swap_move_operator (src)) != NULL_RTX)
922     {
923       if (resolve_reg_p (dest))
924           {
925             /* DEST is a CONCATN, so swap its operands and strip
926                SRC's operator.  */
927             dest = resolve_operand_for_swap_move_operator (dest);
928             src = src_op;
929           }
930       else if (resolve_reg_p (src_op))
931           {
932             /* SRC is an operation on a CONCATN, so strip the operator and
933                swap the CONCATN's operands.  */
934             src = resolve_operand_for_swap_move_operator (src_op);
935           }
936     }
937 
938   if (GET_CODE (src) == SUBREG
939       && resolve_reg_p (SUBREG_REG (src))
940       && (maybe_ne (SUBREG_BYTE (src), 0)
941             || maybe_ne (orig_size, GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
942     {
943       real_dest = dest;
944       dest = gen_reg_rtx (orig_mode);
945       if (REG_P (real_dest))
946           REG_ATTRS (dest) = REG_ATTRS (real_dest);
947     }
948 
949   /* Similarly if we are copying to a SUBREG of a decomposed reg where
950      the SUBREG is larger than word size.  */
951 
952   if (GET_CODE (dest) == SUBREG
953       && resolve_reg_p (SUBREG_REG (dest))
954       && (maybe_ne (SUBREG_BYTE (dest), 0)
955             || maybe_ne (orig_size,
956                            GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
957     {
958       rtx reg, smove;
959       rtx_insn *minsn;
960 
961       reg = gen_reg_rtx (orig_mode);
962       minsn = emit_move_insn (reg, src);
963       smove = single_set (minsn);
964       gcc_assert (smove != NULL_RTX);
965       resolve_simple_move (smove, minsn);
966       src = reg;
967     }
968 
969   /* If we didn't have any big SUBREGS of decomposed registers, and
970      neither side of the move is a register we are decomposing, then
971      we don't have to do anything here.  */
972 
973   if (src == SET_SRC (set)
974       && dest == SET_DEST (set)
975       && !resolve_reg_p (src)
976       && !resolve_subreg_p (src)
977       && !resolve_reg_p (dest)
978       && !resolve_subreg_p (dest))
979     {
980       end_sequence ();
981       return insn;
982     }
983 
984   /* It's possible for the code to use a subreg of a decomposed
985      register while forming an address.  We need to handle that before
986      passing the address to emit_move_insn.  We pass NULL_RTX as the
987      insn parameter to resolve_subreg_use because we cannot validate
988      the insn yet.  */
989   if (MEM_P (src) || MEM_P (dest))
990     {
991       int acg;
992 
993       if (MEM_P (src))
994           resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
995       if (MEM_P (dest))
996           resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
997       acg = apply_change_group ();
998       gcc_assert (acg);
999     }
1000 
1001   /* If SRC is a register which we can't decompose, or has side
1002      effects, we need to move via a temporary register.  */
1003 
1004   if (!can_decompose_p (src)
1005       || side_effects_p (src)
1006       || GET_CODE (src) == ASM_OPERANDS)
1007     {
1008       rtx reg;
1009 
1010       reg = gen_reg_rtx (orig_mode);
1011 
1012       if (AUTO_INC_DEC)
1013           {
1014             rtx_insn *move = emit_move_insn (reg, src);
1015             if (MEM_P (src))
1016               {
1017                 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1018                 if (note)
1019                     add_reg_note (move, REG_INC, XEXP (note, 0));
1020               }
1021           }
1022       else
1023           emit_move_insn (reg, src);
1024 
1025       src = reg;
1026     }
1027 
1028   /* If DEST is a register which we can't decompose, or has side
1029      effects, we need to first move to a temporary register.  We
1030      handle the common case of pushing an operand directly.  We also
1031      go through a temporary register if it holds a floating point
1032      value.  This gives us better code on systems which can't move
1033      data easily between integer and floating point registers.  */
1034 
1035   dest_mode = orig_mode;
1036   pushing = push_operand (dest, dest_mode);
1037   if (!can_decompose_p (dest)
1038       || (side_effects_p (dest) && !pushing)
1039       || (!SCALAR_INT_MODE_P (dest_mode)
1040             && !resolve_reg_p (dest)
1041             && !resolve_subreg_p (dest)))
1042     {
1043       if (real_dest == NULL_RTX)
1044           real_dest = dest;
1045       if (!SCALAR_INT_MODE_P (dest_mode))
1046           dest_mode = int_mode_for_mode (dest_mode).require ();
1047       dest = gen_reg_rtx (dest_mode);
1048       if (REG_P (real_dest))
1049           REG_ATTRS (dest) = REG_ATTRS (real_dest);
1050     }
1051 
1052   if (pushing)
1053     {
1054       unsigned int i, j, jinc;
1055 
1056       gcc_assert (orig_size % UNITS_PER_WORD == 0);
1057       gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
1058       gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1059 
1060       if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1061           {
1062             j = 0;
1063             jinc = 1;
1064           }
1065       else
1066           {
1067             j = words - 1;
1068             jinc = -1;
1069           }
1070 
1071       for (i = 0; i < words; ++i, j += jinc)
1072           {
1073             rtx temp;
1074 
1075             temp = copy_rtx (XEXP (dest, 0));
1076             temp = adjust_automodify_address_nv (dest, word_mode, temp,
1077                                                          j * UNITS_PER_WORD);
1078             emit_move_insn (temp,
1079                                 simplify_gen_subreg_concatn (word_mode, src,
1080                                                                    orig_mode,
1081                                                                    j * UNITS_PER_WORD));
1082           }
1083     }
1084   else
1085     {
1086       unsigned int i;
1087 
1088       if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
1089           emit_clobber (dest);
1090 
1091       for (i = 0; i < words; ++i)
1092           {
1093             rtx t = simplify_gen_subreg_concatn (word_mode, dest,
1094                                                          dest_mode,
1095                                                          i * UNITS_PER_WORD);
1096             /* simplify_gen_subreg_concatn can return (const_int 0) for
1097                some sub-objects of paradoxical subregs.  As a source operand,
1098                that's fine.  As a destination it must be avoided.  Those are
1099                supposed to be don't care bits, so we can just drop that store
1100                on the floor.  */
1101             if (t != CONST0_RTX (word_mode))
1102               emit_move_insn (t,
1103                                   simplify_gen_subreg_concatn (word_mode, src,
1104                                                                        orig_mode,
1105                                                                        i * UNITS_PER_WORD));
1106           }
1107     }
1108 
1109   if (real_dest != NULL_RTX)
1110     {
1111       rtx mdest, smove;
1112       rtx_insn *minsn;
1113 
1114       if (dest_mode == orig_mode)
1115           mdest = dest;
1116       else
1117           mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1118       minsn = emit_move_insn (real_dest, mdest);
1119 
1120   if (AUTO_INC_DEC && MEM_P (real_dest)
1121       && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1122     {
1123       rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1124       if (note)
1125           add_reg_note (minsn, REG_INC, XEXP (note, 0));
1126     }
1127 
1128       smove = single_set (minsn);
1129       gcc_assert (smove != NULL_RTX);
1130 
1131       resolve_simple_move (smove, minsn);
1132     }
1133 
1134   insns = get_insns ();
1135   end_sequence ();
1136 
1137   copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1138 
1139   emit_insn_before (insns, insn);
1140 
1141   /* If we get here via self-recursion, then INSN is not yet in the insns
1142      chain and delete_insn will fail.  We only want to remove INSN from the
1143      current sequence.  See PR56738.  */
1144   if (in_sequence_p ())
1145     remove_insn (insn);
1146   else
1147     delete_insn (insn);
1148 
1149   return insns;
1150 }
1151 
1152 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1153    component registers.  Return whether we changed something.  */
1154 
1155 static bool
resolve_clobber(rtx pat,rtx_insn * insn)1156 resolve_clobber (rtx pat, rtx_insn *insn)
1157 {
1158   rtx reg;
1159   machine_mode orig_mode;
1160   unsigned int orig_size, words, i;
1161   int ret;
1162 
1163   reg = XEXP (pat, 0);
1164   /* For clobbers we can look through paradoxical subregs which
1165      we do not handle in simplify_gen_subreg_concatn.  */
1166   if (paradoxical_subreg_p (reg))
1167     reg = SUBREG_REG (reg);
1168   if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1169     return false;
1170 
1171   orig_mode = GET_MODE (reg);
1172   if (!interesting_mode_p (orig_mode, &orig_size, &words))
1173     gcc_unreachable ();
1174 
1175   ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1176                                simplify_gen_subreg_concatn (word_mode, reg,
1177                                                                   orig_mode, 0),
1178                                0);
1179   df_insn_rescan (insn);
1180   gcc_assert (ret != 0);
1181 
1182   for (i = words - 1; i > 0; --i)
1183     {
1184       rtx x;
1185 
1186       x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1187                                                i * UNITS_PER_WORD);
1188       x = gen_rtx_CLOBBER (VOIDmode, x);
1189       emit_insn_after (x, insn);
1190     }
1191 
1192   resolve_reg_notes (insn);
1193 
1194   return true;
1195 }
1196 
1197 /* A USE of a decomposed register is no longer meaningful.  Return
1198    whether we changed something.  */
1199 
1200 static bool
resolve_use(rtx pat,rtx_insn * insn)1201 resolve_use (rtx pat, rtx_insn *insn)
1202 {
1203   if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1204     {
1205       delete_insn (insn);
1206       return true;
1207     }
1208 
1209   resolve_reg_notes (insn);
1210 
1211   return false;
1212 }
1213 
1214 /* A VAR_LOCATION can be simplified.  */
1215 
1216 static void
resolve_debug(rtx_insn * insn)1217 resolve_debug (rtx_insn *insn)
1218 {
1219   subrtx_ptr_iterator::array_type array;
1220   FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
1221     {
1222       rtx *loc = *iter;
1223       rtx x = *loc;
1224       if (resolve_subreg_p (x))
1225           {
1226             x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
1227                                                SUBREG_BYTE (x));
1228 
1229             if (x)
1230               *loc = x;
1231             else
1232               x = copy_rtx (*loc);
1233           }
1234       if (resolve_reg_p (x))
1235           *loc = copy_rtx (x);
1236     }
1237 
1238   df_insn_rescan (insn);
1239 
1240   resolve_reg_notes (insn);
1241 }
1242 
1243 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1244    set the decomposable_context bitmap accordingly.  SPEED_P is true
1245    if we are optimizing INSN for speed rather than size.  Return true
1246    if INSN is decomposable.  */
1247 
1248 static bool
find_decomposable_shift_zext(rtx_insn * insn,bool speed_p)1249 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1250 {
1251   rtx set;
1252   rtx op;
1253   rtx op_operand;
1254 
1255   set = single_set (insn);
1256   if (!set)
1257     return false;
1258 
1259   op = SET_SRC (set);
1260   if (GET_CODE (op) != ASHIFT
1261       && GET_CODE (op) != LSHIFTRT
1262       && GET_CODE (op) != ASHIFTRT
1263       && GET_CODE (op) != ZERO_EXTEND)
1264     return false;
1265 
1266   op_operand = XEXP (op, 0);
1267   if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1268       || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1269       || HARD_REGISTER_NUM_P (REGNO (op_operand))
1270       || GET_MODE (op) != twice_word_mode)
1271     return false;
1272 
1273   if (GET_CODE (op) == ZERO_EXTEND)
1274     {
1275       if (GET_MODE (op_operand) != word_mode
1276             || !choices[speed_p].splitting_zext)
1277           return false;
1278     }
1279   else /* left or right shift */
1280     {
1281       bool *splitting = (GET_CODE (op) == ASHIFT
1282                                ? choices[speed_p].splitting_ashift
1283                                : GET_CODE (op) == ASHIFTRT
1284                                ? choices[speed_p].splitting_ashiftrt
1285                                : choices[speed_p].splitting_lshiftrt);
1286       if (!CONST_INT_P (XEXP (op, 1))
1287             || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1288                               2 * BITS_PER_WORD - 1)
1289             || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1290           return false;
1291 
1292       bitmap_set_bit (decomposable_context, REGNO (op_operand));
1293     }
1294 
1295   bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1296 
1297   return true;
1298 }
1299 
1300 /* Decompose a more than word wide shift (in INSN) of a multiword
1301    pseudo or a multiword zero-extend of a wordmode pseudo into a move
1302    and 'set to zero' insn.  Return a pointer to the new insn when a
1303    replacement was done.  */
1304 
1305 static rtx_insn *
resolve_shift_zext(rtx_insn * insn)1306 resolve_shift_zext (rtx_insn *insn)
1307 {
1308   rtx set;
1309   rtx op;
1310   rtx op_operand;
1311   rtx_insn *insns;
1312   rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
1313   int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1314   scalar_int_mode inner_mode;
1315 
1316   set = single_set (insn);
1317   if (!set)
1318     return NULL;
1319 
1320   op = SET_SRC (set);
1321   if (GET_CODE (op) != ASHIFT
1322       && GET_CODE (op) != LSHIFTRT
1323       && GET_CODE (op) != ASHIFTRT
1324       && GET_CODE (op) != ZERO_EXTEND)
1325     return NULL;
1326 
1327   op_operand = XEXP (op, 0);
1328   if (!is_a <scalar_int_mode> (GET_MODE (op_operand), &inner_mode))
1329     return NULL;
1330 
1331   /* We can tear this operation apart only if the regs were already
1332      torn apart.  */
1333   if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1334     return NULL;
1335 
1336   /* src_reg_num is the number of the word mode register which we
1337      are operating on.  For a left shift and a zero_extend on little
1338      endian machines this is register 0.  */
1339   src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1340                     ? 1 : 0;
1341 
1342   if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (inner_mode) > UNITS_PER_WORD)
1343     src_reg_num = 1 - src_reg_num;
1344 
1345   if (GET_CODE (op) == ZERO_EXTEND)
1346     dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
1347   else
1348     dest_reg_num = 1 - src_reg_num;
1349 
1350   offset1 = UNITS_PER_WORD * dest_reg_num;
1351   offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1352   src_offset = UNITS_PER_WORD * src_reg_num;
1353 
1354   start_sequence ();
1355 
1356   dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1357                                           GET_MODE (SET_DEST (set)),
1358                                           offset1);
1359   dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1360                                                       GET_MODE (SET_DEST (set)),
1361                                                       offset2);
1362   src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1363                                          GET_MODE (op_operand),
1364                                          src_offset);
1365   if (GET_CODE (op) == ASHIFTRT
1366       && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1367     upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1368                                     BITS_PER_WORD - 1, NULL_RTX, 0);
1369 
1370   if (GET_CODE (op) != ZERO_EXTEND)
1371     {
1372       int shift_count = INTVAL (XEXP (op, 1));
1373       if (shift_count > BITS_PER_WORD)
1374           src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1375                                         LSHIFT_EXPR : RSHIFT_EXPR,
1376                                         word_mode, src_reg,
1377                                         shift_count - BITS_PER_WORD,
1378                                         dest_reg, GET_CODE (op) != ASHIFTRT);
1379     }
1380 
1381   if (dest_reg != src_reg)
1382     emit_move_insn (dest_reg, src_reg);
1383   if (GET_CODE (op) != ASHIFTRT)
1384     emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1385   else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1386     emit_move_insn (dest_upper, copy_rtx (src_reg));
1387   else
1388     emit_move_insn (dest_upper, upper_src);
1389   insns = get_insns ();
1390 
1391   end_sequence ();
1392 
1393   emit_insn_before (insns, insn);
1394 
1395   if (dump_file)
1396     {
1397       rtx_insn *in;
1398       fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1399       for (in = insns; in != insn; in = NEXT_INSN (in))
1400           fprintf (dump_file, "%d ", INSN_UID (in));
1401       fprintf (dump_file, "\n");
1402     }
1403 
1404   delete_insn (insn);
1405   return insns;
1406 }
1407 
1408 /* Print to dump_file a description of what we're doing with shift code CODE.
1409    SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD.  */
1410 
1411 static void
dump_shift_choices(enum rtx_code code,bool * splitting)1412 dump_shift_choices (enum rtx_code code, bool *splitting)
1413 {
1414   int i;
1415   const char *sep;
1416 
1417   fprintf (dump_file,
1418              "  Splitting mode %s for %s lowering with shift amounts = ",
1419              GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1420   sep = "";
1421   for (i = 0; i < BITS_PER_WORD; i++)
1422     if (splitting[i])
1423       {
1424           fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1425           sep = ",";
1426       }
1427   fprintf (dump_file, "\n");
1428 }
1429 
1430 /* Print to dump_file a description of what we're doing when optimizing
1431    for speed or size; SPEED_P says which.  DESCRIPTION is a description
1432    of the SPEED_P choice.  */
1433 
1434 static void
dump_choices(bool speed_p,const char * description)1435 dump_choices (bool speed_p, const char *description)
1436 {
1437   unsigned int size, factor, i;
1438 
1439   fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1440 
1441   for (i = 0; i < MAX_MACHINE_MODE; i++)
1442     if (interesting_mode_p ((machine_mode) i, &size, &factor)
1443           && factor > 1)
1444       fprintf (dump_file, "  %s mode %s for copy lowering.\n",
1445                  choices[speed_p].move_modes_to_split[i]
1446                  ? "Splitting"
1447                  : "Skipping",
1448                  GET_MODE_NAME ((machine_mode) i));
1449 
1450   fprintf (dump_file, "  %s mode %s for zero_extend lowering.\n",
1451              choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1452              GET_MODE_NAME (twice_word_mode));
1453 
1454   dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1455   dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1456   dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1457   fprintf (dump_file, "\n");
1458 }
1459 
1460 /* Look for registers which are always accessed via word-sized SUBREGs
1461    or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
1462    registers into several word-sized pseudo-registers.  */
1463 
1464 static void
decompose_multiword_subregs(bool decompose_copies)1465 decompose_multiword_subregs (bool decompose_copies)
1466 {
1467   unsigned int max;
1468   basic_block bb;
1469   bool speed_p;
1470 
1471   if (dump_file)
1472     {
1473       dump_choices (false, "size");
1474       dump_choices (true, "speed");
1475     }
1476 
1477   /* Check if this target even has any modes to consider lowering.   */
1478   if (!choices[false].something_to_do && !choices[true].something_to_do)
1479     {
1480       if (dump_file)
1481           fprintf (dump_file, "Nothing to do!\n");
1482       return;
1483     }
1484 
1485   max = max_reg_num ();
1486 
1487   /* First see if there are any multi-word pseudo-registers.  If there
1488      aren't, there is nothing we can do.  This should speed up this
1489      pass in the normal case, since it should be faster than scanning
1490      all the insns.  */
1491   {
1492     unsigned int i;
1493     bool useful_modes_seen = false;
1494 
1495     for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
1496       if (regno_reg_rtx[i] != NULL)
1497           {
1498             machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1499             if (choices[false].move_modes_to_split[(int) mode]
1500                 || choices[true].move_modes_to_split[(int) mode])
1501               {
1502                 useful_modes_seen = true;
1503                 break;
1504               }
1505           }
1506 
1507     if (!useful_modes_seen)
1508       {
1509           if (dump_file)
1510             fprintf (dump_file, "Nothing to lower in this function.\n");
1511           return;
1512       }
1513   }
1514 
1515   if (df)
1516     {
1517       df_set_flags (DF_DEFER_INSN_RESCAN);
1518       run_word_dce ();
1519     }
1520 
1521   /* FIXME: It may be possible to change this code to look for each
1522      multi-word pseudo-register and to find each insn which sets or
1523      uses that register.  That should be faster than scanning all the
1524      insns.  */
1525 
1526   decomposable_context = BITMAP_ALLOC (NULL);
1527   non_decomposable_context = BITMAP_ALLOC (NULL);
1528   subreg_context = BITMAP_ALLOC (NULL);
1529 
1530   reg_copy_graph.create (max);
1531   reg_copy_graph.safe_grow_cleared (max, true);
1532   memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
1533 
1534   speed_p = optimize_function_for_speed_p (cfun);
1535   FOR_EACH_BB_FN (bb, cfun)
1536     {
1537       rtx_insn *insn;
1538 
1539       FOR_BB_INSNS (bb, insn)
1540           {
1541             rtx set;
1542             enum classify_move_insn cmi;
1543             int i, n;
1544 
1545             if (!INSN_P (insn)
1546                 || GET_CODE (PATTERN (insn)) == CLOBBER
1547                 || GET_CODE (PATTERN (insn)) == USE)
1548               continue;
1549 
1550             recog_memoized (insn);
1551 
1552             if (find_decomposable_shift_zext (insn, speed_p))
1553               continue;
1554 
1555             extract_insn (insn);
1556 
1557             set = simple_move (insn, speed_p);
1558 
1559             if (!set)
1560               cmi = NOT_SIMPLE_MOVE;
1561             else
1562               {
1563                 /* We mark pseudo-to-pseudo copies as decomposable during the
1564                      second pass only.  The first pass is so early that there is
1565                      good chance such moves will be optimized away completely by
1566                      subsequent optimizations anyway.
1567 
1568                      However, we call find_pseudo_copy even during the first pass
1569                      so as to properly set up the reg_copy_graph.  */
1570                 if (find_pseudo_copy (set))
1571                     cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1572                 else
1573                     cmi = SIMPLE_MOVE;
1574               }
1575 
1576             n = recog_data.n_operands;
1577             for (i = 0; i < n; ++i)
1578               {
1579                 find_decomposable_subregs (&recog_data.operand[i], &cmi);
1580 
1581                 /* We handle ASM_OPERANDS as a special case to support
1582                      things like x86 rdtsc which returns a DImode value.
1583                      We can decompose the output, which will certainly be
1584                      operand 0, but not the inputs.  */
1585 
1586                 if (cmi == SIMPLE_MOVE
1587                       && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1588                     {
1589                       gcc_assert (i == 0);
1590                       cmi = NOT_SIMPLE_MOVE;
1591                     }
1592               }
1593           }
1594     }
1595 
1596   bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1597   if (!bitmap_empty_p (decomposable_context))
1598     {
1599       unsigned int i;
1600       sbitmap_iterator sbi;
1601       bitmap_iterator iter;
1602       unsigned int regno;
1603 
1604       propagate_pseudo_copies ();
1605 
1606       auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
1607       bitmap_clear (sub_blocks);
1608 
1609       EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1610           decompose_register (regno);
1611 
1612       FOR_EACH_BB_FN (bb, cfun)
1613           {
1614             rtx_insn *insn;
1615 
1616             FOR_BB_INSNS (bb, insn)
1617               {
1618                 rtx pat;
1619 
1620                 if (!INSN_P (insn))
1621                     continue;
1622 
1623                 pat = PATTERN (insn);
1624                 if (GET_CODE (pat) == CLOBBER)
1625                     resolve_clobber (pat, insn);
1626                 else if (GET_CODE (pat) == USE)
1627                     resolve_use (pat, insn);
1628                 else if (DEBUG_INSN_P (insn))
1629                     resolve_debug (insn);
1630                 else
1631                     {
1632                       rtx set;
1633                       int i;
1634 
1635                       recog_memoized (insn);
1636                       extract_insn (insn);
1637 
1638                       set = simple_move (insn, speed_p);
1639                       if (set)
1640                         {
1641                           rtx_insn *orig_insn = insn;
1642                           bool cfi = control_flow_insn_p (insn);
1643 
1644                           /* We can end up splitting loads to multi-word pseudos
1645                                into separate loads to machine word size pseudos.
1646                                When this happens, we first had one load that can
1647                                throw, and after resolve_simple_move we'll have a
1648                                bunch of loads (at least two).  All those loads may
1649                                trap if we can have non-call exceptions, so they
1650                                all will end the current basic block.  We split the
1651                                block after the outer loop over all insns, but we
1652                                make sure here that we will be able to split the
1653                                basic block and still produce the correct control
1654                                flow graph for it.  */
1655                           gcc_assert (!cfi
1656                                           || (cfun->can_throw_non_call_exceptions
1657                                               && can_throw_internal (insn)));
1658 
1659                           insn = resolve_simple_move (set, insn);
1660                           if (insn != orig_insn)
1661                               {
1662                                 recog_memoized (insn);
1663                                 extract_insn (insn);
1664 
1665                                 if (cfi)
1666                                   bitmap_set_bit (sub_blocks, bb->index);
1667                               }
1668                         }
1669                       else
1670                         {
1671                           rtx_insn *decomposed_shift;
1672 
1673                           decomposed_shift = resolve_shift_zext (insn);
1674                           if (decomposed_shift != NULL_RTX)
1675                               {
1676                                 insn = decomposed_shift;
1677                                 recog_memoized (insn);
1678                                 extract_insn (insn);
1679                               }
1680                         }
1681 
1682                       for (i = recog_data.n_operands - 1; i >= 0; --i)
1683                         resolve_subreg_use (recog_data.operand_loc[i], insn);
1684 
1685                       resolve_reg_notes (insn);
1686 
1687                       if (num_validated_changes () > 0)
1688                         {
1689                           for (i = recog_data.n_dups - 1; i >= 0; --i)
1690                               {
1691                                 rtx *pl = recog_data.dup_loc[i];
1692                                 int dup_num = recog_data.dup_num[i];
1693                                 rtx *px = recog_data.operand_loc[dup_num];
1694 
1695                                 validate_unshare_change (insn, pl, *px, 1);
1696                               }
1697 
1698                           i = apply_change_group ();
1699                           gcc_assert (i);
1700                         }
1701                     }
1702               }
1703           }
1704 
1705       /* If we had insns to split that caused control flow insns in the middle
1706            of a basic block, split those blocks now.  Note that we only handle
1707            the case where splitting a load has caused multiple possibly trapping
1708            loads to appear.  */
1709       EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
1710           {
1711             rtx_insn *insn, *end;
1712             edge fallthru;
1713 
1714             bb = BASIC_BLOCK_FOR_FN (cfun, i);
1715             insn = BB_HEAD (bb);
1716             end = BB_END (bb);
1717 
1718             while (insn != end)
1719               {
1720                 if (control_flow_insn_p (insn))
1721                     {
1722                       /* Split the block after insn.  There will be a fallthru
1723                          edge, which is OK so we keep it.  We have to create the
1724                          exception edges ourselves.  */
1725                       fallthru = split_block (bb, insn);
1726                       rtl_make_eh_edge (NULL, bb, BB_END (bb));
1727                       bb = fallthru->dest;
1728                       insn = BB_HEAD (bb);
1729                     }
1730                 else
1731                   insn = NEXT_INSN (insn);
1732               }
1733           }
1734     }
1735 
1736   for (bitmap b : reg_copy_graph)
1737     if (b)
1738       BITMAP_FREE (b);
1739 
1740   reg_copy_graph.release ();
1741 
1742   BITMAP_FREE (decomposable_context);
1743   BITMAP_FREE (non_decomposable_context);
1744   BITMAP_FREE (subreg_context);
1745 }
1746 
1747 /* Implement first lower subreg pass.  */
1748 
1749 namespace {
1750 
1751 const pass_data pass_data_lower_subreg =
1752 {
1753   RTL_PASS, /* type */
1754   "subreg1", /* name */
1755   OPTGROUP_NONE, /* optinfo_flags */
1756   TV_LOWER_SUBREG, /* tv_id */
1757   0, /* properties_required */
1758   0, /* properties_provided */
1759   0, /* properties_destroyed */
1760   0, /* todo_flags_start */
1761   0, /* todo_flags_finish */
1762 };
1763 
1764 class pass_lower_subreg : public rtl_opt_pass
1765 {
1766 public:
pass_lower_subreg(gcc::context * ctxt)1767   pass_lower_subreg (gcc::context *ctxt)
1768     : rtl_opt_pass (pass_data_lower_subreg, ctxt)
1769   {}
1770 
1771   /* opt_pass methods: */
gate(function *)1772   virtual bool gate (function *) { return flag_split_wide_types != 0; }
execute(function *)1773   virtual unsigned int execute (function *)
1774     {
1775       decompose_multiword_subregs (false);
1776       return 0;
1777     }
1778 
1779 }; // class pass_lower_subreg
1780 
1781 } // anon namespace
1782 
1783 rtl_opt_pass *
make_pass_lower_subreg(gcc::context * ctxt)1784 make_pass_lower_subreg (gcc::context *ctxt)
1785 {
1786   return new pass_lower_subreg (ctxt);
1787 }
1788 
1789 /* Implement second lower subreg pass.  */
1790 
1791 namespace {
1792 
1793 const pass_data pass_data_lower_subreg2 =
1794 {
1795   RTL_PASS, /* type */
1796   "subreg2", /* name */
1797   OPTGROUP_NONE, /* optinfo_flags */
1798   TV_LOWER_SUBREG, /* tv_id */
1799   0, /* properties_required */
1800   0, /* properties_provided */
1801   0, /* properties_destroyed */
1802   0, /* todo_flags_start */
1803   TODO_df_finish, /* todo_flags_finish */
1804 };
1805 
1806 class pass_lower_subreg2 : public rtl_opt_pass
1807 {
1808 public:
pass_lower_subreg2(gcc::context * ctxt)1809   pass_lower_subreg2 (gcc::context *ctxt)
1810     : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
1811   {}
1812 
1813   /* opt_pass methods: */
gate(function *)1814   virtual bool gate (function *) { return flag_split_wide_types
1815                                                     && flag_split_wide_types_early; }
execute(function *)1816   virtual unsigned int execute (function *)
1817     {
1818       decompose_multiword_subregs (true);
1819       return 0;
1820     }
1821 
1822 }; // class pass_lower_subreg2
1823 
1824 } // anon namespace
1825 
1826 rtl_opt_pass *
make_pass_lower_subreg2(gcc::context * ctxt)1827 make_pass_lower_subreg2 (gcc::context *ctxt)
1828 {
1829   return new pass_lower_subreg2 (ctxt);
1830 }
1831 
1832 /* Implement third lower subreg pass.  */
1833 
1834 namespace {
1835 
1836 const pass_data pass_data_lower_subreg3 =
1837 {
1838   RTL_PASS, /* type */
1839   "subreg3", /* name */
1840   OPTGROUP_NONE, /* optinfo_flags */
1841   TV_LOWER_SUBREG, /* tv_id */
1842   0, /* properties_required */
1843   0, /* properties_provided */
1844   0, /* properties_destroyed */
1845   0, /* todo_flags_start */
1846   TODO_df_finish, /* todo_flags_finish */
1847 };
1848 
1849 class pass_lower_subreg3 : public rtl_opt_pass
1850 {
1851 public:
pass_lower_subreg3(gcc::context * ctxt)1852   pass_lower_subreg3 (gcc::context *ctxt)
1853     : rtl_opt_pass (pass_data_lower_subreg3, ctxt)
1854   {}
1855 
1856   /* opt_pass methods: */
gate(function *)1857   virtual bool gate (function *) { return flag_split_wide_types; }
execute(function *)1858   virtual unsigned int execute (function *)
1859     {
1860       decompose_multiword_subregs (true);
1861       return 0;
1862     }
1863 
1864 }; // class pass_lower_subreg3
1865 
1866 } // anon namespace
1867 
1868 rtl_opt_pass *
make_pass_lower_subreg3(gcc::context * ctxt)1869 make_pass_lower_subreg3 (gcc::context *ctxt)
1870 {
1871   return new pass_lower_subreg3 (ctxt);
1872 }
1873