1 /* GIMPLE store merging and byte swapping passes.
2    Copyright (C) 2009-2022 Free Software Foundation, Inc.
3    Contributed by ARM Ltd.
4 
5    This file is part of GCC.
6 
7    GCC is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    GCC is distributed in the hope that it will be useful, but
13    WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15    General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING3.  If not see
19    <http://www.gnu.org/licenses/>.  */
20 
21 /* The purpose of the store merging pass is to combine multiple memory stores
22    of constant values, values loaded from memory, bitwise operations on those,
23    or bit-field values, to consecutive locations, into fewer wider stores.
24 
25    For example, if we have a sequence peforming four byte stores to
26    consecutive memory locations:
27    [p     ] := imm1;
28    [p + 1B] := imm2;
29    [p + 2B] := imm3;
30    [p + 3B] := imm4;
31    we can transform this into a single 4-byte store if the target supports it:
32    [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
33 
34    Or:
35    [p     ] := [q     ];
36    [p + 1B] := [q + 1B];
37    [p + 2B] := [q + 2B];
38    [p + 3B] := [q + 3B];
39    if there is no overlap can be transformed into a single 4-byte
40    load followed by single 4-byte store.
41 
42    Or:
43    [p     ] := [q     ] ^ imm1;
44    [p + 1B] := [q + 1B] ^ imm2;
45    [p + 2B] := [q + 2B] ^ imm3;
46    [p + 3B] := [q + 3B] ^ imm4;
47    if there is no overlap can be transformed into a single 4-byte
48    load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
49 
50    Or:
51    [p:1 ] := imm;
52    [p:31] := val & 0x7FFFFFFF;
53    we can transform this into a single 4-byte store if the target supports it:
54    [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
55 
56    The algorithm is applied to each basic block in three phases:
57 
58    1) Scan through the basic block and record assignments to destinations
59    that can be expressed as a store to memory of a certain size at a certain
60    bit offset from base expressions we can handle.  For bit-fields we also
61    record the surrounding bit region, i.e. bits that could be stored in
62    a read-modify-write operation when storing the bit-field.  Record store
63    chains to different bases in a hash_map (m_stores) and make sure to
64    terminate such chains when appropriate (for example when the stored
65    values get used subsequently).
66    These stores can be a result of structure element initializers, array stores
67    etc.  A store_immediate_info object is recorded for every such store.
68    Record as many such assignments to a single base as possible until a
69    statement that interferes with the store sequence is encountered.
70    Each store has up to 2 operands, which can be a either constant, a memory
71    load or an SSA name, from which the value to be stored can be computed.
72    At most one of the operands can be a constant.  The operands are recorded
73    in store_operand_info struct.
74 
75    2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
76    store_immediate_info objects) and coalesce contiguous stores into
77    merged_store_group objects.  For bit-field stores, we don't need to
78    require the stores to be contiguous, just their surrounding bit regions
79    have to be contiguous.  If the expression being stored is different
80    between adjacent stores, such as one store storing a constant and
81    following storing a value loaded from memory, or if the loaded memory
82    objects are not adjacent, a new merged_store_group is created as well.
83 
84    For example, given the stores:
85    [p     ] := 0;
86    [p + 1B] := 1;
87    [p + 3B] := 0;
88    [p + 4B] := 1;
89    [p + 5B] := 0;
90    [p + 6B] := 0;
91    This phase would produce two merged_store_group objects, one recording the
92    two bytes stored in the memory region [p : p + 1] and another
93    recording the four bytes stored in the memory region [p + 3 : p + 6].
94 
95    3) The merged_store_group objects produced in phase 2) are processed
96    to generate the sequence of wider stores that set the contiguous memory
97    regions to the sequence of bytes that correspond to it.  This may emit
98    multiple stores per store group to handle contiguous stores that are not
99    of a size that is a power of 2.  For example it can try to emit a 40-bit
100    store as a 32-bit store followed by an 8-bit store.
101    We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
102    or TARGET_SLOW_UNALIGNED_ACCESS settings.
103 
104    Note on endianness and example:
105    Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
106    [p     ] := 0x1234;
107    [p + 2B] := 0x5678;
108    [p + 4B] := 0xab;
109    [p + 5B] := 0xcd;
110 
111    The memory layout for little-endian (LE) and big-endian (BE) must be:
112   p |LE|BE|
113   ---------
114   0 |34|12|
115   1 |12|34|
116   2 |78|56|
117   3 |56|78|
118   4 |ab|ab|
119   5 |cd|cd|
120 
121   To merge these into a single 48-bit merged value 'val' in phase 2)
122   on little-endian we insert stores to higher (consecutive) bitpositions
123   into the most significant bits of the merged value.
124   The final merged value would be: 0xcdab56781234
125 
126   For big-endian we insert stores to higher bitpositions into the least
127   significant bits of the merged value.
128   The final merged value would be: 0x12345678abcd
129 
130   Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131   followed by a 16-bit store.  Again, we must consider endianness when
132   breaking down the 48-bit value 'val' computed above.
133   For little endian we emit:
134   [p]      (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135   [p + 4B] (16-bit) := 0xcdab;    // (val & 0xffff00000000) >> 32;
136 
137   Whereas for big-endian we emit:
138   [p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139   [p + 4B] (16-bit) := 0xabcd;     //  val & 0x00000000ffff;  */
140 
141 #include "config.h"
142 #include "system.h"
143 #include "coretypes.h"
144 #include "backend.h"
145 #include "tree.h"
146 #include "gimple.h"
147 #include "builtins.h"
148 #include "fold-const.h"
149 #include "tree-pass.h"
150 #include "ssa.h"
151 #include "gimple-pretty-print.h"
152 #include "alias.h"
153 #include "fold-const.h"
154 #include "print-tree.h"
155 #include "tree-hash-traits.h"
156 #include "gimple-iterator.h"
157 #include "gimplify.h"
158 #include "gimple-fold.h"
159 #include "stor-layout.h"
160 #include "timevar.h"
161 #include "cfganal.h"
162 #include "cfgcleanup.h"
163 #include "tree-cfg.h"
164 #include "except.h"
165 #include "tree-eh.h"
166 #include "target.h"
167 #include "gimplify-me.h"
168 #include "rtl.h"
169 #include "expr.h"   /* For get_bit_range.  */
170 #include "optabs-tree.h"
171 #include "dbgcnt.h"
172 #include "selftest.h"
173 
174 /* The maximum size (in bits) of the stores this pass should generate.  */
175 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
176 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
177 
178 /* Limit to bound the number of aliasing checks for loads with the same
179    vuse as the corresponding store.  */
180 #define MAX_STORE_ALIAS_CHECKS 64
181 
182 namespace {
183 
184 struct bswap_stat
185 {
186   /* Number of hand-written 16-bit nop / bswaps found.  */
187   int found_16bit;
188 
189   /* Number of hand-written 32-bit nop / bswaps found.  */
190   int found_32bit;
191 
192   /* Number of hand-written 64-bit nop / bswaps found.  */
193   int found_64bit;
194 } nop_stats, bswap_stats;
195 
196 /* A symbolic number structure is used to detect byte permutation and selection
197    patterns of a source.  To achieve that, its field N contains an artificial
198    number consisting of BITS_PER_MARKER sized markers tracking where does each
199    byte come from in the source:
200 
201    0         - target byte has the value 0
202    FF        - target byte has an unknown value (eg. due to sign extension)
203    1..size - marker value is the byte index in the source (0 for lsb).
204 
205    To detect permutations on memory sources (arrays and structures), a symbolic
206    number is also associated:
207    - a base address BASE_ADDR and an OFFSET giving the address of the source;
208    - a range which gives the difference between the highest and lowest accessed
209      memory location to make such a symbolic number;
210    - the address SRC of the source element of lowest address as a convenience
211      to easily get BASE_ADDR + offset + lowest bytepos;
212    - number of expressions N_OPS bitwise ored together to represent
213      approximate cost of the computation.
214 
215    Note 1: the range is different from size as size reflects the size of the
216    type of the current expression.  For instance, for an array char a[],
217    (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
218    (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
219    time a range of 1.
220 
221    Note 2: for non-memory sources, range holds the same value as size.
222 
223    Note 3: SRC points to the SSA_NAME in case of non-memory source.  */
224 
225 struct symbolic_number {
226   uint64_t n;
227   tree type;
228   tree base_addr;
229   tree offset;
230   poly_int64_pod bytepos;
231   tree src;
232   tree alias_set;
233   tree vuse;
234   unsigned HOST_WIDE_INT range;
235   int n_ops;
236 };
237 
238 #define BITS_PER_MARKER 8
239 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240 #define MARKER_BYTE_UNKNOWN MARKER_MASK
241 #define HEAD_MARKER(n, size) \
242   ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
243 
244 /* The number which the find_bswap_or_nop_1 result should match in
245    order to have a nop.  The number is masked according to the size of
246    the symbolic number before using it.  */
247 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248   (uint64_t)0x08070605 << 32 | 0x04030201)
249 
250 /* The number which the find_bswap_or_nop_1 result should match in
251    order to have a byte swap.  The number is masked according to the
252    size of the symbolic number before using it.  */
253 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254   (uint64_t)0x01020304 << 32 | 0x05060708)
255 
256 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257    number N.  Return false if the requested operation is not permitted
258    on a symbolic number.  */
259 
260 inline bool
do_shift_rotate(enum tree_code code,struct symbolic_number * n,int count)261 do_shift_rotate (enum tree_code code,
262                      struct symbolic_number *n,
263                      int count)
264 {
265   int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
266   uint64_t head_marker;
267 
268   if (count < 0
269       || count >= TYPE_PRECISION (n->type)
270       || count % BITS_PER_UNIT != 0)
271     return false;
272   count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
273 
274   /* Zero out the extra bits of N in order to avoid them being shifted
275      into the significant bits.  */
276   if (size < 64 / BITS_PER_MARKER)
277     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
278 
279   switch (code)
280     {
281     case LSHIFT_EXPR:
282       n->n <<= count;
283       break;
284     case RSHIFT_EXPR:
285       head_marker = HEAD_MARKER (n->n, size);
286       n->n >>= count;
287       /* Arithmetic shift of signed type: result is dependent on the value.  */
288       if (!TYPE_UNSIGNED (n->type) && head_marker)
289           for (i = 0; i < count / BITS_PER_MARKER; i++)
290             n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
291                       << ((size - 1 - i) * BITS_PER_MARKER);
292       break;
293     case LROTATE_EXPR:
294       n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
295       break;
296     case RROTATE_EXPR:
297       n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
298       break;
299     default:
300       return false;
301     }
302   /* Zero unused bits for size.  */
303   if (size < 64 / BITS_PER_MARKER)
304     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
305   return true;
306 }
307 
308 /* Perform sanity checking for the symbolic number N and the gimple
309    statement STMT.  */
310 
311 inline bool
verify_symbolic_number_p(struct symbolic_number * n,gimple * stmt)312 verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
313 {
314   tree lhs_type;
315 
316   lhs_type = TREE_TYPE (gimple_get_lhs (stmt));
317 
318   if (TREE_CODE (lhs_type) != INTEGER_TYPE
319       && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
320     return false;
321 
322   if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
323     return false;
324 
325   return true;
326 }
327 
328 /* Initialize the symbolic number N for the bswap pass from the base element
329    SRC manipulated by the bitwise OR expression.  */
330 
331 bool
init_symbolic_number(struct symbolic_number * n,tree src)332 init_symbolic_number (struct symbolic_number *n, tree src)
333 {
334   int size;
335 
336   if (!INTEGRAL_TYPE_P (TREE_TYPE (src)) && !POINTER_TYPE_P (TREE_TYPE (src)))
337     return false;
338 
339   n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
340   n->src = src;
341 
342   /* Set up the symbolic number N by setting each byte to a value between 1 and
343      the byte size of rhs1.  The highest order byte is set to n->size and the
344      lowest order byte to 1.  */
345   n->type = TREE_TYPE (src);
346   size = TYPE_PRECISION (n->type);
347   if (size % BITS_PER_UNIT != 0)
348     return false;
349   size /= BITS_PER_UNIT;
350   if (size > 64 / BITS_PER_MARKER)
351     return false;
352   n->range = size;
353   n->n = CMPNOP;
354   n->n_ops = 1;
355 
356   if (size < 64 / BITS_PER_MARKER)
357     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
358 
359   return true;
360 }
361 
362 /* Check if STMT might be a byte swap or a nop from a memory source and returns
363    the answer. If so, REF is that memory source and the base of the memory area
364    accessed and the offset of the access from that base are recorded in N.  */
365 
366 bool
find_bswap_or_nop_load(gimple * stmt,tree ref,struct symbolic_number * n)367 find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
368 {
369   /* Leaf node is an array or component ref. Memorize its base and
370      offset from base to compare to other such leaf node.  */
371   poly_int64 bitsize, bitpos, bytepos;
372   machine_mode mode;
373   int unsignedp, reversep, volatilep;
374   tree offset, base_addr;
375 
376   /* Not prepared to handle PDP endian.  */
377   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
378     return false;
379 
380   if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
381     return false;
382 
383   base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
384                                            &unsignedp, &reversep, &volatilep);
385 
386   if (TREE_CODE (base_addr) == TARGET_MEM_REF)
387     /* Do not rewrite TARGET_MEM_REF.  */
388     return false;
389   else if (TREE_CODE (base_addr) == MEM_REF)
390     {
391       poly_offset_int bit_offset = 0;
392       tree off = TREE_OPERAND (base_addr, 1);
393 
394       if (!integer_zerop (off))
395           {
396             poly_offset_int boff = mem_ref_offset (base_addr);
397             boff <<= LOG2_BITS_PER_UNIT;
398             bit_offset += boff;
399           }
400 
401       base_addr = TREE_OPERAND (base_addr, 0);
402 
403       /* Avoid returning a negative bitpos as this may wreak havoc later.  */
404       if (maybe_lt (bit_offset, 0))
405           {
406             tree byte_offset = wide_int_to_tree
407               (sizetype, bits_to_bytes_round_down (bit_offset));
408             bit_offset = num_trailing_bits (bit_offset);
409             if (offset)
410               offset = size_binop (PLUS_EXPR, offset, byte_offset);
411             else
412               offset = byte_offset;
413           }
414 
415       bitpos += bit_offset.force_shwi ();
416     }
417   else
418     base_addr = build_fold_addr_expr (base_addr);
419 
420   if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
421     return false;
422   if (!multiple_p (bitsize, BITS_PER_UNIT))
423     return false;
424   if (reversep)
425     return false;
426 
427   if (!init_symbolic_number (n, ref))
428     return false;
429   n->base_addr = base_addr;
430   n->offset = offset;
431   n->bytepos = bytepos;
432   n->alias_set = reference_alias_ptr_type (ref);
433   n->vuse = gimple_vuse (stmt);
434   return true;
435 }
436 
437 /* Compute the symbolic number N representing the result of a bitwise OR,
438    bitwise XOR or plus on 2 symbolic number N1 and N2 whose source statements
439    are respectively SOURCE_STMT1 and SOURCE_STMT2.  CODE is the operation.  */
440 
441 gimple *
perform_symbolic_merge(gimple * source_stmt1,struct symbolic_number * n1,gimple * source_stmt2,struct symbolic_number * n2,struct symbolic_number * n,enum tree_code code)442 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
443                               gimple *source_stmt2, struct symbolic_number *n2,
444                               struct symbolic_number *n, enum tree_code code)
445 {
446   int i, size;
447   uint64_t mask;
448   gimple *source_stmt;
449   struct symbolic_number *n_start;
450 
451   tree rhs1 = gimple_assign_rhs1 (source_stmt1);
452   if (TREE_CODE (rhs1) == BIT_FIELD_REF
453       && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
454     rhs1 = TREE_OPERAND (rhs1, 0);
455   tree rhs2 = gimple_assign_rhs1 (source_stmt2);
456   if (TREE_CODE (rhs2) == BIT_FIELD_REF
457       && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
458     rhs2 = TREE_OPERAND (rhs2, 0);
459 
460   /* Sources are different, cancel bswap if they are not memory location with
461      the same base (array, structure, ...).  */
462   if (rhs1 != rhs2)
463     {
464       uint64_t inc;
465       HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
466       struct symbolic_number *toinc_n_ptr, *n_end;
467       basic_block bb1, bb2;
468 
469       if (!n1->base_addr || !n2->base_addr
470             || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
471           return NULL;
472 
473       if (!n1->offset != !n2->offset
474             || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
475           return NULL;
476 
477       start1 = 0;
478       if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
479           return NULL;
480 
481       if (start1 < start2)
482           {
483             n_start = n1;
484             start_sub = start2 - start1;
485           }
486       else
487           {
488             n_start = n2;
489             start_sub = start1 - start2;
490           }
491 
492       bb1 = gimple_bb (source_stmt1);
493       bb2 = gimple_bb (source_stmt2);
494       if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
495           source_stmt = source_stmt1;
496       else
497           source_stmt = source_stmt2;
498 
499       /* Find the highest address at which a load is performed and
500            compute related info.  */
501       end1 = start1 + (n1->range - 1);
502       end2 = start2 + (n2->range - 1);
503       if (end1 < end2)
504           {
505             end = end2;
506             end_sub = end2 - end1;
507           }
508       else
509           {
510             end = end1;
511             end_sub = end1 - end2;
512           }
513       n_end = (end2 > end1) ? n2 : n1;
514 
515       /* Find symbolic number whose lsb is the most significant.  */
516       if (BYTES_BIG_ENDIAN)
517           toinc_n_ptr = (n_end == n1) ? n2 : n1;
518       else
519           toinc_n_ptr = (n_start == n1) ? n2 : n1;
520 
521       n->range = end - MIN (start1, start2) + 1;
522 
523       /* Check that the range of memory covered can be represented by
524            a symbolic number.  */
525       if (n->range > 64 / BITS_PER_MARKER)
526           return NULL;
527 
528       /* Reinterpret byte marks in symbolic number holding the value of
529            bigger weight according to target endianness.  */
530       inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
531       size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
532       for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
533           {
534             unsigned marker
535               = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
536             if (marker && marker != MARKER_BYTE_UNKNOWN)
537               toinc_n_ptr->n += inc;
538           }
539     }
540   else
541     {
542       n->range = n1->range;
543       n_start = n1;
544       source_stmt = source_stmt1;
545     }
546 
547   if (!n1->alias_set
548       || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
549     n->alias_set = n1->alias_set;
550   else
551     n->alias_set = ptr_type_node;
552   n->vuse = n_start->vuse;
553   n->base_addr = n_start->base_addr;
554   n->offset = n_start->offset;
555   n->src = n_start->src;
556   n->bytepos = n_start->bytepos;
557   n->type = n_start->type;
558   size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
559   uint64_t res_n = n1->n | n2->n;
560 
561   for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
562     {
563       uint64_t masked1, masked2;
564 
565       masked1 = n1->n & mask;
566       masked2 = n2->n & mask;
567       /* If at least one byte is 0, all of 0 | x == 0 ^ x == 0 + x == x.  */
568       if (masked1 && masked2)
569           {
570             /* + can carry into upper bits, just punt.  */
571             if (code == PLUS_EXPR)
572               return NULL;
573             /* x | x is still x.  */
574             if (code == BIT_IOR_EXPR && masked1 == masked2)
575               continue;
576             if (code == BIT_XOR_EXPR)
577               {
578                 /* x ^ x is 0, but MARKER_BYTE_UNKNOWN stands for
579                      unknown values and unknown ^ unknown is unknown.  */
580                 if (masked1 == masked2
581                       && masked1 != ((uint64_t) MARKER_BYTE_UNKNOWN
582                                          << i * BITS_PER_MARKER))
583                     {
584                       res_n &= ~mask;
585                       continue;
586                     }
587               }
588             /* Otherwise set the byte to unknown, it might still be
589                later masked off.  */
590             res_n |= mask;
591           }
592     }
593   n->n = res_n;
594   n->n_ops = n1->n_ops + n2->n_ops;
595 
596   return source_stmt;
597 }
598 
599 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
600    the operation given by the rhs of STMT on the result.  If the operation
601    could successfully be executed the function returns a gimple stmt whose
602    rhs's first tree is the expression of the source operand and NULL
603    otherwise.  */
604 
605 gimple *
find_bswap_or_nop_1(gimple * stmt,struct symbolic_number * n,int limit)606 find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
607 {
608   enum tree_code code;
609   tree rhs1, rhs2 = NULL;
610   gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
611   enum gimple_rhs_class rhs_class;
612 
613   if (!limit || !is_gimple_assign (stmt))
614     return NULL;
615 
616   rhs1 = gimple_assign_rhs1 (stmt);
617 
618   if (find_bswap_or_nop_load (stmt, rhs1, n))
619     return stmt;
620 
621   /* Handle BIT_FIELD_REF.  */
622   if (TREE_CODE (rhs1) == BIT_FIELD_REF
623       && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
624     {
625       if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1, 1))
626             || !tree_fits_uhwi_p (TREE_OPERAND (rhs1, 2)))
627           return NULL;
628 
629       unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
630       unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
631       if (bitpos % BITS_PER_UNIT == 0
632             && bitsize % BITS_PER_UNIT == 0
633             && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
634           {
635             /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
636             if (BYTES_BIG_ENDIAN)
637               bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
638 
639             /* Shift.  */
640             if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
641               return NULL;
642 
643             /* Mask.  */
644             uint64_t mask = 0;
645             uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
646             for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
647                  i++, tmp <<= BITS_PER_UNIT)
648               mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
649             n->n &= mask;
650 
651             /* Convert.  */
652             n->type = TREE_TYPE (rhs1);
653             if (!n->base_addr)
654               n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
655 
656             return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
657           }
658 
659       return NULL;
660     }
661 
662   if (TREE_CODE (rhs1) != SSA_NAME)
663     return NULL;
664 
665   code = gimple_assign_rhs_code (stmt);
666   rhs_class = gimple_assign_rhs_class (stmt);
667   rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
668 
669   if (rhs_class == GIMPLE_BINARY_RHS)
670     rhs2 = gimple_assign_rhs2 (stmt);
671 
672   /* Handle unary rhs and binary rhs with integer constants as second
673      operand.  */
674 
675   if (rhs_class == GIMPLE_UNARY_RHS
676       || (rhs_class == GIMPLE_BINARY_RHS
677             && TREE_CODE (rhs2) == INTEGER_CST))
678     {
679       if (code != BIT_AND_EXPR
680             && code != LSHIFT_EXPR
681             && code != RSHIFT_EXPR
682             && code != LROTATE_EXPR
683             && code != RROTATE_EXPR
684             && !CONVERT_EXPR_CODE_P (code))
685           return NULL;
686 
687       source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
688 
689       /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
690            we have to initialize the symbolic number.  */
691       if (!source_stmt1)
692           {
693             if (gimple_assign_load_p (stmt)
694                 || !init_symbolic_number (n, rhs1))
695               return NULL;
696             source_stmt1 = stmt;
697           }
698 
699       switch (code)
700           {
701           case BIT_AND_EXPR:
702             {
703               int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
704               uint64_t val = int_cst_value (rhs2), mask = 0;
705               uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
706 
707               /* Only constants masking full bytes are allowed.  */
708               for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
709                 if ((val & tmp) != 0 && (val & tmp) != tmp)
710                     return NULL;
711                 else if (val & tmp)
712                     mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
713 
714               n->n &= mask;
715             }
716             break;
717           case LSHIFT_EXPR:
718           case RSHIFT_EXPR:
719           case LROTATE_EXPR:
720           case RROTATE_EXPR:
721             if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
722               return NULL;
723             break;
724           CASE_CONVERT:
725             {
726               int i, type_size, old_type_size;
727               tree type;
728 
729               type = TREE_TYPE (gimple_assign_lhs (stmt));
730               type_size = TYPE_PRECISION (type);
731               if (type_size % BITS_PER_UNIT != 0)
732                 return NULL;
733               type_size /= BITS_PER_UNIT;
734               if (type_size > 64 / BITS_PER_MARKER)
735                 return NULL;
736 
737               /* Sign extension: result is dependent on the value.  */
738               old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
739               if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
740                     && HEAD_MARKER (n->n, old_type_size))
741                 for (i = 0; i < type_size - old_type_size; i++)
742                     n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
743                               << ((type_size - 1 - i) * BITS_PER_MARKER);
744 
745               if (type_size < 64 / BITS_PER_MARKER)
746                 {
747                     /* If STMT casts to a smaller type mask out the bits not
748                        belonging to the target type.  */
749                     n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
750                 }
751               n->type = type;
752               if (!n->base_addr)
753                 n->range = type_size;
754             }
755             break;
756           default:
757             return NULL;
758           };
759       return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
760     }
761 
762   /* Handle binary rhs.  */
763 
764   if (rhs_class == GIMPLE_BINARY_RHS)
765     {
766       struct symbolic_number n1, n2;
767       gimple *source_stmt, *source_stmt2;
768 
769       if (!rhs2 || TREE_CODE (rhs2) != SSA_NAME)
770           return NULL;
771 
772       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
773 
774       switch (code)
775           {
776           case BIT_IOR_EXPR:
777           case BIT_XOR_EXPR:
778           case PLUS_EXPR:
779             source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
780 
781             if (!source_stmt1)
782               return NULL;
783 
784             source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
785 
786             if (!source_stmt2)
787               return NULL;
788 
789             if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
790               return NULL;
791 
792             if (n1.vuse != n2.vuse)
793               return NULL;
794 
795             source_stmt
796               = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n,
797                                               code);
798 
799             if (!source_stmt)
800               return NULL;
801 
802             if (!verify_symbolic_number_p (n, stmt))
803               return NULL;
804 
805             break;
806           default:
807             return NULL;
808           }
809       return source_stmt;
810     }
811   return NULL;
812 }
813 
814 /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
815    *CMPXCHG, *CMPNOP and adjust *N.  */
816 
817 void
find_bswap_or_nop_finalize(struct symbolic_number * n,uint64_t * cmpxchg,uint64_t * cmpnop,bool * cast64_to_32)818 find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
819                                   uint64_t *cmpnop, bool *cast64_to_32)
820 {
821   unsigned rsize;
822   uint64_t tmpn, mask;
823 
824   /* The number which the find_bswap_or_nop_1 result should match in order
825      to have a full byte swap.  The number is shifted to the right
826      according to the size of the symbolic number before using it.  */
827   *cmpxchg = CMPXCHG;
828   *cmpnop = CMPNOP;
829   *cast64_to_32 = false;
830 
831   /* Find real size of result (highest non-zero byte).  */
832   if (n->base_addr)
833     for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
834   else
835     rsize = n->range;
836 
837   /* Zero out the bits corresponding to untouched bytes in original gimple
838      expression.  */
839   if (n->range < (int) sizeof (int64_t))
840     {
841       mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
842       if (n->base_addr == NULL
843             && n->range == 4
844             && int_size_in_bytes (TREE_TYPE (n->src)) == 8)
845           {
846             /* If all bytes in n->n are either 0 or in [5..8] range, this
847                might be a candidate for (unsigned) __builtin_bswap64 (src).
848                It is not worth it for (unsigned short) __builtin_bswap64 (src)
849                or (unsigned short) __builtin_bswap32 (src).  */
850             *cast64_to_32 = true;
851             for (tmpn = n->n; tmpn; tmpn >>= BITS_PER_MARKER)
852               if ((tmpn & MARKER_MASK)
853                     && ((tmpn & MARKER_MASK) <= 4 || (tmpn & MARKER_MASK) > 8))
854                 {
855                     *cast64_to_32 = false;
856                     break;
857                 }
858           }
859       if (*cast64_to_32)
860           *cmpxchg &= mask;
861       else
862           *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
863       *cmpnop &= mask;
864     }
865 
866   /* Zero out the bits corresponding to unused bytes in the result of the
867      gimple expression.  */
868   if (rsize < n->range)
869     {
870       if (BYTES_BIG_ENDIAN)
871           {
872             mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
873             *cmpxchg &= mask;
874             if (n->range - rsize == sizeof (int64_t))
875               *cmpnop = 0;
876             else
877               *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
878           }
879       else
880           {
881             mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
882             if (n->range - rsize == sizeof (int64_t))
883               *cmpxchg = 0;
884             else
885               *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
886             *cmpnop &= mask;
887           }
888       n->range = rsize;
889     }
890 
891   if (*cast64_to_32)
892     n->range = 8;
893   n->range *= BITS_PER_UNIT;
894 }
895 
896 /* Check if STMT completes a bswap implementation or a read in a given
897    endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
898    accordingly.  It also sets N to represent the kind of operations
899    performed: size of the resulting expression and whether it works on
900    a memory source, and if so alias-set and vuse.  At last, the
901    function returns a stmt whose rhs's first tree is the source
902    expression.  */
903 
904 gimple *
find_bswap_or_nop(gimple * stmt,struct symbolic_number * n,bool * bswap,bool * cast64_to_32,uint64_t * mask)905 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap,
906                        bool *cast64_to_32, uint64_t *mask)
907 {
908   tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt)));
909   if (!tree_fits_uhwi_p (type_size))
910     return NULL;
911 
912   /* The last parameter determines the depth search limit.  It usually
913      correlates directly to the number n of bytes to be touched.  We
914      increase that number by 2 * (log2(n) + 1) here in order to also
915      cover signed -> unsigned conversions of the src operand as can be seen
916      in libgcc, and for initial shift/and operation of the src operand.  */
917   int limit = tree_to_uhwi (type_size);
918   limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
919   gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
920 
921   if (!ins_stmt)
922     {
923       if (gimple_assign_rhs_code (stmt) != CONSTRUCTOR
924             || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
925           return NULL;
926       unsigned HOST_WIDE_INT sz = tree_to_uhwi (type_size) * BITS_PER_UNIT;
927       if (sz != 16 && sz != 32 && sz != 64)
928           return NULL;
929       tree rhs = gimple_assign_rhs1 (stmt);
930       if (CONSTRUCTOR_NELTS (rhs) == 0)
931           return NULL;
932       tree eltype = TREE_TYPE (TREE_TYPE (rhs));
933       unsigned HOST_WIDE_INT eltsz
934           = int_size_in_bytes (eltype) * BITS_PER_UNIT;
935       if (TYPE_PRECISION (eltype) != eltsz)
936           return NULL;
937       constructor_elt *elt;
938       unsigned int i;
939       tree type = build_nonstandard_integer_type (sz, 1);
940       FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
941           {
942             if (TREE_CODE (elt->value) != SSA_NAME
943                 || !INTEGRAL_TYPE_P (TREE_TYPE (elt->value)))
944               return NULL;
945             struct symbolic_number n1;
946             gimple *source_stmt
947               = find_bswap_or_nop_1 (SSA_NAME_DEF_STMT (elt->value), &n1,
948                                            limit - 1);
949 
950             if (!source_stmt)
951               return NULL;
952 
953             n1.type = type;
954             if (!n1.base_addr)
955               n1.range = sz / BITS_PER_UNIT;
956 
957             if (i == 0)
958               {
959                 ins_stmt = source_stmt;
960                 *n = n1;
961               }
962             else
963               {
964                 if (n->vuse != n1.vuse)
965                     return NULL;
966 
967                 struct symbolic_number n0 = *n;
968 
969                 if (!BYTES_BIG_ENDIAN)
970                     {
971                       if (!do_shift_rotate (LSHIFT_EXPR, &n1, i * eltsz))
972                         return NULL;
973                     }
974                 else if (!do_shift_rotate (LSHIFT_EXPR, &n0, eltsz))
975                     return NULL;
976                 ins_stmt
977                     = perform_symbolic_merge (ins_stmt, &n0, source_stmt, &n1, n,
978                                                     BIT_IOR_EXPR);
979 
980                 if (!ins_stmt)
981                     return NULL;
982               }
983           }
984     }
985 
986   uint64_t cmpxchg, cmpnop;
987   find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop, cast64_to_32);
988 
989   /* A complete byte swap should make the symbolic number to start with
990      the largest digit in the highest order byte. Unchanged symbolic
991      number indicates a read with same endianness as target architecture.  */
992   *mask = ~(uint64_t) 0;
993   if (n->n == cmpnop)
994     *bswap = false;
995   else if (n->n == cmpxchg)
996     *bswap = true;
997   else
998     {
999       int set = 0;
1000       for (uint64_t msk = MARKER_MASK; msk; msk <<= BITS_PER_MARKER)
1001           if ((n->n & msk) == 0)
1002             *mask &= ~msk;
1003           else if ((n->n & msk) == (cmpxchg & msk))
1004             set++;
1005           else
1006             return NULL;
1007       if (set < 2)
1008           return NULL;
1009       *bswap = true;
1010     }
1011 
1012   /* Useless bit manipulation performed by code.  */
1013   if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
1014     return NULL;
1015 
1016   return ins_stmt;
1017 }
1018 
1019 const pass_data pass_data_optimize_bswap =
1020 {
1021   GIMPLE_PASS, /* type */
1022   "bswap", /* name */
1023   OPTGROUP_NONE, /* optinfo_flags */
1024   TV_NONE, /* tv_id */
1025   PROP_ssa, /* properties_required */
1026   0, /* properties_provided */
1027   0, /* properties_destroyed */
1028   0, /* todo_flags_start */
1029   0, /* todo_flags_finish */
1030 };
1031 
1032 class pass_optimize_bswap : public gimple_opt_pass
1033 {
1034 public:
pass_optimize_bswap(gcc::context * ctxt)1035   pass_optimize_bswap (gcc::context *ctxt)
1036     : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
1037   {}
1038 
1039   /* opt_pass methods: */
gate(function *)1040   virtual bool gate (function *)
1041     {
1042       return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
1043     }
1044 
1045   virtual unsigned int execute (function *);
1046 
1047 }; // class pass_optimize_bswap
1048 
1049 /* Helper function for bswap_replace.  Build VIEW_CONVERT_EXPR from
1050    VAL to TYPE.  If VAL has different type size, emit a NOP_EXPR cast
1051    first.  */
1052 
1053 static tree
bswap_view_convert(gimple_stmt_iterator * gsi,tree type,tree val,bool before)1054 bswap_view_convert (gimple_stmt_iterator *gsi, tree type, tree val,
1055                         bool before)
1056 {
1057   gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (val))
1058                 || POINTER_TYPE_P (TREE_TYPE (val)));
1059   if (TYPE_SIZE (type) != TYPE_SIZE (TREE_TYPE (val)))
1060     {
1061       HOST_WIDE_INT prec = TREE_INT_CST_LOW (TYPE_SIZE (type));
1062       if (POINTER_TYPE_P (TREE_TYPE (val)))
1063           {
1064             gimple *g
1065               = gimple_build_assign (make_ssa_name (pointer_sized_int_node),
1066                                            NOP_EXPR, val);
1067             if (before)
1068               gsi_insert_before (gsi, g, GSI_SAME_STMT);
1069             else
1070               gsi_insert_after (gsi, g, GSI_NEW_STMT);
1071             val = gimple_assign_lhs (g);
1072           }
1073       tree itype = build_nonstandard_integer_type (prec, 1);
1074       gimple *g = gimple_build_assign (make_ssa_name (itype), NOP_EXPR, val);
1075       if (before)
1076           gsi_insert_before (gsi, g, GSI_SAME_STMT);
1077       else
1078           gsi_insert_after (gsi, g, GSI_NEW_STMT);
1079       val = gimple_assign_lhs (g);
1080     }
1081   return build1 (VIEW_CONVERT_EXPR, type, val);
1082 }
1083 
1084 /* Perform the bswap optimization: replace the expression computed in the rhs
1085    of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
1086    bswap, load or load + bswap expression.
1087    Which of these alternatives replace the rhs is given by N->base_addr (non
1088    null if a load is needed) and BSWAP.  The type, VUSE and set-alias of the
1089    load to perform are also given in N while the builtin bswap invoke is given
1090    in FNDEL.  Finally, if a load is involved, INS_STMT refers to one of the
1091    load statements involved to construct the rhs in gsi_stmt (GSI) and
1092    N->range gives the size of the rhs expression for maintaining some
1093    statistics.
1094 
1095    Note that if the replacement involve a load and if gsi_stmt (GSI) is
1096    non-NULL, that stmt is moved just after INS_STMT to do the load with the
1097    same VUSE which can lead to gsi_stmt (GSI) changing of basic block.  */
1098 
1099 tree
bswap_replace(gimple_stmt_iterator gsi,gimple * ins_stmt,tree fndecl,tree bswap_type,tree load_type,struct symbolic_number * n,bool bswap,uint64_t mask)1100 bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
1101                  tree bswap_type, tree load_type, struct symbolic_number *n,
1102                  bool bswap, uint64_t mask)
1103 {
1104   tree src, tmp, tgt = NULL_TREE;
1105   gimple *bswap_stmt, *mask_stmt = NULL;
1106   tree_code conv_code = NOP_EXPR;
1107 
1108   gimple *cur_stmt = gsi_stmt (gsi);
1109   src = n->src;
1110   if (cur_stmt)
1111     {
1112       tgt = gimple_assign_lhs (cur_stmt);
1113       if (gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR
1114             && tgt
1115             && VECTOR_TYPE_P (TREE_TYPE (tgt)))
1116           conv_code = VIEW_CONVERT_EXPR;
1117     }
1118 
1119   /* Need to load the value from memory first.  */
1120   if (n->base_addr)
1121     {
1122       gimple_stmt_iterator gsi_ins = gsi;
1123       if (ins_stmt)
1124           gsi_ins = gsi_for_stmt (ins_stmt);
1125       tree addr_expr, addr_tmp, val_expr, val_tmp;
1126       tree load_offset_ptr, aligned_load_type;
1127       gimple *load_stmt;
1128       unsigned align = get_object_alignment (src);
1129       poly_int64 load_offset = 0;
1130 
1131       if (cur_stmt)
1132           {
1133             basic_block ins_bb = gimple_bb (ins_stmt);
1134             basic_block cur_bb = gimple_bb (cur_stmt);
1135             if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
1136               return NULL_TREE;
1137 
1138             /* Move cur_stmt just before one of the load of the original
1139                to ensure it has the same VUSE.  See PR61517 for what could
1140                go wrong.  */
1141             if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
1142               reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
1143             gsi_move_before (&gsi, &gsi_ins);
1144             gsi = gsi_for_stmt (cur_stmt);
1145           }
1146       else
1147           gsi = gsi_ins;
1148 
1149       /* Compute address to load from and cast according to the size
1150            of the load.  */
1151       addr_expr = build_fold_addr_expr (src);
1152       if (is_gimple_mem_ref_addr (addr_expr))
1153           addr_tmp = unshare_expr (addr_expr);
1154       else
1155           {
1156             addr_tmp = unshare_expr (n->base_addr);
1157             if (!is_gimple_mem_ref_addr (addr_tmp))
1158               addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
1159                                                                is_gimple_mem_ref_addr,
1160                                                                NULL_TREE, true,
1161                                                                GSI_SAME_STMT);
1162             load_offset = n->bytepos;
1163             if (n->offset)
1164               {
1165                 tree off
1166                     = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
1167                                                       true, NULL_TREE, true,
1168                                                       GSI_SAME_STMT);
1169                 gimple *stmt
1170                     = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
1171                                                POINTER_PLUS_EXPR, addr_tmp, off);
1172                 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1173                 addr_tmp = gimple_assign_lhs (stmt);
1174               }
1175           }
1176 
1177       /* Perform the load.  */
1178       aligned_load_type = load_type;
1179       if (align < TYPE_ALIGN (load_type))
1180           aligned_load_type = build_aligned_type (load_type, align);
1181       load_offset_ptr = build_int_cst (n->alias_set, load_offset);
1182       val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
1183                                     load_offset_ptr);
1184 
1185       if (!bswap)
1186           {
1187             if (n->range == 16)
1188               nop_stats.found_16bit++;
1189             else if (n->range == 32)
1190               nop_stats.found_32bit++;
1191             else
1192               {
1193                 gcc_assert (n->range == 64);
1194                 nop_stats.found_64bit++;
1195               }
1196 
1197             /* Convert the result of load if necessary.  */
1198             if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
1199               {
1200                 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1201                                                       "load_dst");
1202                 load_stmt = gimple_build_assign (val_tmp, val_expr);
1203                 gimple_set_vuse (load_stmt, n->vuse);
1204                 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1205                 if (conv_code == VIEW_CONVERT_EXPR)
1206                     val_tmp = bswap_view_convert (&gsi, TREE_TYPE (tgt), val_tmp,
1207                                                         true);
1208                 gimple_assign_set_rhs_with_ops (&gsi, conv_code, val_tmp);
1209                 update_stmt (cur_stmt);
1210               }
1211             else if (cur_stmt)
1212               {
1213                 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1214                 gimple_set_vuse (cur_stmt, n->vuse);
1215                 update_stmt (cur_stmt);
1216               }
1217             else
1218               {
1219                 tgt = make_ssa_name (load_type);
1220                 cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1221                 gimple_set_vuse (cur_stmt, n->vuse);
1222                 gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
1223               }
1224 
1225             if (dump_file)
1226               {
1227                 fprintf (dump_file,
1228                            "%d bit load in target endianness found at: ",
1229                            (int) n->range);
1230                 print_gimple_stmt (dump_file, cur_stmt, 0);
1231               }
1232             return tgt;
1233           }
1234       else
1235           {
1236             val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1237             load_stmt = gimple_build_assign (val_tmp, val_expr);
1238             gimple_set_vuse (load_stmt, n->vuse);
1239             gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1240           }
1241       src = val_tmp;
1242     }
1243   else if (!bswap)
1244     {
1245       gimple *g = NULL;
1246       if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
1247           {
1248             if (!is_gimple_val (src))
1249               return NULL_TREE;
1250             if (conv_code == VIEW_CONVERT_EXPR)
1251               src = bswap_view_convert (&gsi, TREE_TYPE (tgt), src, true);
1252             g = gimple_build_assign (tgt, conv_code, src);
1253           }
1254       else if (cur_stmt)
1255           g = gimple_build_assign (tgt, src);
1256       else
1257           tgt = src;
1258       if (n->range == 16)
1259           nop_stats.found_16bit++;
1260       else if (n->range == 32)
1261           nop_stats.found_32bit++;
1262       else
1263           {
1264             gcc_assert (n->range == 64);
1265             nop_stats.found_64bit++;
1266           }
1267       if (dump_file)
1268           {
1269             fprintf (dump_file,
1270                        "%d bit reshuffle in target endianness found at: ",
1271                        (int) n->range);
1272             if (cur_stmt)
1273               print_gimple_stmt (dump_file, cur_stmt, 0);
1274             else
1275               {
1276                 print_generic_expr (dump_file, tgt, TDF_NONE);
1277                 fprintf (dump_file, "\n");
1278               }
1279           }
1280       if (cur_stmt)
1281           gsi_replace (&gsi, g, true);
1282       return tgt;
1283     }
1284   else if (TREE_CODE (src) == BIT_FIELD_REF)
1285     src = TREE_OPERAND (src, 0);
1286 
1287   if (n->range == 16)
1288     bswap_stats.found_16bit++;
1289   else if (n->range == 32)
1290     bswap_stats.found_32bit++;
1291   else
1292     {
1293       gcc_assert (n->range == 64);
1294       bswap_stats.found_64bit++;
1295     }
1296 
1297   tmp = src;
1298 
1299   /* Convert the src expression if necessary.  */
1300   if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1301     {
1302       gimple *convert_stmt;
1303 
1304       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1305       convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1306       gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1307     }
1308 
1309   /* Canonical form for 16 bit bswap is a rotate expression.  Only 16bit values
1310      are considered as rotation of 2N bit values by N bits is generally not
1311      equivalent to a bswap.  Consider for instance 0x01020304 r>> 16 which
1312      gives 0x03040102 while a bswap for that value is 0x04030201.  */
1313   if (bswap && n->range == 16)
1314     {
1315       tree count = build_int_cst (NULL, BITS_PER_UNIT);
1316       src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1317       bswap_stmt = gimple_build_assign (NULL, src);
1318     }
1319   else
1320     bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1321 
1322   if (tgt == NULL_TREE)
1323     tgt = make_ssa_name (bswap_type);
1324   tmp = tgt;
1325 
1326   if (mask != ~(uint64_t) 0)
1327     {
1328       tree m = build_int_cst (bswap_type, mask);
1329       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1330       gimple_set_lhs (bswap_stmt, tmp);
1331       mask_stmt = gimple_build_assign (tgt, BIT_AND_EXPR, tmp, m);
1332       tmp = tgt;
1333     }
1334 
1335   /* Convert the result if necessary.  */
1336   if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1337     {
1338       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1339       tree atmp = tmp;
1340       gimple_stmt_iterator gsi2 = gsi;
1341       if (conv_code == VIEW_CONVERT_EXPR)
1342           atmp = bswap_view_convert (&gsi2, TREE_TYPE (tgt), tmp, false);
1343       gimple *convert_stmt = gimple_build_assign (tgt, conv_code, atmp);
1344       gsi_insert_after (&gsi2, convert_stmt, GSI_SAME_STMT);
1345     }
1346 
1347   gimple_set_lhs (mask_stmt ? mask_stmt : bswap_stmt, tmp);
1348 
1349   if (dump_file)
1350     {
1351       fprintf (dump_file, "%d bit bswap implementation found at: ",
1352                  (int) n->range);
1353       if (cur_stmt)
1354           print_gimple_stmt (dump_file, cur_stmt, 0);
1355       else
1356           {
1357             print_generic_expr (dump_file, tgt, TDF_NONE);
1358             fprintf (dump_file, "\n");
1359           }
1360     }
1361 
1362   if (cur_stmt)
1363     {
1364       if (mask_stmt)
1365           gsi_insert_after (&gsi, mask_stmt, GSI_SAME_STMT);
1366       gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1367       gsi_remove (&gsi, true);
1368     }
1369   else
1370     {
1371       gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1372       if (mask_stmt)
1373           gsi_insert_before (&gsi, mask_stmt, GSI_SAME_STMT);
1374     }
1375   return tgt;
1376 }
1377 
1378 /* Try to optimize an assignment CUR_STMT with CONSTRUCTOR on the rhs
1379    using bswap optimizations.  CDI_DOMINATORS need to be
1380    computed on entry.  Return true if it has been optimized and
1381    TODO_update_ssa is needed.  */
1382 
1383 static bool
maybe_optimize_vector_constructor(gimple * cur_stmt)1384 maybe_optimize_vector_constructor (gimple *cur_stmt)
1385 {
1386   tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1387   struct symbolic_number n;
1388   bool bswap;
1389 
1390   gcc_assert (is_gimple_assign (cur_stmt)
1391                 && gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR);
1392 
1393   tree rhs = gimple_assign_rhs1 (cur_stmt);
1394   if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
1395       || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
1396       || gimple_assign_lhs (cur_stmt) == NULL_TREE)
1397     return false;
1398 
1399   HOST_WIDE_INT sz = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
1400   switch (sz)
1401     {
1402     case 16:
1403       load_type = bswap_type = uint16_type_node;
1404       break;
1405     case 32:
1406       if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1407             && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
1408           {
1409             load_type = uint32_type_node;
1410             fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1411             bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1412           }
1413       else
1414           return false;
1415       break;
1416     case 64:
1417       if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1418             && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1419                 || (word_mode == SImode
1420                       && builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1421                       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)))
1422           {
1423             load_type = uint64_type_node;
1424             fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1425             bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1426           }
1427       else
1428           return false;
1429       break;
1430     default:
1431       return false;
1432     }
1433 
1434   bool cast64_to_32;
1435   uint64_t mask;
1436   gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
1437                                                   &cast64_to_32, &mask);
1438   if (!ins_stmt
1439       || n.range != (unsigned HOST_WIDE_INT) sz
1440       || cast64_to_32
1441       || mask != ~(uint64_t) 0)
1442     return false;
1443 
1444   if (bswap && !fndecl && n.range != 16)
1445     return false;
1446 
1447   memset (&nop_stats, 0, sizeof (nop_stats));
1448   memset (&bswap_stats, 0, sizeof (bswap_stats));
1449   return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1450                               bswap_type, load_type, &n, bswap, mask) != NULL_TREE;
1451 }
1452 
1453 /* Find manual byte swap implementations as well as load in a given
1454    endianness. Byte swaps are turned into a bswap builtin invokation
1455    while endian loads are converted to bswap builtin invokation or
1456    simple load according to the target endianness.  */
1457 
1458 unsigned int
execute(function * fun)1459 pass_optimize_bswap::execute (function *fun)
1460 {
1461   basic_block bb;
1462   bool bswap32_p, bswap64_p;
1463   bool changed = false;
1464   tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1465 
1466   bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1467                  && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1468   bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1469                  && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1470                        || (bswap32_p && word_mode == SImode)));
1471 
1472   /* Determine the argument type of the builtins.  The code later on
1473      assumes that the return and argument type are the same.  */
1474   if (bswap32_p)
1475     {
1476       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1477       bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1478     }
1479 
1480   if (bswap64_p)
1481     {
1482       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1483       bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1484     }
1485 
1486   memset (&nop_stats, 0, sizeof (nop_stats));
1487   memset (&bswap_stats, 0, sizeof (bswap_stats));
1488   calculate_dominance_info (CDI_DOMINATORS);
1489 
1490   FOR_EACH_BB_FN (bb, fun)
1491     {
1492       gimple_stmt_iterator gsi;
1493 
1494       /* We do a reverse scan for bswap patterns to make sure we get the
1495            widest match. As bswap pattern matching doesn't handle previously
1496            inserted smaller bswap replacements as sub-patterns, the wider
1497            variant wouldn't be detected.  */
1498       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1499           {
1500             gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1501             tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1502             enum tree_code code;
1503             struct symbolic_number n;
1504             bool bswap, cast64_to_32;
1505             uint64_t mask;
1506 
1507             /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1508                might be moved to a different basic block by bswap_replace and gsi
1509                must not points to it if that's the case.  Moving the gsi_prev
1510                there make sure that gsi points to the statement previous to
1511                cur_stmt while still making sure that all statements are
1512                considered in this basic block.  */
1513             gsi_prev (&gsi);
1514 
1515             if (!is_gimple_assign (cur_stmt))
1516               continue;
1517 
1518             code = gimple_assign_rhs_code (cur_stmt);
1519             switch (code)
1520               {
1521               case LROTATE_EXPR:
1522               case RROTATE_EXPR:
1523                 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1524                       || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1525                          % BITS_PER_UNIT)
1526                     continue;
1527                 /* Fall through.  */
1528               case BIT_IOR_EXPR:
1529               case BIT_XOR_EXPR:
1530               case PLUS_EXPR:
1531                 break;
1532               case CONSTRUCTOR:
1533                 {
1534                     tree rhs = gimple_assign_rhs1 (cur_stmt);
1535                     if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1536                         && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs))))
1537                       break;
1538                 }
1539                 continue;
1540               default:
1541                 continue;
1542               }
1543 
1544             ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
1545                                                   &cast64_to_32, &mask);
1546 
1547             if (!ins_stmt)
1548               continue;
1549 
1550             switch (n.range)
1551               {
1552               case 16:
1553                 /* Already in canonical form, nothing to do.  */
1554                 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1555                     continue;
1556                 load_type = bswap_type = uint16_type_node;
1557                 break;
1558               case 32:
1559                 load_type = uint32_type_node;
1560                 if (bswap32_p)
1561                     {
1562                       fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1563                       bswap_type = bswap32_type;
1564                     }
1565                 break;
1566               case 64:
1567                 load_type = uint64_type_node;
1568                 if (bswap64_p)
1569                     {
1570                       fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1571                       bswap_type = bswap64_type;
1572                     }
1573                 break;
1574               default:
1575                 continue;
1576               }
1577 
1578             if (bswap && !fndecl && n.range != 16)
1579               continue;
1580 
1581             if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1582                                    bswap_type, load_type, &n, bswap, mask))
1583               changed = true;
1584           }
1585     }
1586 
1587   statistics_counter_event (fun, "16-bit nop implementations found",
1588                                   nop_stats.found_16bit);
1589   statistics_counter_event (fun, "32-bit nop implementations found",
1590                                   nop_stats.found_32bit);
1591   statistics_counter_event (fun, "64-bit nop implementations found",
1592                                   nop_stats.found_64bit);
1593   statistics_counter_event (fun, "16-bit bswap implementations found",
1594                                   bswap_stats.found_16bit);
1595   statistics_counter_event (fun, "32-bit bswap implementations found",
1596                                   bswap_stats.found_32bit);
1597   statistics_counter_event (fun, "64-bit bswap implementations found",
1598                                   bswap_stats.found_64bit);
1599 
1600   return (changed ? TODO_update_ssa : 0);
1601 }
1602 
1603 } // anon namespace
1604 
1605 gimple_opt_pass *
make_pass_optimize_bswap(gcc::context * ctxt)1606 make_pass_optimize_bswap (gcc::context *ctxt)
1607 {
1608   return new pass_optimize_bswap (ctxt);
1609 }
1610 
1611 namespace {
1612 
1613 /* Struct recording one operand for the store, which is either a constant,
1614    then VAL represents the constant and all the other fields are zero, or
1615    a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1616    and the other fields also reflect the memory load, or an SSA name, then
1617    VAL represents the SSA name and all the other fields are zero.  */
1618 
1619 class store_operand_info
1620 {
1621 public:
1622   tree val;
1623   tree base_addr;
1624   poly_uint64 bitsize;
1625   poly_uint64 bitpos;
1626   poly_uint64 bitregion_start;
1627   poly_uint64 bitregion_end;
1628   gimple *stmt;
1629   bool bit_not_p;
1630   store_operand_info ();
1631 };
1632 
store_operand_info()1633 store_operand_info::store_operand_info ()
1634   : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
1635     bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
1636 {
1637 }
1638 
1639 /* Struct recording the information about a single store of an immediate
1640    to memory.  These are created in the first phase and coalesced into
1641    merged_store_group objects in the second phase.  */
1642 
1643 class store_immediate_info
1644 {
1645 public:
1646   unsigned HOST_WIDE_INT bitsize;
1647   unsigned HOST_WIDE_INT bitpos;
1648   unsigned HOST_WIDE_INT bitregion_start;
1649   /* This is one past the last bit of the bit region.  */
1650   unsigned HOST_WIDE_INT bitregion_end;
1651   gimple *stmt;
1652   unsigned int order;
1653   /* INTEGER_CST for constant store, STRING_CST for string store,
1654      MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
1655      BIT_INSERT_EXPR for bit insertion.
1656      LROTATE_EXPR if it can be only bswap optimized and
1657      ops are not really meaningful.
1658      NOP_EXPR if bswap optimization detected identity, ops
1659      are not meaningful.  */
1660   enum tree_code rhs_code;
1661   /* Two fields for bswap optimization purposes.  */
1662   struct symbolic_number n;
1663   gimple *ins_stmt;
1664   /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing.  */
1665   bool bit_not_p;
1666   /* True if ops have been swapped and thus ops[1] represents
1667      rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2.  */
1668   bool ops_swapped_p;
1669   /* The index number of the landing pad, or 0 if there is none.  */
1670   int lp_nr;
1671   /* Operands.  For BIT_*_EXPR rhs_code both operands are used, otherwise
1672      just the first one.  */
1673   store_operand_info ops[2];
1674   store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1675                               unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1676                               gimple *, unsigned int, enum tree_code,
1677                               struct symbolic_number &, gimple *, bool, int,
1678                               const store_operand_info &,
1679                               const store_operand_info &);
1680 };
1681 
store_immediate_info(unsigned HOST_WIDE_INT bs,unsigned HOST_WIDE_INT bp,unsigned HOST_WIDE_INT brs,unsigned HOST_WIDE_INT bre,gimple * st,unsigned int ord,enum tree_code rhscode,struct symbolic_number & nr,gimple * ins_stmtp,bool bitnotp,int nr2,const store_operand_info & op0r,const store_operand_info & op1r)1682 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
1683                                                       unsigned HOST_WIDE_INT bp,
1684                                                       unsigned HOST_WIDE_INT brs,
1685                                                       unsigned HOST_WIDE_INT bre,
1686                                                       gimple *st,
1687                                                       unsigned int ord,
1688                                                       enum tree_code rhscode,
1689                                                       struct symbolic_number &nr,
1690                                                       gimple *ins_stmtp,
1691                                                       bool bitnotp,
1692                                                       int nr2,
1693                                                       const store_operand_info &op0r,
1694                                                       const store_operand_info &op1r)
1695   : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
1696     stmt (st), order (ord), rhs_code (rhscode), n (nr),
1697     ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
1698     lp_nr (nr2), ops { op0r, op1r }
1699 {
1700 }
1701 
1702 /* Struct representing a group of stores to contiguous memory locations.
1703    These are produced by the second phase (coalescing) and consumed in the
1704    third phase that outputs the widened stores.  */
1705 
1706 class merged_store_group
1707 {
1708 public:
1709   unsigned HOST_WIDE_INT start;
1710   unsigned HOST_WIDE_INT width;
1711   unsigned HOST_WIDE_INT bitregion_start;
1712   unsigned HOST_WIDE_INT bitregion_end;
1713   /* The size of the allocated memory for val and mask.  */
1714   unsigned HOST_WIDE_INT buf_size;
1715   unsigned HOST_WIDE_INT align_base;
1716   poly_uint64 load_align_base[2];
1717 
1718   unsigned int align;
1719   unsigned int load_align[2];
1720   unsigned int first_order;
1721   unsigned int last_order;
1722   bool bit_insertion;
1723   bool string_concatenation;
1724   bool only_constants;
1725   bool consecutive;
1726   unsigned int first_nonmergeable_order;
1727   int lp_nr;
1728 
1729   auto_vec<store_immediate_info *> stores;
1730   /* We record the first and last original statements in the sequence because
1731      we'll need their vuse/vdef and replacement position.  It's easier to keep
1732      track of them separately as 'stores' is reordered by apply_stores.  */
1733   gimple *last_stmt;
1734   gimple *first_stmt;
1735   unsigned char *val;
1736   unsigned char *mask;
1737 
1738   merged_store_group (store_immediate_info *);
1739   ~merged_store_group ();
1740   bool can_be_merged_into (store_immediate_info *);
1741   void merge_into (store_immediate_info *);
1742   void merge_overlapping (store_immediate_info *);
1743   bool apply_stores ();
1744 private:
1745   void do_merge (store_immediate_info *);
1746 };
1747 
1748 /* Debug helper.  Dump LEN elements of byte array PTR to FD in hex.  */
1749 
1750 static void
dump_char_array(FILE * fd,unsigned char * ptr,unsigned int len)1751 dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1752 {
1753   if (!fd)
1754     return;
1755 
1756   for (unsigned int i = 0; i < len; i++)
1757     fprintf (fd, "%02x ", ptr[i]);
1758   fprintf (fd, "\n");
1759 }
1760 
1761 /* Clear out LEN bits starting from bit START in the byte array
1762    PTR.  This clears the bits to the *right* from START.
1763    START must be within [0, BITS_PER_UNIT) and counts starting from
1764    the least significant bit.  */
1765 
1766 static void
clear_bit_region_be(unsigned char * ptr,unsigned int start,unsigned int len)1767 clear_bit_region_be (unsigned char *ptr, unsigned int start,
1768                          unsigned int len)
1769 {
1770   if (len == 0)
1771     return;
1772   /* Clear len bits to the right of start.  */
1773   else if (len <= start + 1)
1774     {
1775       unsigned char mask = (~(~0U << len));
1776       mask = mask << (start + 1U - len);
1777       ptr[0] &= ~mask;
1778     }
1779   else if (start != BITS_PER_UNIT - 1)
1780     {
1781       clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1782       clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1783                                  len - (start % BITS_PER_UNIT) - 1);
1784     }
1785   else if (start == BITS_PER_UNIT - 1
1786              && len > BITS_PER_UNIT)
1787     {
1788       unsigned int nbytes = len / BITS_PER_UNIT;
1789       memset (ptr, 0, nbytes);
1790       if (len % BITS_PER_UNIT != 0)
1791           clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1792                                    len % BITS_PER_UNIT);
1793     }
1794   else
1795     gcc_unreachable ();
1796 }
1797 
1798 /* In the byte array PTR clear the bit region starting at bit
1799    START and is LEN bits wide.
1800    For regions spanning multiple bytes do this recursively until we reach
1801    zero LEN or a region contained within a single byte.  */
1802 
1803 static void
clear_bit_region(unsigned char * ptr,unsigned int start,unsigned int len)1804 clear_bit_region (unsigned char *ptr, unsigned int start,
1805                       unsigned int len)
1806 {
1807   /* Degenerate base case.  */
1808   if (len == 0)
1809     return;
1810   else if (start >= BITS_PER_UNIT)
1811     clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1812   /* Second base case.  */
1813   else if ((start + len) <= BITS_PER_UNIT)
1814     {
1815       unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
1816       mask >>= BITS_PER_UNIT - (start + len);
1817 
1818       ptr[0] &= ~mask;
1819 
1820       return;
1821     }
1822   /* Clear most significant bits in a byte and proceed with the next byte.  */
1823   else if (start != 0)
1824     {
1825       clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1826       clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
1827     }
1828   /* Whole bytes need to be cleared.  */
1829   else if (start == 0 && len > BITS_PER_UNIT)
1830     {
1831       unsigned int nbytes = len / BITS_PER_UNIT;
1832       /* We could recurse on each byte but we clear whole bytes, so a simple
1833            memset will do.  */
1834       memset (ptr, '\0', nbytes);
1835       /* Clear the remaining sub-byte region if there is one.  */
1836       if (len % BITS_PER_UNIT != 0)
1837           clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1838     }
1839   else
1840     gcc_unreachable ();
1841 }
1842 
1843 /* Write BITLEN bits of EXPR to the byte array PTR at
1844    bit position BITPOS.  PTR should contain TOTAL_BYTES elements.
1845    Return true if the operation succeeded.  */
1846 
1847 static bool
encode_tree_to_bitpos(tree expr,unsigned char * ptr,int bitlen,int bitpos,unsigned int total_bytes)1848 encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
1849                            unsigned int total_bytes)
1850 {
1851   unsigned int first_byte = bitpos / BITS_PER_UNIT;
1852   bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1853                               || (bitpos % BITS_PER_UNIT)
1854                               || !int_mode_for_size (bitlen, 0).exists ());
1855   bool empty_ctor_p
1856     = (TREE_CODE (expr) == CONSTRUCTOR
1857        && CONSTRUCTOR_NELTS (expr) == 0
1858        && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1859                            && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
1860 
1861   if (!sub_byte_op_p)
1862     {
1863       if (first_byte >= total_bytes)
1864           return false;
1865       total_bytes -= first_byte;
1866       if (empty_ctor_p)
1867           {
1868             unsigned HOST_WIDE_INT rhs_bytes
1869               = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1870             if (rhs_bytes > total_bytes)
1871               return false;
1872             memset (ptr + first_byte, '\0', rhs_bytes);
1873             return true;
1874           }
1875       return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1876     }
1877 
1878   /* LITTLE-ENDIAN
1879      We are writing a non byte-sized quantity or at a position that is not
1880      at a byte boundary.
1881      |--------|--------|--------| ptr + first_byte
1882            ^              ^
1883            xxx xxxxxxxx xxx< bp>
1884            |______EXPR____|
1885 
1886      First native_encode_expr EXPR into a temporary buffer and shift each
1887      byte in the buffer by 'bp' (carrying the bits over as necessary).
1888      |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1889                                               <------bitlen---->< bp>
1890     Then we clear the destination bits:
1891     |---00000|00000000|000-----| ptr + first_byte
1892         <-------bitlen--->< bp>
1893 
1894     Finally we ORR the bytes of the shifted EXPR into the cleared region:
1895     |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1896 
1897    BIG-ENDIAN
1898    We are writing a non byte-sized quantity or at a position that is not
1899    at a byte boundary.
1900      ptr + first_byte |--------|--------|--------|
1901                             ^              ^
1902                        <bp >xxx xxxxxxxx xxx
1903                             |_____EXPR_____|
1904 
1905      First native_encode_expr EXPR into a temporary buffer and shift each
1906      byte in the buffer to the right by (carrying the bits over as necessary).
1907      We shift by as much as needed to align the most significant bit of EXPR
1908      with bitpos:
1909      |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1910         <---bitlen---->          <bp ><-----bitlen----->
1911     Then we clear the destination bits:
1912     ptr + first_byte |-----000||00000000||00000---|
1913                       <bp ><-------bitlen----->
1914 
1915     Finally we ORR the bytes of the shifted EXPR into the cleared region:
1916     ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1917     The awkwardness comes from the fact that bitpos is counted from the
1918     most significant bit of a byte.  */
1919 
1920   /* We must be dealing with fixed-size data at this point, since the
1921      total size is also fixed.  */
1922   unsigned int byte_size;
1923   if (empty_ctor_p)
1924     {
1925       unsigned HOST_WIDE_INT rhs_bytes
1926           = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1927       if (rhs_bytes > total_bytes)
1928           return false;
1929       byte_size = rhs_bytes;
1930     }
1931   else
1932     {
1933       fixed_size_mode mode
1934           = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1935       byte_size
1936           = mode == BLKmode
1937           ? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)))
1938           : GET_MODE_SIZE (mode);
1939     }
1940   /* Allocate an extra byte so that we have space to shift into.  */
1941   byte_size++;
1942   unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
1943   memset (tmpbuf, '\0', byte_size);
1944   /* The store detection code should only have allowed constants that are
1945      accepted by native_encode_expr or empty ctors.  */
1946   if (!empty_ctor_p
1947       && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
1948     gcc_unreachable ();
1949 
1950   /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1951      bytes to write.  This means it can write more than
1952      ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1953      write 8 bytes for a bitlen of 40).  Skip the bytes that are not within
1954      bitlen and zero out the bits that are not relevant as well (that may
1955      contain a sign bit due to sign-extension).  */
1956   unsigned int padding
1957     = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
1958   /* On big-endian the padding is at the 'front' so just skip the initial
1959      bytes.  */
1960   if (BYTES_BIG_ENDIAN)
1961     tmpbuf += padding;
1962 
1963   byte_size -= padding;
1964 
1965   if (bitlen % BITS_PER_UNIT != 0)
1966     {
1967       if (BYTES_BIG_ENDIAN)
1968           clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1969                                    BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1970       else
1971           clear_bit_region (tmpbuf, bitlen,
1972                                 byte_size * BITS_PER_UNIT - bitlen);
1973     }
1974   /* Left shifting relies on the last byte being clear if bitlen is
1975      a multiple of BITS_PER_UNIT, which might not be clear if
1976      there are padding bytes.  */
1977   else if (!BYTES_BIG_ENDIAN)
1978     tmpbuf[byte_size - 1] = '\0';
1979 
1980   /* Clear the bit region in PTR where the bits from TMPBUF will be
1981      inserted into.  */
1982   if (BYTES_BIG_ENDIAN)
1983     clear_bit_region_be (ptr + first_byte,
1984                                BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1985   else
1986     clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1987 
1988   int shift_amnt;
1989   int bitlen_mod = bitlen % BITS_PER_UNIT;
1990   int bitpos_mod = bitpos % BITS_PER_UNIT;
1991 
1992   bool skip_byte = false;
1993   if (BYTES_BIG_ENDIAN)
1994     {
1995       /* BITPOS and BITLEN are exactly aligned and no shifting
1996            is necessary.  */
1997       if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1998             || (bitpos_mod == 0 && bitlen_mod == 0))
1999           shift_amnt = 0;
2000       /* |. . . . . . . .|
2001             <bp >   <blen >.
2002            We always shift right for BYTES_BIG_ENDIAN so shift the beginning
2003            of the value until it aligns with 'bp' in the next byte over.  */
2004       else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
2005           {
2006             shift_amnt = bitlen_mod + bitpos_mod;
2007             skip_byte = bitlen_mod != 0;
2008           }
2009       /* |. . . . . . . .|
2010             <----bp--->
2011               <---blen---->.
2012            Shift the value right within the same byte so it aligns with 'bp'.  */
2013       else
2014           shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
2015     }
2016   else
2017     shift_amnt = bitpos % BITS_PER_UNIT;
2018 
2019   /* Create the shifted version of EXPR.  */
2020   if (!BYTES_BIG_ENDIAN)
2021     {
2022       shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
2023       if (shift_amnt == 0)
2024           byte_size--;
2025     }
2026   else
2027     {
2028       gcc_assert (BYTES_BIG_ENDIAN);
2029       shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
2030       /* If shifting right forced us to move into the next byte skip the now
2031            empty byte.  */
2032       if (skip_byte)
2033           {
2034             tmpbuf++;
2035             byte_size--;
2036           }
2037     }
2038 
2039   /* Insert the bits from TMPBUF.  */
2040   for (unsigned int i = 0; i < byte_size; i++)
2041     ptr[first_byte + i] |= tmpbuf[i];
2042 
2043   return true;
2044 }
2045 
2046 /* Sorting function for store_immediate_info objects.
2047    Sorts them by bitposition.  */
2048 
2049 static int
sort_by_bitpos(const void * x,const void * y)2050 sort_by_bitpos (const void *x, const void *y)
2051 {
2052   store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2053   store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2054 
2055   if ((*tmp)->bitpos < (*tmp2)->bitpos)
2056     return -1;
2057   else if ((*tmp)->bitpos > (*tmp2)->bitpos)
2058     return 1;
2059   else
2060     /* If they are the same let's use the order which is guaranteed to
2061        be different.  */
2062     return (*tmp)->order - (*tmp2)->order;
2063 }
2064 
2065 /* Sorting function for store_immediate_info objects.
2066    Sorts them by the order field.  */
2067 
2068 static int
sort_by_order(const void * x,const void * y)2069 sort_by_order (const void *x, const void *y)
2070 {
2071   store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2072   store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2073 
2074   if ((*tmp)->order < (*tmp2)->order)
2075     return -1;
2076   else if ((*tmp)->order > (*tmp2)->order)
2077     return 1;
2078 
2079   gcc_unreachable ();
2080 }
2081 
2082 /* Initialize a merged_store_group object from a store_immediate_info
2083    object.  */
2084 
merged_store_group(store_immediate_info * info)2085 merged_store_group::merged_store_group (store_immediate_info *info)
2086 {
2087   start = info->bitpos;
2088   width = info->bitsize;
2089   bitregion_start = info->bitregion_start;
2090   bitregion_end = info->bitregion_end;
2091   /* VAL has memory allocated for it in apply_stores once the group
2092      width has been finalized.  */
2093   val = NULL;
2094   mask = NULL;
2095   bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
2096   string_concatenation = info->rhs_code == STRING_CST;
2097   only_constants = info->rhs_code == INTEGER_CST;
2098   consecutive = true;
2099   first_nonmergeable_order = ~0U;
2100   lp_nr = info->lp_nr;
2101   unsigned HOST_WIDE_INT align_bitpos = 0;
2102   get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2103                                 &align, &align_bitpos);
2104   align_base = start - align_bitpos;
2105   for (int i = 0; i < 2; ++i)
2106     {
2107       store_operand_info &op = info->ops[i];
2108       if (op.base_addr == NULL_TREE)
2109           {
2110             load_align[i] = 0;
2111             load_align_base[i] = 0;
2112           }
2113       else
2114           {
2115             get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
2116             load_align_base[i] = op.bitpos - align_bitpos;
2117           }
2118     }
2119   stores.create (1);
2120   stores.safe_push (info);
2121   last_stmt = info->stmt;
2122   last_order = info->order;
2123   first_stmt = last_stmt;
2124   first_order = last_order;
2125   buf_size = 0;
2126 }
2127 
~merged_store_group()2128 merged_store_group::~merged_store_group ()
2129 {
2130   if (val)
2131     XDELETEVEC (val);
2132 }
2133 
2134 /* Return true if the store described by INFO can be merged into the group.  */
2135 
2136 bool
can_be_merged_into(store_immediate_info * info)2137 merged_store_group::can_be_merged_into (store_immediate_info *info)
2138 {
2139   /* Do not merge bswap patterns.  */
2140   if (info->rhs_code == LROTATE_EXPR)
2141     return false;
2142 
2143   if (info->lp_nr != lp_nr)
2144     return false;
2145 
2146   /* The canonical case.  */
2147   if (info->rhs_code == stores[0]->rhs_code)
2148     return true;
2149 
2150   /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST.  */
2151   if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
2152     return !string_concatenation;
2153 
2154   if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
2155     return !string_concatenation;
2156 
2157   /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
2158      only for small regions since this can generate a lot of instructions.  */
2159   if (info->rhs_code == MEM_REF
2160       && (stores[0]->rhs_code == INTEGER_CST
2161             || stores[0]->rhs_code == BIT_INSERT_EXPR)
2162       && info->bitregion_start == stores[0]->bitregion_start
2163       && info->bitregion_end == stores[0]->bitregion_end
2164       && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
2165     return !string_concatenation;
2166 
2167   if (stores[0]->rhs_code == MEM_REF
2168       && (info->rhs_code == INTEGER_CST
2169             || info->rhs_code == BIT_INSERT_EXPR)
2170       && info->bitregion_start == stores[0]->bitregion_start
2171       && info->bitregion_end == stores[0]->bitregion_end
2172       && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
2173     return !string_concatenation;
2174 
2175   /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR.  */
2176   if (info->rhs_code == STRING_CST
2177       && stores[0]->rhs_code == INTEGER_CST
2178       && stores[0]->bitsize == CHAR_BIT)
2179     return !bit_insertion;
2180 
2181   if (stores[0]->rhs_code == STRING_CST
2182       && info->rhs_code == INTEGER_CST
2183       && info->bitsize == CHAR_BIT)
2184     return !bit_insertion;
2185 
2186   return false;
2187 }
2188 
2189 /* Helper method for merge_into and merge_overlapping to do
2190    the common part.  */
2191 
2192 void
do_merge(store_immediate_info * info)2193 merged_store_group::do_merge (store_immediate_info *info)
2194 {
2195   bitregion_start = MIN (bitregion_start, info->bitregion_start);
2196   bitregion_end = MAX (bitregion_end, info->bitregion_end);
2197 
2198   unsigned int this_align;
2199   unsigned HOST_WIDE_INT align_bitpos = 0;
2200   get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2201                                 &this_align, &align_bitpos);
2202   if (this_align > align)
2203     {
2204       align = this_align;
2205       align_base = info->bitpos - align_bitpos;
2206     }
2207   for (int i = 0; i < 2; ++i)
2208     {
2209       store_operand_info &op = info->ops[i];
2210       if (!op.base_addr)
2211           continue;
2212 
2213       get_object_alignment_1 (op.val, &this_align, &align_bitpos);
2214       if (this_align > load_align[i])
2215           {
2216             load_align[i] = this_align;
2217             load_align_base[i] = op.bitpos - align_bitpos;
2218           }
2219     }
2220 
2221   gimple *stmt = info->stmt;
2222   stores.safe_push (info);
2223   if (info->order > last_order)
2224     {
2225       last_order = info->order;
2226       last_stmt = stmt;
2227     }
2228   else if (info->order < first_order)
2229     {
2230       first_order = info->order;
2231       first_stmt = stmt;
2232     }
2233 
2234   if (info->bitpos != start + width)
2235     consecutive = false;
2236 
2237   /* We need to use extraction if there is any bit-field.  */
2238   if (info->rhs_code == BIT_INSERT_EXPR)
2239     {
2240       bit_insertion = true;
2241       gcc_assert (!string_concatenation);
2242     }
2243 
2244   /* We want to use concatenation if there is any string.  */
2245   if (info->rhs_code == STRING_CST)
2246     {
2247       string_concatenation = true;
2248       gcc_assert (!bit_insertion);
2249     }
2250 
2251   /* But we cannot use it if we don't have consecutive stores.  */
2252   if (!consecutive)
2253     string_concatenation = false;
2254 
2255   if (info->rhs_code != INTEGER_CST)
2256     only_constants = false;
2257 }
2258 
2259 /* Merge a store recorded by INFO into this merged store.
2260    The store is not overlapping with the existing recorded
2261    stores.  */
2262 
2263 void
merge_into(store_immediate_info * info)2264 merged_store_group::merge_into (store_immediate_info *info)
2265 {
2266   do_merge (info);
2267 
2268   /* Make sure we're inserting in the position we think we're inserting.  */
2269   gcc_assert (info->bitpos >= start + width
2270                 && info->bitregion_start <= bitregion_end);
2271 
2272   width = info->bitpos + info->bitsize - start;
2273 }
2274 
2275 /* Merge a store described by INFO into this merged store.
2276    INFO overlaps in some way with the current store (i.e. it's not contiguous
2277    which is handled by merged_store_group::merge_into).  */
2278 
2279 void
merge_overlapping(store_immediate_info * info)2280 merged_store_group::merge_overlapping (store_immediate_info *info)
2281 {
2282   do_merge (info);
2283 
2284   /* If the store extends the size of the group, extend the width.  */
2285   if (info->bitpos + info->bitsize > start + width)
2286     width = info->bitpos + info->bitsize - start;
2287 }
2288 
2289 /* Go through all the recorded stores in this group in program order and
2290    apply their values to the VAL byte array to create the final merged
2291    value.  Return true if the operation succeeded.  */
2292 
2293 bool
apply_stores()2294 merged_store_group::apply_stores ()
2295 {
2296   store_immediate_info *info;
2297   unsigned int i;
2298 
2299   /* Make sure we have more than one store in the group, otherwise we cannot
2300      merge anything.  */
2301   if (bitregion_start % BITS_PER_UNIT != 0
2302       || bitregion_end % BITS_PER_UNIT != 0
2303       || stores.length () == 1)
2304     return false;
2305 
2306   buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;
2307 
2308   /* Really do string concatenation for large strings only.  */
2309   if (buf_size <= MOVE_MAX)
2310     string_concatenation = false;
2311 
2312   /* String concatenation only works for byte aligned start and end.  */
2313   if (start % BITS_PER_UNIT != 0 || width % BITS_PER_UNIT != 0)
2314     string_concatenation = false;
2315 
2316   /* Create a power-of-2-sized buffer for native_encode_expr.  */
2317   if (!string_concatenation)
2318     buf_size = 1 << ceil_log2 (buf_size);
2319 
2320   val = XNEWVEC (unsigned char, 2 * buf_size);
2321   mask = val + buf_size;
2322   memset (val, 0, buf_size);
2323   memset (mask, ~0U, buf_size);
2324 
2325   stores.qsort (sort_by_order);
2326 
2327   FOR_EACH_VEC_ELT (stores, i, info)
2328     {
2329       unsigned int pos_in_buffer = info->bitpos - bitregion_start;
2330       tree cst;
2331       if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2332           cst = info->ops[0].val;
2333       else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2334           cst = info->ops[1].val;
2335       else
2336           cst = NULL_TREE;
2337       bool ret = true;
2338       if (cst && info->rhs_code != BIT_INSERT_EXPR)
2339           ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
2340                                              buf_size);
2341       unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2342       if (BYTES_BIG_ENDIAN)
2343           clear_bit_region_be (m, (BITS_PER_UNIT - 1
2344                                          - (pos_in_buffer % BITS_PER_UNIT)),
2345                                    info->bitsize);
2346       else
2347           clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
2348       if (cst && dump_file && (dump_flags & TDF_DETAILS))
2349           {
2350             if (ret)
2351               {
2352                 fputs ("After writing ", dump_file);
2353                 print_generic_expr (dump_file, cst, TDF_NONE);
2354                 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
2355                            " at position %d\n", info->bitsize, pos_in_buffer);
2356                 fputs ("  the merged value contains ", dump_file);
2357                 dump_char_array (dump_file, val, buf_size);
2358                 fputs ("  the merged mask contains  ", dump_file);
2359                 dump_char_array (dump_file, mask, buf_size);
2360                 if (bit_insertion)
2361                     fputs ("  bit insertion is required\n", dump_file);
2362                 if (string_concatenation)
2363                     fputs ("  string concatenation is required\n", dump_file);
2364               }
2365             else
2366               fprintf (dump_file, "Failed to merge stores\n");
2367           }
2368       if (!ret)
2369           return false;
2370     }
2371   stores.qsort (sort_by_bitpos);
2372   return true;
2373 }
2374 
2375 /* Structure describing the store chain.  */
2376 
2377 class imm_store_chain_info
2378 {
2379 public:
2380   /* Doubly-linked list that imposes an order on chain processing.
2381      PNXP (prev's next pointer) points to the head of a list, or to
2382      the next field in the previous chain in the list.
2383      See pass_store_merging::m_stores_head for more rationale.  */
2384   imm_store_chain_info *next, **pnxp;
2385   tree base_addr;
2386   auto_vec<store_immediate_info *> m_store_info;
2387   auto_vec<merged_store_group *> m_merged_store_groups;
2388 
imm_store_chain_info(imm_store_chain_info * & inspt,tree b_a)2389   imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2390   : next (inspt), pnxp (&inspt), base_addr (b_a)
2391   {
2392     inspt = this;
2393     if (next)
2394       {
2395           gcc_checking_assert (pnxp == next->pnxp);
2396           next->pnxp = &next;
2397       }
2398   }
~imm_store_chain_info()2399   ~imm_store_chain_info ()
2400   {
2401     *pnxp = next;
2402     if (next)
2403       {
2404           gcc_checking_assert (&next == next->pnxp);
2405           next->pnxp = pnxp;
2406       }
2407   }
2408   bool terminate_and_process_chain ();
2409   bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
2410                                  unsigned int);
2411   bool coalesce_immediate_stores ();
2412   bool output_merged_store (merged_store_group *);
2413   bool output_merged_stores ();
2414 };
2415 
2416 const pass_data pass_data_tree_store_merging = {
2417   GIMPLE_PASS,     /* type */
2418   "store-merging", /* name */
2419   OPTGROUP_NONE,   /* optinfo_flags */
2420   TV_GIMPLE_STORE_MERGING,     /* tv_id */
2421   PROP_ssa,         /* properties_required */
2422   0,                   /* properties_provided */
2423   0,                   /* properties_destroyed */
2424   0,                   /* todo_flags_start */
2425   TODO_update_ssa, /* todo_flags_finish */
2426 };
2427 
2428 class pass_store_merging : public gimple_opt_pass
2429 {
2430 public:
pass_store_merging(gcc::context * ctxt)2431   pass_store_merging (gcc::context *ctxt)
2432     : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head (),
2433       m_n_chains (0), m_n_stores (0)
2434   {
2435   }
2436 
2437   /* Pass not supported for PDP-endian, nor for insane hosts or
2438      target character sizes where native_{encode,interpret}_expr
2439      doesn't work properly.  */
2440   virtual bool
gate(function *)2441   gate (function *)
2442   {
2443     return flag_store_merging
2444              && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
2445              && CHAR_BIT == 8
2446              && BITS_PER_UNIT == 8;
2447   }
2448 
2449   virtual unsigned int execute (function *);
2450 
2451 private:
2452   hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
2453 
2454   /* Form a doubly-linked stack of the elements of m_stores, so that
2455      we can iterate over them in a predictable way.  Using this order
2456      avoids extraneous differences in the compiler output just because
2457      of tree pointer variations (e.g. different chains end up in
2458      different positions of m_stores, so they are handled in different
2459      orders, so they allocate or release SSA names in different
2460      orders, and when they get reused, subsequent passes end up
2461      getting different SSA names, which may ultimately change
2462      decisions when going out of SSA).  */
2463   imm_store_chain_info *m_stores_head;
2464 
2465   /* The number of store chains currently tracked.  */
2466   unsigned m_n_chains;
2467   /* The number of stores currently tracked.  */
2468   unsigned m_n_stores;
2469 
2470   bool process_store (gimple *);
2471   bool terminate_and_process_chain (imm_store_chain_info *);
2472   bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
2473   bool terminate_and_process_all_chains ();
2474 }; // class pass_store_merging
2475 
2476 /* Terminate and process all recorded chains.  Return true if any changes
2477    were made.  */
2478 
2479 bool
terminate_and_process_all_chains()2480 pass_store_merging::terminate_and_process_all_chains ()
2481 {
2482   bool ret = false;
2483   while (m_stores_head)
2484     ret |= terminate_and_process_chain (m_stores_head);
2485   gcc_assert (m_stores.is_empty ());
2486   return ret;
2487 }
2488 
2489 /* Terminate all chains that are affected by the statement STMT.
2490    CHAIN_INFO is the chain we should ignore from the checks if
2491    non-NULL.  Return true if any changes were made.  */
2492 
2493 bool
terminate_all_aliasing_chains(imm_store_chain_info ** chain_info,gimple * stmt)2494 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2495                                                                  **chain_info,
2496                                                                gimple *stmt)
2497 {
2498   bool ret = false;
2499 
2500   /* If the statement doesn't touch memory it can't alias.  */
2501   if (!gimple_vuse (stmt))
2502     return false;
2503 
2504   tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
2505   ao_ref store_lhs_ref;
2506   ao_ref_init (&store_lhs_ref, store_lhs);
2507   for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
2508     {
2509       next = cur->next;
2510 
2511       /* We already checked all the stores in chain_info and terminated the
2512            chain if necessary.  Skip it here.  */
2513       if (chain_info && *chain_info == cur)
2514           continue;
2515 
2516       store_immediate_info *info;
2517       unsigned int i;
2518       FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
2519           {
2520             tree lhs = gimple_assign_lhs (info->stmt);
2521             ao_ref lhs_ref;
2522             ao_ref_init (&lhs_ref, lhs);
2523             if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2524                 || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2525                 || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2526                                                                &lhs_ref, false)))
2527               {
2528                 if (dump_file && (dump_flags & TDF_DETAILS))
2529                     {
2530                       fprintf (dump_file, "stmt causes chain termination:\n");
2531                       print_gimple_stmt (dump_file, stmt, 0);
2532                     }
2533                 ret |= terminate_and_process_chain (cur);
2534                 break;
2535               }
2536           }
2537     }
2538 
2539   return ret;
2540 }
2541 
2542 /* Helper function.  Terminate the recorded chain storing to base object
2543    BASE.  Return true if the merging and output was successful.  The m_stores
2544    entry is removed after the processing in any case.  */
2545 
2546 bool
terminate_and_process_chain(imm_store_chain_info * chain_info)2547 pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
2548 {
2549   m_n_stores -= chain_info->m_store_info.length ();
2550   m_n_chains--;
2551   bool ret = chain_info->terminate_and_process_chain ();
2552   m_stores.remove (chain_info->base_addr);
2553   delete chain_info;
2554   return ret;
2555 }
2556 
2557 /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2558    may clobber REF.  FIRST and LAST must have non-NULL vdef.  We want to
2559    be able to sink load of REF across stores between FIRST and LAST, up
2560    to right before LAST.  */
2561 
2562 bool
stmts_may_clobber_ref_p(gimple * first,gimple * last,tree ref)2563 stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2564 {
2565   ao_ref r;
2566   ao_ref_init (&r, ref);
2567   unsigned int count = 0;
2568   tree vop = gimple_vdef (last);
2569   gimple *stmt;
2570 
2571   /* Return true conservatively if the basic blocks are different.  */
2572   if (gimple_bb (first) != gimple_bb (last))
2573     return true;
2574 
2575   do
2576     {
2577       stmt = SSA_NAME_DEF_STMT (vop);
2578       if (stmt_may_clobber_ref_p_1 (stmt, &r))
2579           return true;
2580       if (gimple_store_p (stmt)
2581             && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2582           return true;
2583       /* Avoid quadratic compile time by bounding the number of checks
2584            we perform.  */
2585       if (++count > MAX_STORE_ALIAS_CHECKS)
2586           return true;
2587       vop = gimple_vuse (stmt);
2588     }
2589   while (stmt != first);
2590 
2591   return false;
2592 }
2593 
2594 /* Return true if INFO->ops[IDX] is mergeable with the
2595    corresponding loads already in MERGED_STORE group.
2596    BASE_ADDR is the base address of the whole store group.  */
2597 
2598 bool
compatible_load_p(merged_store_group * merged_store,store_immediate_info * info,tree base_addr,int idx)2599 compatible_load_p (merged_store_group *merged_store,
2600                        store_immediate_info *info,
2601                        tree base_addr, int idx)
2602 {
2603   store_immediate_info *infof = merged_store->stores[0];
2604   if (!info->ops[idx].base_addr
2605       || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2606                        info->bitpos - infof->bitpos)
2607       || !operand_equal_p (info->ops[idx].base_addr,
2608                                  infof->ops[idx].base_addr, 0))
2609     return false;
2610 
2611   store_immediate_info *infol = merged_store->stores.last ();
2612   tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2613   /* In this case all vuses should be the same, e.g.
2614      _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2615      or
2616      _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2617      and we can emit the coalesced load next to any of those loads.  */
2618   if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2619       && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2620     return true;
2621 
2622   /* Otherwise, at least for now require that the load has the same
2623      vuse as the store.  See following examples.  */
2624   if (gimple_vuse (info->stmt) != load_vuse)
2625     return false;
2626 
2627   if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2628       || (infof != infol
2629             && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2630     return false;
2631 
2632   /* If the load is from the same location as the store, already
2633      the construction of the immediate chain info guarantees no intervening
2634      stores, so no further checks are needed.  Example:
2635      _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4;  */
2636   if (known_eq (info->ops[idx].bitpos, info->bitpos)
2637       && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2638     return true;
2639 
2640   /* Otherwise, we need to punt if any of the loads can be clobbered by any
2641      of the stores in the group, or any other stores in between those.
2642      Previous calls to compatible_load_p ensured that for all the
2643      merged_store->stores IDX loads, no stmts starting with
2644      merged_store->first_stmt and ending right before merged_store->last_stmt
2645      clobbers those loads.  */
2646   gimple *first = merged_store->first_stmt;
2647   gimple *last = merged_store->last_stmt;
2648   /* The stores are sorted by increasing store bitpos, so if info->stmt store
2649      comes before the so far first load, we'll be changing
2650      merged_store->first_stmt.  In that case we need to give up if
2651      any of the earlier processed loads clobber with the stmts in the new
2652      range.  */
2653   if (info->order < merged_store->first_order)
2654     {
2655       for (store_immediate_info *infoc : merged_store->stores)
2656           if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2657             return false;
2658       first = info->stmt;
2659     }
2660   /* Similarly, we could change merged_store->last_stmt, so ensure
2661      in that case no stmts in the new range clobber any of the earlier
2662      processed loads.  */
2663   else if (info->order > merged_store->last_order)
2664     {
2665       for (store_immediate_info *infoc : merged_store->stores)
2666           if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2667             return false;
2668       last = info->stmt;
2669     }
2670   /* And finally, we'd be adding a new load to the set, ensure it isn't
2671      clobbered in the new range.  */
2672   if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2673     return false;
2674 
2675   /* Otherwise, we are looking for:
2676      _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2677      or
2678      _1 = s.a; t.a = _1; _2 = s.b; t.b = _2;  */
2679   return true;
2680 }
2681 
2682 /* Add all refs loaded to compute VAL to REFS vector.  */
2683 
2684 void
gather_bswap_load_refs(vec<tree> * refs,tree val)2685 gather_bswap_load_refs (vec<tree> *refs, tree val)
2686 {
2687   if (TREE_CODE (val) != SSA_NAME)
2688     return;
2689 
2690   gimple *stmt = SSA_NAME_DEF_STMT (val);
2691   if (!is_gimple_assign (stmt))
2692     return;
2693 
2694   if (gimple_assign_load_p (stmt))
2695     {
2696       refs->safe_push (gimple_assign_rhs1 (stmt));
2697       return;
2698     }
2699 
2700   switch (gimple_assign_rhs_class (stmt))
2701     {
2702     case GIMPLE_BINARY_RHS:
2703       gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2704       /* FALLTHRU */
2705     case GIMPLE_UNARY_RHS:
2706       gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2707       break;
2708     default:
2709       gcc_unreachable ();
2710     }
2711 }
2712 
2713 /* Check if there are any stores in M_STORE_INFO after index I
2714    (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2715    a potential group ending with END that have their order
2716    smaller than LAST_ORDER.  ALL_INTEGER_CST_P is true if
2717    all the stores already merged and the one under consideration
2718    have rhs_code of INTEGER_CST.  Return true if there are no such stores.
2719    Consider:
2720      MEM[(long long int *)p_28] = 0;
2721      MEM[(long long int *)p_28 + 8B] = 0;
2722      MEM[(long long int *)p_28 + 16B] = 0;
2723      MEM[(long long int *)p_28 + 24B] = 0;
2724      _129 = (int) _130;
2725      MEM[(int *)p_28 + 8B] = _129;
2726      MEM[(int *)p_28].a = -1;
2727    We already have
2728      MEM[(long long int *)p_28] = 0;
2729      MEM[(int *)p_28].a = -1;
2730    stmts in the current group and need to consider if it is safe to
2731    add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2732    There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2733    store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2734    into the group and merging of those 3 stores is successful, merged
2735    stmts will be emitted at the latest store from that group, i.e.
2736    LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2737    The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2738    the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2739    so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2740    into the group.  That way it will be its own store group and will
2741    not be touched.  If ALL_INTEGER_CST_P and there are overlapping
2742    INTEGER_CST stores, those are mergeable using merge_overlapping,
2743    so don't return false for those.
2744 
2745    Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2746    (exclusive), whether they don't overlap the bitrange START to END
2747    and have order in between FIRST_ORDER and LAST_ORDER.  This is to
2748    prevent merging in cases like:
2749      MEM <char[12]> [&b + 8B] = {};
2750      MEM[(short *) &b] = 5;
2751      _5 = *x_4(D);
2752      MEM <long long unsigned int> [&b + 2B] = _5;
2753      MEM[(char *)&b + 16B] = 88;
2754      MEM[(int *)&b + 20B] = 1;
2755    The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2756    be merged with it, because the = _5 store overlaps these and is in between
2757    them in sort_by_order ordering.  If it was merged, the merged store would
2758    go after the = _5 store and thus change behavior.  */
2759 
2760 static bool
check_no_overlap(const vec<store_immediate_info * > & m_store_info,unsigned int i,bool all_integer_cst_p,unsigned int first_order,unsigned int last_order,unsigned HOST_WIDE_INT start,unsigned HOST_WIDE_INT end,unsigned int first_earlier,unsigned end_earlier)2761 check_no_overlap (const vec<store_immediate_info *> &m_store_info,
2762                       unsigned int i,
2763                       bool all_integer_cst_p, unsigned int first_order,
2764                       unsigned int last_order, unsigned HOST_WIDE_INT start,
2765                       unsigned HOST_WIDE_INT end, unsigned int first_earlier,
2766                       unsigned end_earlier)
2767 {
2768   unsigned int len = m_store_info.length ();
2769   for (unsigned int j = first_earlier; j < end_earlier; j++)
2770     {
2771       store_immediate_info *info = m_store_info[j];
2772       if (info->order > first_order
2773             && info->order < last_order
2774             && info->bitpos + info->bitsize > start)
2775           return false;
2776     }
2777   for (++i; i < len; ++i)
2778     {
2779       store_immediate_info *info = m_store_info[i];
2780       if (info->bitpos >= end)
2781           break;
2782       if (info->order < last_order
2783             && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
2784           return false;
2785     }
2786   return true;
2787 }
2788 
2789 /* Return true if m_store_info[first] and at least one following store
2790    form a group which store try_size bitsize value which is byte swapped
2791    from a memory load or some value, or identity from some value.
2792    This uses the bswap pass APIs.  */
2793 
2794 bool
try_coalesce_bswap(merged_store_group * merged_store,unsigned int first,unsigned int try_size,unsigned int first_earlier)2795 imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2796                                                     unsigned int first,
2797                                                     unsigned int try_size,
2798                                                     unsigned int first_earlier)
2799 {
2800   unsigned int len = m_store_info.length (), last = first;
2801   unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2802   if (width >= try_size)
2803     return false;
2804   for (unsigned int i = first + 1; i < len; ++i)
2805     {
2806       if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2807             || m_store_info[i]->lp_nr != merged_store->lp_nr
2808             || m_store_info[i]->ins_stmt == NULL)
2809           return false;
2810       width += m_store_info[i]->bitsize;
2811       if (width >= try_size)
2812           {
2813             last = i;
2814             break;
2815           }
2816     }
2817   if (width != try_size)
2818     return false;
2819 
2820   bool allow_unaligned
2821     = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
2822   /* Punt if the combined store would not be aligned and we need alignment.  */
2823   if (!allow_unaligned)
2824     {
2825       unsigned int align = merged_store->align;
2826       unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2827       for (unsigned int i = first + 1; i <= last; ++i)
2828           {
2829             unsigned int this_align;
2830             unsigned HOST_WIDE_INT align_bitpos = 0;
2831             get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2832                                           &this_align, &align_bitpos);
2833             if (this_align > align)
2834               {
2835                 align = this_align;
2836                 align_base = m_store_info[i]->bitpos - align_bitpos;
2837               }
2838           }
2839       unsigned HOST_WIDE_INT align_bitpos
2840           = (m_store_info[first]->bitpos - align_base) & (align - 1);
2841       if (align_bitpos)
2842           align = least_bit_hwi (align_bitpos);
2843       if (align < try_size)
2844           return false;
2845     }
2846 
2847   tree type;
2848   switch (try_size)
2849     {
2850     case 16: type = uint16_type_node; break;
2851     case 32: type = uint32_type_node; break;
2852     case 64: type = uint64_type_node; break;
2853     default: gcc_unreachable ();
2854     }
2855   struct symbolic_number n;
2856   gimple *ins_stmt = NULL;
2857   int vuse_store = -1;
2858   unsigned int first_order = merged_store->first_order;
2859   unsigned int last_order = merged_store->last_order;
2860   gimple *first_stmt = merged_store->first_stmt;
2861   gimple *last_stmt = merged_store->last_stmt;
2862   unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
2863   store_immediate_info *infof = m_store_info[first];
2864 
2865   for (unsigned int i = first; i <= last; ++i)
2866     {
2867       store_immediate_info *info = m_store_info[i];
2868       struct symbolic_number this_n = info->n;
2869       this_n.type = type;
2870       if (!this_n.base_addr)
2871           this_n.range = try_size / BITS_PER_UNIT;
2872       else
2873           /* Update vuse in case it has changed by output_merged_stores.  */
2874           this_n.vuse = gimple_vuse (info->ins_stmt);
2875       unsigned int bitpos = info->bitpos - infof->bitpos;
2876       if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2877                                   BYTES_BIG_ENDIAN
2878                                   ? try_size - info->bitsize - bitpos
2879                                   : bitpos))
2880           return false;
2881       if (this_n.base_addr && vuse_store)
2882           {
2883             unsigned int j;
2884             for (j = first; j <= last; ++j)
2885               if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2886                 break;
2887             if (j > last)
2888               {
2889                 if (vuse_store == 1)
2890                     return false;
2891                 vuse_store = 0;
2892               }
2893           }
2894       if (i == first)
2895           {
2896             n = this_n;
2897             ins_stmt = info->ins_stmt;
2898           }
2899       else
2900           {
2901             if (n.base_addr && n.vuse != this_n.vuse)
2902               {
2903                 if (vuse_store == 0)
2904                     return false;
2905                 vuse_store = 1;
2906               }
2907             if (info->order > last_order)
2908               {
2909                 last_order = info->order;
2910                 last_stmt = info->stmt;
2911               }
2912             else if (info->order < first_order)
2913               {
2914                 first_order = info->order;
2915                 first_stmt = info->stmt;
2916               }
2917             end = MAX (end, info->bitpos + info->bitsize);
2918 
2919             ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2920                                                        &this_n, &n, BIT_IOR_EXPR);
2921             if (ins_stmt == NULL)
2922               return false;
2923           }
2924     }
2925 
2926   uint64_t cmpxchg, cmpnop;
2927   bool cast64_to_32;
2928   find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop, &cast64_to_32);
2929 
2930   /* A complete byte swap should make the symbolic number to start with
2931      the largest digit in the highest order byte.  Unchanged symbolic
2932      number indicates a read with same endianness as target architecture.  */
2933   if (n.n != cmpnop && n.n != cmpxchg)
2934     return false;
2935 
2936   /* For now.  */
2937   if (cast64_to_32)
2938     return false;
2939 
2940   if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2941     return false;
2942 
2943   if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
2944                                merged_store->start, end, first_earlier, first))
2945     return false;
2946 
2947   /* Don't handle memory copy this way if normal non-bswap processing
2948      would handle it too.  */
2949   if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2950     {
2951       unsigned int i;
2952       for (i = first; i <= last; ++i)
2953           if (m_store_info[i]->rhs_code != MEM_REF)
2954             break;
2955       if (i == last + 1)
2956           return false;
2957     }
2958 
2959   if (n.n == cmpxchg)
2960     switch (try_size)
2961       {
2962       case 16:
2963           /* Will emit LROTATE_EXPR.  */
2964           break;
2965       case 32:
2966           if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2967               && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2968             break;
2969           return false;
2970       case 64:
2971           if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2972               && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2973             break;
2974           return false;
2975       default:
2976           gcc_unreachable ();
2977       }
2978 
2979   if (!allow_unaligned && n.base_addr)
2980     {
2981       unsigned int align = get_object_alignment (n.src);
2982       if (align < try_size)
2983           return false;
2984     }
2985 
2986   /* If each load has vuse of the corresponding store, need to verify
2987      the loads can be sunk right before the last store.  */
2988   if (vuse_store == 1)
2989     {
2990       auto_vec<tree, 64> refs;
2991       for (unsigned int i = first; i <= last; ++i)
2992           gather_bswap_load_refs (&refs,
2993                                         gimple_assign_rhs1 (m_store_info[i]->stmt));
2994 
2995       for (tree ref : refs)
2996           if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2997             return false;
2998       n.vuse = NULL_TREE;
2999     }
3000 
3001   infof->n = n;
3002   infof->ins_stmt = ins_stmt;
3003   for (unsigned int i = first; i <= last; ++i)
3004     {
3005       m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
3006       m_store_info[i]->ops[0].base_addr = NULL_TREE;
3007       m_store_info[i]->ops[1].base_addr = NULL_TREE;
3008       if (i != first)
3009           merged_store->merge_into (m_store_info[i]);
3010     }
3011 
3012   return true;
3013 }
3014 
3015 /* Go through the candidate stores recorded in m_store_info and merge them
3016    into merged_store_group objects recorded into m_merged_store_groups
3017    representing the widened stores.  Return true if coalescing was successful
3018    and the number of widened stores is fewer than the original number
3019    of stores.  */
3020 
3021 bool
coalesce_immediate_stores()3022 imm_store_chain_info::coalesce_immediate_stores ()
3023 {
3024   /* Anything less can't be processed.  */
3025   if (m_store_info.length () < 2)
3026     return false;
3027 
3028   if (dump_file && (dump_flags & TDF_DETAILS))
3029     fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
3030                m_store_info.length ());
3031 
3032   store_immediate_info *info;
3033   unsigned int i, ignore = 0;
3034   unsigned int first_earlier = 0;
3035   unsigned int end_earlier = 0;
3036 
3037   /* Order the stores by the bitposition they write to.  */
3038   m_store_info.qsort (sort_by_bitpos);
3039 
3040   info = m_store_info[0];
3041   merged_store_group *merged_store = new merged_store_group (info);
3042   if (dump_file && (dump_flags & TDF_DETAILS))
3043     fputs ("New store group\n", dump_file);
3044 
3045   FOR_EACH_VEC_ELT (m_store_info, i, info)
3046     {
3047       unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
3048 
3049       if (i <= ignore)
3050           goto done;
3051 
3052       while (first_earlier < end_earlier
3053                && (m_store_info[first_earlier]->bitpos
3054                      + m_store_info[first_earlier]->bitsize
3055                      <= merged_store->start))
3056           first_earlier++;
3057 
3058       /* First try to handle group of stores like:
3059            p[0] = data >> 24;
3060            p[1] = data >> 16;
3061            p[2] = data >> 8;
3062            p[3] = data;
3063            using the bswap framework.  */
3064       if (info->bitpos == merged_store->start + merged_store->width
3065             && merged_store->stores.length () == 1
3066             && merged_store->stores[0]->ins_stmt != NULL
3067             && info->lp_nr == merged_store->lp_nr
3068             && info->ins_stmt != NULL)
3069           {
3070             unsigned int try_size;
3071             for (try_size = 64; try_size >= 16; try_size >>= 1)
3072               if (try_coalesce_bswap (merged_store, i - 1, try_size,
3073                                             first_earlier))
3074                 break;
3075 
3076             if (try_size >= 16)
3077               {
3078                 ignore = i + merged_store->stores.length () - 1;
3079                 m_merged_store_groups.safe_push (merged_store);
3080                 if (ignore < m_store_info.length ())
3081                     {
3082                       merged_store = new merged_store_group (m_store_info[ignore]);
3083                       end_earlier = ignore;
3084                     }
3085                 else
3086                     merged_store = NULL;
3087                 goto done;
3088               }
3089           }
3090 
3091       new_bitregion_start
3092           = MIN (merged_store->bitregion_start, info->bitregion_start);
3093       new_bitregion_end
3094           = MAX (merged_store->bitregion_end, info->bitregion_end);
3095 
3096       if (info->order >= merged_store->first_nonmergeable_order
3097             || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
3098                 > (unsigned) param_store_merging_max_size))
3099           ;
3100 
3101       /* |---store 1---|
3102                  |---store 2---|
3103            Overlapping stores.  */
3104       else if (IN_RANGE (info->bitpos, merged_store->start,
3105                                merged_store->start + merged_store->width - 1)
3106                  /* |---store 1---||---store 2---|
3107                       Handle also the consecutive INTEGER_CST stores case here,
3108                       as we have here the code to deal with overlaps.  */
3109                  || (info->bitregion_start <= merged_store->bitregion_end
3110                        && info->rhs_code == INTEGER_CST
3111                        && merged_store->only_constants
3112                        && merged_store->can_be_merged_into (info)))
3113           {
3114             /* Only allow overlapping stores of constants.  */
3115             if (info->rhs_code == INTEGER_CST
3116                 && merged_store->only_constants
3117                 && info->lp_nr == merged_store->lp_nr)
3118               {
3119                 unsigned int first_order
3120                     = MIN (merged_store->first_order, info->order);
3121                 unsigned int last_order
3122                     = MAX (merged_store->last_order, info->order);
3123                 unsigned HOST_WIDE_INT end
3124                     = MAX (merged_store->start + merged_store->width,
3125                            info->bitpos + info->bitsize);
3126                 if (check_no_overlap (m_store_info, i, true, first_order,
3127                                             last_order, merged_store->start, end,
3128                                             first_earlier, end_earlier))
3129                     {
3130                       /* check_no_overlap call above made sure there are no
3131                          overlapping stores with non-INTEGER_CST rhs_code
3132                          in between the first and last of the stores we've
3133                          just merged.  If there are any INTEGER_CST rhs_code
3134                          stores in between, we need to merge_overlapping them
3135                          even if in the sort_by_bitpos order there are other
3136                          overlapping stores in between.  Keep those stores as is.
3137                          Example:
3138                               MEM[(int *)p_28] = 0;
3139                               MEM[(char *)p_28 + 3B] = 1;
3140                               MEM[(char *)p_28 + 1B] = 2;
3141                               MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
3142                          We can't merge the zero store with the store of two and
3143                          not merge anything else, because the store of one is
3144                          in the original order in between those two, but in
3145                          store_by_bitpos order it comes after the last store that
3146                          we can't merge with them.  We can merge the first 3 stores
3147                          and keep the last store as is though.  */
3148                       unsigned int len = m_store_info.length ();
3149                       unsigned int try_order = last_order;
3150                       unsigned int first_nonmergeable_order;
3151                       unsigned int k;
3152                       bool last_iter = false;
3153                       int attempts = 0;
3154                       do
3155                         {
3156                           unsigned int max_order = 0;
3157                           unsigned int min_order = first_order;
3158                           unsigned first_nonmergeable_int_order = ~0U;
3159                           unsigned HOST_WIDE_INT this_end = end;
3160                           k = i;
3161                           first_nonmergeable_order = ~0U;
3162                           for (unsigned int j = i + 1; j < len; ++j)
3163                               {
3164                                 store_immediate_info *info2 = m_store_info[j];
3165                                 if (info2->bitpos >= this_end)
3166                                   break;
3167                                 if (info2->order < try_order)
3168                                   {
3169                                     if (info2->rhs_code != INTEGER_CST
3170                                           || info2->lp_nr != merged_store->lp_nr)
3171                                         {
3172                                           /* Normally check_no_overlap makes sure this
3173                                              doesn't happen, but if end grows below,
3174                                              then we need to process more stores than
3175                                              check_no_overlap verified.  Example:
3176                                               MEM[(int *)p_5] = 0;
3177                                               MEM[(short *)p_5 + 3B] = 1;
3178                                               MEM[(char *)p_5 + 4B] = _9;
3179                                               MEM[(char *)p_5 + 2B] = 2;  */
3180                                           k = 0;
3181                                           break;
3182                                         }
3183                                     k = j;
3184                                     min_order = MIN (min_order, info2->order);
3185                                     this_end = MAX (this_end,
3186                                                         info2->bitpos + info2->bitsize);
3187                                   }
3188                                 else if (info2->rhs_code == INTEGER_CST
3189                                            && info2->lp_nr == merged_store->lp_nr
3190                                            && !last_iter)
3191                                   {
3192                                     max_order = MAX (max_order, info2->order + 1);
3193                                     first_nonmergeable_int_order
3194                                         = MIN (first_nonmergeable_int_order,
3195                                                info2->order);
3196                                   }
3197                                 else
3198                                   first_nonmergeable_order
3199                                     = MIN (first_nonmergeable_order, info2->order);
3200                               }
3201                           if (k > i
3202                                 && !check_no_overlap (m_store_info, len - 1, true,
3203                                                             min_order, try_order,
3204                                                             merged_store->start, this_end,
3205                                                             first_earlier, end_earlier))
3206                               k = 0;
3207                           if (k == 0)
3208                               {
3209                                 if (last_order == try_order)
3210                                   break;
3211                                 /* If this failed, but only because we grew
3212                                    try_order, retry with the last working one,
3213                                    so that we merge at least something.  */
3214                                 try_order = last_order;
3215                                 last_iter = true;
3216                                 continue;
3217                               }
3218                           last_order = try_order;
3219                           /* Retry with a larger try_order to see if we could
3220                                merge some further INTEGER_CST stores.  */
3221                           if (max_order
3222                                 && (first_nonmergeable_int_order
3223                                     < first_nonmergeable_order))
3224                               {
3225                                 try_order = MIN (max_order,
3226                                                      first_nonmergeable_order);
3227                                 try_order
3228                                   = MIN (try_order,
3229                                            merged_store->first_nonmergeable_order);
3230                                 if (try_order > last_order && ++attempts < 16)
3231                                   continue;
3232                               }
3233                           first_nonmergeable_order
3234                               = MIN (first_nonmergeable_order,
3235                                      first_nonmergeable_int_order);
3236                           end = this_end;
3237                           break;
3238                         }
3239                       while (1);
3240 
3241                       if (k != 0)
3242                         {
3243                           merged_store->merge_overlapping (info);
3244 
3245                           merged_store->first_nonmergeable_order
3246                               = MIN (merged_store->first_nonmergeable_order,
3247                                      first_nonmergeable_order);
3248 
3249                           for (unsigned int j = i + 1; j <= k; j++)
3250                               {
3251                                 store_immediate_info *info2 = m_store_info[j];
3252                                 gcc_assert (info2->bitpos < end);
3253                                 if (info2->order < last_order)
3254                                   {
3255                                     gcc_assert (info2->rhs_code == INTEGER_CST);
3256                                     if (info != info2)
3257                                         merged_store->merge_overlapping (info2);
3258                                   }
3259                                 /* Other stores are kept and not merged in any
3260                                    way.  */
3261                               }
3262                           ignore = k;
3263                           goto done;
3264                         }
3265                     }
3266               }
3267           }
3268       /* |---store 1---||---store 2---|
3269            This store is consecutive to the previous one.
3270            Merge it into the current store group.  There can be gaps in between
3271            the stores, but there can't be gaps in between bitregions.  */
3272       else if (info->bitregion_start <= merged_store->bitregion_end
3273                  && merged_store->can_be_merged_into (info))
3274           {
3275             store_immediate_info *infof = merged_store->stores[0];
3276 
3277             /* All the rhs_code ops that take 2 operands are commutative,
3278                swap the operands if it could make the operands compatible.  */
3279             if (infof->ops[0].base_addr
3280                 && infof->ops[1].base_addr
3281                 && info->ops[0].base_addr
3282                 && info->ops[1].base_addr
3283                 && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
3284                                  info->bitpos - infof->bitpos)
3285                 && operand_equal_p (info->ops[1].base_addr,
3286                                           infof->ops[0].base_addr, 0))
3287               {
3288                 std::swap (info->ops[0], info->ops[1]);
3289                 info->ops_swapped_p = true;
3290               }
3291             if (check_no_overlap (m_store_info, i, false,
3292                                         MIN (merged_store->first_order, info->order),
3293                                         MAX (merged_store->last_order, info->order),
3294                                         merged_store->start,
3295                                         MAX (merged_store->start + merged_store->width,
3296                                              info->bitpos + info->bitsize),
3297                                         first_earlier, end_earlier))
3298               {
3299                 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
3300                 if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
3301                     {
3302                       info->rhs_code = BIT_INSERT_EXPR;
3303                       info->ops[0].val = gimple_assign_rhs1 (info->stmt);
3304                       info->ops[0].base_addr = NULL_TREE;
3305                     }
3306                 else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
3307                     {
3308                       for (store_immediate_info *infoj : merged_store->stores)
3309                         {
3310                           infoj->rhs_code = BIT_INSERT_EXPR;
3311                           infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
3312                           infoj->ops[0].base_addr = NULL_TREE;
3313                         }
3314                       merged_store->bit_insertion = true;
3315                     }
3316                 if ((infof->ops[0].base_addr
3317                        ? compatible_load_p (merged_store, info, base_addr, 0)
3318                        : !info->ops[0].base_addr)
3319                       && (infof->ops[1].base_addr
3320                           ? compatible_load_p (merged_store, info, base_addr, 1)
3321                           : !info->ops[1].base_addr))
3322                     {
3323                       merged_store->merge_into (info);
3324                       goto done;
3325                     }
3326               }
3327           }
3328 
3329       /* |---store 1---| <gap> |---store 2---|.
3330            Gap between stores or the rhs not compatible.  Start a new group.  */
3331 
3332       /* Try to apply all the stores recorded for the group to determine
3333            the bitpattern they write and discard it if that fails.
3334            This will also reject single-store groups.  */
3335       if (merged_store->apply_stores ())
3336           m_merged_store_groups.safe_push (merged_store);
3337       else
3338           delete merged_store;
3339 
3340       merged_store = new merged_store_group (info);
3341       end_earlier = i;
3342       if (dump_file && (dump_flags & TDF_DETAILS))
3343           fputs ("New store group\n", dump_file);
3344 
3345     done:
3346       if (dump_file && (dump_flags & TDF_DETAILS))
3347           {
3348             fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3349                                     " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
3350                        i, info->bitsize, info->bitpos);
3351             print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
3352             fputc ('\n', dump_file);
3353           }
3354     }
3355 
3356   /* Record or discard the last store group.  */
3357   if (merged_store)
3358     {
3359       if (merged_store->apply_stores ())
3360           m_merged_store_groups.safe_push (merged_store);
3361       else
3362           delete merged_store;
3363     }
3364 
3365   gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
3366 
3367   bool success
3368     = !m_merged_store_groups.is_empty ()
3369       && m_merged_store_groups.length () < m_store_info.length ();
3370 
3371   if (success && dump_file)
3372     fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
3373                m_merged_store_groups.length ());
3374 
3375   return success;
3376 }
3377 
3378 /* Return the type to use for the merged stores or loads described by STMTS.
3379    This is needed to get the alias sets right.  If IS_LOAD, look for rhs,
3380    otherwise lhs.  Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3381    of the MEM_REFs if any.  */
3382 
3383 static tree
get_alias_type_for_stmts(vec<gimple * > & stmts,bool is_load,unsigned short * cliquep,unsigned short * basep)3384 get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3385                                 unsigned short *cliquep, unsigned short *basep)
3386 {
3387   gimple *stmt;
3388   unsigned int i;
3389   tree type = NULL_TREE;
3390   tree ret = NULL_TREE;
3391   *cliquep = 0;
3392   *basep = 0;
3393 
3394   FOR_EACH_VEC_ELT (stmts, i, stmt)
3395     {
3396       tree ref = is_load ? gimple_assign_rhs1 (stmt)
3397                                : gimple_assign_lhs (stmt);
3398       tree type1 = reference_alias_ptr_type (ref);
3399       tree base = get_base_address (ref);
3400 
3401       if (i == 0)
3402           {
3403             if (TREE_CODE (base) == MEM_REF)
3404               {
3405                 *cliquep = MR_DEPENDENCE_CLIQUE (base);
3406                 *basep = MR_DEPENDENCE_BASE (base);
3407               }
3408             ret = type = type1;
3409             continue;
3410           }
3411       if (!alias_ptr_types_compatible_p (type, type1))
3412           ret = ptr_type_node;
3413       if (TREE_CODE (base) != MEM_REF
3414             || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3415             || *basep != MR_DEPENDENCE_BASE (base))
3416           {
3417             *cliquep = 0;
3418             *basep = 0;
3419           }
3420     }
3421   return ret;
3422 }
3423 
3424 /* Return the location_t information we can find among the statements
3425    in STMTS.  */
3426 
3427 static location_t
get_location_for_stmts(vec<gimple * > & stmts)3428 get_location_for_stmts (vec<gimple *> &stmts)
3429 {
3430   for (gimple *stmt : stmts)
3431     if (gimple_has_location (stmt))
3432       return gimple_location (stmt);
3433 
3434   return UNKNOWN_LOCATION;
3435 }
3436 
3437 /* Used to decribe a store resulting from splitting a wide store in smaller
3438    regularly-sized stores in split_group.  */
3439 
3440 class split_store
3441 {
3442 public:
3443   unsigned HOST_WIDE_INT bytepos;
3444   unsigned HOST_WIDE_INT size;
3445   unsigned HOST_WIDE_INT align;
3446   auto_vec<store_immediate_info *> orig_stores;
3447   /* True if there is a single orig stmt covering the whole split store.  */
3448   bool orig;
3449   split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3450                  unsigned HOST_WIDE_INT);
3451 };
3452 
3453 /* Simple constructor.  */
3454 
split_store(unsigned HOST_WIDE_INT bp,unsigned HOST_WIDE_INT sz,unsigned HOST_WIDE_INT al)3455 split_store::split_store (unsigned HOST_WIDE_INT bp,
3456                                 unsigned HOST_WIDE_INT sz,
3457                                 unsigned HOST_WIDE_INT al)
3458                                 : bytepos (bp), size (sz), align (al), orig (false)
3459 {
3460   orig_stores.create (0);
3461 }
3462 
3463 /* Record all stores in GROUP that write to the region starting at BITPOS and
3464    is of size BITSIZE.  Record infos for such statements in STORES if
3465    non-NULL.  The stores in GROUP must be sorted by bitposition.  Return INFO
3466    if there is exactly one original store in the range (in that case ignore
3467    clobber stmts, unless there are only clobber stmts).  */
3468 
3469 static store_immediate_info *
find_constituent_stores(class merged_store_group * group,vec<store_immediate_info * > * stores,unsigned int * first,unsigned HOST_WIDE_INT bitpos,unsigned HOST_WIDE_INT bitsize)3470 find_constituent_stores (class merged_store_group *group,
3471                                vec<store_immediate_info *> *stores,
3472                                unsigned int *first,
3473                                unsigned HOST_WIDE_INT bitpos,
3474                                unsigned HOST_WIDE_INT bitsize)
3475 {
3476   store_immediate_info *info, *ret = NULL;
3477   unsigned int i;
3478   bool second = false;
3479   bool update_first = true;
3480   unsigned HOST_WIDE_INT end = bitpos + bitsize;
3481   for (i = *first; group->stores.iterate (i, &info); ++i)
3482     {
3483       unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3484       unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
3485       if (stmt_end <= bitpos)
3486           {
3487             /* BITPOS passed to this function never decreases from within the
3488                same split_group call, so optimize and don't scan info records
3489                which are known to end before or at BITPOS next time.
3490                Only do it if all stores before this one also pass this.  */
3491             if (update_first)
3492               *first = i + 1;
3493             continue;
3494           }
3495       else
3496           update_first = false;
3497 
3498       /* The stores in GROUP are ordered by bitposition so if we're past
3499            the region for this group return early.  */
3500       if (stmt_start >= end)
3501           return ret;
3502 
3503       if (gimple_clobber_p (info->stmt))
3504           {
3505             if (stores)
3506               stores->safe_push (info);
3507             if (ret == NULL)
3508               ret = info;
3509             continue;
3510           }
3511       if (stores)
3512           {
3513             stores->safe_push (info);
3514             if (ret && !gimple_clobber_p (ret->stmt))
3515               {
3516                 ret = NULL;
3517                 second = true;
3518               }
3519           }
3520       else if (ret && !gimple_clobber_p (ret->stmt))
3521           return NULL;
3522       if (!second)
3523           ret = info;
3524     }
3525   return ret;
3526 }
3527 
3528 /* Return how many SSA_NAMEs used to compute value to store in the INFO
3529    store have multiple uses.  If any SSA_NAME has multiple uses, also
3530    count statements needed to compute it.  */
3531 
3532 static unsigned
count_multiple_uses(store_immediate_info * info)3533 count_multiple_uses (store_immediate_info *info)
3534 {
3535   gimple *stmt = info->stmt;
3536   unsigned ret = 0;
3537   switch (info->rhs_code)
3538     {
3539     case INTEGER_CST:
3540     case STRING_CST:
3541       return 0;
3542     case BIT_AND_EXPR:
3543     case BIT_IOR_EXPR:
3544     case BIT_XOR_EXPR:
3545       if (info->bit_not_p)
3546           {
3547             if (!has_single_use (gimple_assign_rhs1 (stmt)))
3548               ret = 1; /* Fall through below to return
3549                               the BIT_NOT_EXPR stmt and then
3550                               BIT_{AND,IOR,XOR}_EXPR and anything it
3551                               uses.  */
3552             else
3553               /* stmt is after this the BIT_NOT_EXPR.  */
3554               stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3555           }
3556       if (!has_single_use (gimple_assign_rhs1 (stmt)))
3557           {
3558             ret += 1 + info->ops[0].bit_not_p;
3559             if (info->ops[1].base_addr)
3560               ret += 1 + info->ops[1].bit_not_p;
3561             return ret + 1;
3562           }
3563       stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3564       /* stmt is now the BIT_*_EXPR.  */
3565       if (!has_single_use (gimple_assign_rhs1 (stmt)))
3566           ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3567       else if (info->ops[info->ops_swapped_p].bit_not_p)
3568           {
3569             gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3570             if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3571               ++ret;
3572           }
3573       if (info->ops[1].base_addr == NULL_TREE)
3574           {
3575             gcc_checking_assert (!info->ops_swapped_p);
3576             return ret;
3577           }
3578       if (!has_single_use (gimple_assign_rhs2 (stmt)))
3579           ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3580       else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
3581           {
3582             gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3583             if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3584               ++ret;
3585           }
3586       return ret;
3587     case MEM_REF:
3588       if (!has_single_use (gimple_assign_rhs1 (stmt)))
3589           return 1 + info->ops[0].bit_not_p;
3590       else if (info->ops[0].bit_not_p)
3591           {
3592             stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3593             if (!has_single_use (gimple_assign_rhs1 (stmt)))
3594               return 1;
3595           }
3596       return 0;
3597     case BIT_INSERT_EXPR:
3598       return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
3599     default:
3600       gcc_unreachable ();
3601     }
3602 }
3603 
3604 /* Split a merged store described by GROUP by populating the SPLIT_STORES
3605    vector (if non-NULL) with split_store structs describing the byte offset
3606    (from the base), the bit size and alignment of each store as well as the
3607    original statements involved in each such split group.
3608    This is to separate the splitting strategy from the statement
3609    building/emission/linking done in output_merged_store.
3610    Return number of new stores.
3611    If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3612    If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3613    BZERO_FIRST may be true only when the first store covers the whole group
3614    and clears it; if BZERO_FIRST is true, keep that first store in the set
3615    unmodified and emit further stores for the overrides only.
3616    If SPLIT_STORES is NULL, it is just a dry run to count number of
3617    new stores.  */
3618 
3619 static unsigned int
split_group(merged_store_group * group,bool allow_unaligned_store,bool allow_unaligned_load,bool bzero_first,vec<split_store * > * split_stores,unsigned * total_orig,unsigned * total_new)3620 split_group (merged_store_group *group, bool allow_unaligned_store,
3621                bool allow_unaligned_load, bool bzero_first,
3622                vec<split_store *> *split_stores,
3623                unsigned *total_orig,
3624                unsigned *total_new)
3625 {
3626   unsigned HOST_WIDE_INT pos = group->bitregion_start;
3627   unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
3628   unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
3629   unsigned HOST_WIDE_INT group_align = group->align;
3630   unsigned HOST_WIDE_INT align_base = group->align_base;
3631   unsigned HOST_WIDE_INT group_load_align = group_align;
3632   bool any_orig = false;
3633 
3634   gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3635 
3636   /* For bswap framework using sets of stores, all the checking has been done
3637      earlier in try_coalesce_bswap and the result always needs to be emitted
3638      as a single store.  Likewise for string concatenation.  */
3639   if (group->stores[0]->rhs_code == LROTATE_EXPR
3640       || group->stores[0]->rhs_code == NOP_EXPR
3641       || group->string_concatenation)
3642     {
3643       gcc_assert (!bzero_first);
3644       if (total_orig)
3645           {
3646             /* Avoid the old/new stmt count heuristics.  It should be
3647                always beneficial.  */
3648             total_new[0] = 1;
3649             total_orig[0] = 2;
3650           }
3651 
3652       if (split_stores)
3653           {
3654             unsigned HOST_WIDE_INT align_bitpos
3655               = (group->start - align_base) & (group_align - 1);
3656             unsigned HOST_WIDE_INT align = group_align;
3657             if (align_bitpos)
3658               align = least_bit_hwi (align_bitpos);
3659             bytepos = group->start / BITS_PER_UNIT;
3660             split_store *store
3661               = new split_store (bytepos, group->width, align);
3662             unsigned int first = 0;
3663             find_constituent_stores (group, &store->orig_stores,
3664                                            &first, group->start, group->width);
3665             split_stores->safe_push (store);
3666           }
3667 
3668       return 1;
3669     }
3670 
3671   unsigned int ret = 0, first = 0;
3672   unsigned HOST_WIDE_INT try_pos = bytepos;
3673 
3674   if (total_orig)
3675     {
3676       unsigned int i;
3677       store_immediate_info *info = group->stores[0];
3678 
3679       total_new[0] = 0;
3680       total_orig[0] = 1; /* The orig store.  */
3681       info = group->stores[0];
3682       if (info->ops[0].base_addr)
3683           total_orig[0]++;
3684       if (info->ops[1].base_addr)
3685           total_orig[0]++;
3686       switch (info->rhs_code)
3687           {
3688           case BIT_AND_EXPR:
3689           case BIT_IOR_EXPR:
3690           case BIT_XOR_EXPR:
3691             total_orig[0]++; /* The orig BIT_*_EXPR stmt.  */
3692             break;
3693           default:
3694             break;
3695           }
3696       total_orig[0] *= group->stores.length ();
3697 
3698       FOR_EACH_VEC_ELT (group->stores, i, info)
3699           {
3700             total_new[0] += count_multiple_uses (info);
3701             total_orig[0] += (info->bit_not_p
3702                                   + info->ops[0].bit_not_p
3703                                   + info->ops[1].bit_not_p);
3704           }
3705     }
3706 
3707   if (!allow_unaligned_load)
3708     for (int i = 0; i < 2; ++i)
3709       if (group->load_align[i])
3710           group_load_align = MIN (group_load_align, group->load_align[i]);
3711 
3712   if (bzero_first)
3713     {
3714       store_immediate_info *gstore;
3715       FOR_EACH_VEC_ELT (group->stores, first, gstore)
3716           if (!gimple_clobber_p (gstore->stmt))
3717             break;
3718       ++first;
3719       ret = 1;
3720       if (split_stores)
3721           {
3722             split_store *store
3723               = new split_store (bytepos, gstore->bitsize, align_base);
3724             store->orig_stores.safe_push (gstore);
3725             store->orig = true;
3726             any_orig = true;
3727             split_stores->safe_push (store);
3728           }
3729     }
3730 
3731   while (size > 0)
3732     {
3733       if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3734             && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3735                 || (bzero_first && group->val[try_pos - bytepos] == 0)))
3736           {
3737             /* Skip padding bytes.  */
3738             ++try_pos;
3739             size -= BITS_PER_UNIT;
3740             continue;
3741           }
3742 
3743       unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3744       unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3745       unsigned HOST_WIDE_INT align_bitpos
3746           = (try_bitpos - align_base) & (group_align - 1);
3747       unsigned HOST_WIDE_INT align = group_align;
3748       bool found_orig = false;
3749       if (align_bitpos)
3750           align = least_bit_hwi (align_bitpos);
3751       if (!allow_unaligned_store)
3752           try_size = MIN (try_size, align);
3753       if (!allow_unaligned_load)
3754           {
3755             /* If we can't do or don't want to do unaligned stores
3756                as well as loads, we need to take the loads into account
3757                as well.  */
3758             unsigned HOST_WIDE_INT load_align = group_load_align;
3759             align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3760             if (align_bitpos)
3761               load_align = least_bit_hwi (align_bitpos);
3762             for (int i = 0; i < 2; ++i)
3763               if (group->load_align[i])
3764                 {
3765                     align_bitpos
3766                       = known_alignment (try_bitpos
3767                                              - group->stores[0]->bitpos
3768                                              + group->stores[0]->ops[i].bitpos
3769                                              - group->load_align_base[i]);
3770                     if (align_bitpos & (group_load_align - 1))
3771                       {
3772                         unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3773                         load_align = MIN (load_align, a);
3774                       }
3775                 }
3776             try_size = MIN (try_size, load_align);
3777           }
3778       store_immediate_info *info
3779           = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
3780       if (info && !gimple_clobber_p (info->stmt))
3781           {
3782             /* If there is just one original statement for the range, see if
3783                we can just reuse the original store which could be even larger
3784                than try_size.  */
3785             unsigned HOST_WIDE_INT stmt_end
3786               = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
3787             info = find_constituent_stores (group, NULL, &first, try_bitpos,
3788                                                     stmt_end - try_bitpos);
3789             if (info && info->bitpos >= try_bitpos)
3790               {
3791                 store_immediate_info *info2 = NULL;
3792                 unsigned int first_copy = first;
3793                 if (info->bitpos > try_bitpos
3794                       && stmt_end - try_bitpos <= try_size)
3795                     {
3796                       info2 = find_constituent_stores (group, NULL, &first_copy,
3797                                                                try_bitpos,
3798                                                                info->bitpos - try_bitpos);
3799                       gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3800                     }
3801                 if (info2 == NULL && stmt_end - try_bitpos < try_size)
3802                     {
3803                       info2 = find_constituent_stores (group, NULL, &first_copy,
3804                                                                stmt_end,
3805                                                                (try_bitpos + try_size)
3806                                                                - stmt_end);
3807                       gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3808                     }
3809                 if (info2 == NULL)
3810                     {
3811                       try_size = stmt_end - try_bitpos;
3812                       found_orig = true;
3813                       goto found;
3814                     }
3815               }
3816           }
3817 
3818       /* Approximate store bitsize for the case when there are no padding
3819            bits.  */
3820       while (try_size > size)
3821           try_size /= 2;
3822       /* Now look for whole padding bytes at the end of that bitsize.  */
3823       for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3824           if (group->mask[try_pos - bytepos + nonmasked - 1]
3825               != (unsigned char) ~0U
3826               && (!bzero_first
3827                     || group->val[try_pos - bytepos + nonmasked - 1] != 0))
3828             break;
3829       if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
3830           {
3831             /* If entire try_size range is padding, skip it.  */
3832             try_pos += try_size / BITS_PER_UNIT;
3833             size -= try_size;
3834             continue;
3835           }
3836       /* Otherwise try to decrease try_size if second half, last 3 quarters
3837            etc. are padding.  */
3838       nonmasked *= BITS_PER_UNIT;
3839       while (nonmasked <= try_size / 2)
3840           try_size /= 2;
3841       if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
3842           {
3843             /* Now look for whole padding bytes at the start of that bitsize.  */
3844             unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3845             for (masked = 0; masked < try_bytesize; ++masked)
3846               if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3847                     && (!bzero_first
3848                         || group->val[try_pos - bytepos + masked] != 0))
3849                 break;
3850             masked *= BITS_PER_UNIT;
3851             gcc_assert (masked < try_size);
3852             if (masked >= try_size / 2)
3853               {
3854                 while (masked >= try_size / 2)
3855                     {
3856                       try_size /= 2;
3857                       try_pos += try_size / BITS_PER_UNIT;
3858                       size -= try_size;
3859                       masked -= try_size;
3860                     }
3861                 /* Need to recompute the alignment, so just retry at the new
3862                      position.  */
3863                 continue;
3864               }
3865           }
3866 
3867     found:
3868       ++ret;
3869 
3870       if (split_stores)
3871           {
3872             split_store *store
3873               = new split_store (try_pos, try_size, align);
3874             info = find_constituent_stores (group, &store->orig_stores,
3875                                                     &first, try_bitpos, try_size);
3876             if (info
3877                 && !gimple_clobber_p (info->stmt)
3878                 && info->bitpos >= try_bitpos
3879                 && info->bitpos + info->bitsize <= try_bitpos + try_size
3880                 && (store->orig_stores.length () == 1
3881                       || found_orig
3882                       || (info->bitpos == try_bitpos
3883                           && (info->bitpos + info->bitsize
3884                                 == try_bitpos + try_size))))
3885               {
3886                 store->orig = true;
3887                 any_orig = true;
3888               }
3889             split_stores->safe_push (store);
3890           }
3891 
3892       try_pos += try_size / BITS_PER_UNIT;
3893       size -= try_size;
3894     }
3895 
3896   if (total_orig)
3897     {
3898       unsigned int i;
3899       split_store *store;
3900       /* If we are reusing some original stores and any of the
3901            original SSA_NAMEs had multiple uses, we need to subtract
3902            those now before we add the new ones.  */
3903       if (total_new[0] && any_orig)
3904           {
3905             FOR_EACH_VEC_ELT (*split_stores, i, store)
3906               if (store->orig)
3907                 total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3908           }
3909       total_new[0] += ret; /* The new store.  */
3910       store_immediate_info *info = group->stores[0];
3911       if (info->ops[0].base_addr)
3912           total_new[0] += ret;
3913       if (info->ops[1].base_addr)
3914           total_new[0] += ret;
3915       switch (info->rhs_code)
3916           {
3917           case BIT_AND_EXPR:
3918           case BIT_IOR_EXPR:
3919           case BIT_XOR_EXPR:
3920             total_new[0] += ret; /* The new BIT_*_EXPR stmt.  */
3921             break;
3922           default:
3923             break;
3924           }
3925       FOR_EACH_VEC_ELT (*split_stores, i, store)
3926           {
3927             unsigned int j;
3928             bool bit_not_p[3] = { false, false, false };
3929             /* If all orig_stores have certain bit_not_p set, then
3930                we'd use a BIT_NOT_EXPR stmt and need to account for it.
3931                If some orig_stores have certain bit_not_p set, then
3932                we'd use a BIT_XOR_EXPR with a mask and need to account for
3933                it.  */
3934             FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3935               {
3936                 if (info->ops[0].bit_not_p)
3937                     bit_not_p[0] = true;
3938                 if (info->ops[1].bit_not_p)
3939                     bit_not_p[1] = true;
3940                 if (info->bit_not_p)
3941                     bit_not_p[2] = true;
3942               }
3943             total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3944           }
3945 
3946     }
3947 
3948   return ret;
3949 }
3950 
3951 /* Return the operation through which the operand IDX (if < 2) or
3952    result (IDX == 2) should be inverted.  If NOP_EXPR, no inversion
3953    is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
3954    the bits should be xored with mask.  */
3955 
3956 static enum tree_code
invert_op(split_store * split_store,int idx,tree int_type,tree & mask)3957 invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3958 {
3959   unsigned int i;
3960   store_immediate_info *info;
3961   unsigned int cnt = 0;
3962   bool any_paddings = false;
3963   FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3964     {
3965       bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3966       if (bit_not_p)
3967           {
3968             ++cnt;
3969             tree lhs = gimple_assign_lhs (info->stmt);
3970             if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3971                 && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3972               any_paddings = true;
3973           }
3974     }
3975   mask = NULL_TREE;
3976   if (cnt == 0)
3977     return NOP_EXPR;
3978   if (cnt == split_store->orig_stores.length () && !any_paddings)
3979     return BIT_NOT_EXPR;
3980 
3981   unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3982   unsigned buf_size = split_store->size / BITS_PER_UNIT;
3983   unsigned char *buf
3984     = XALLOCAVEC (unsigned char, buf_size);
3985   memset (buf, ~0U, buf_size);
3986   FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3987     {
3988       bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3989       if (!bit_not_p)
3990           continue;
3991       /* Clear regions with bit_not_p and invert afterwards, rather than
3992            clear regions with !bit_not_p, so that gaps in between stores aren't
3993            set in the mask.  */
3994       unsigned HOST_WIDE_INT bitsize = info->bitsize;
3995       unsigned HOST_WIDE_INT prec = bitsize;
3996       unsigned int pos_in_buffer = 0;
3997       if (any_paddings)
3998           {
3999             tree lhs = gimple_assign_lhs (info->stmt);
4000             if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4001                 && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
4002               prec = TYPE_PRECISION (TREE_TYPE (lhs));
4003           }
4004       if (info->bitpos < try_bitpos)
4005           {
4006             gcc_assert (info->bitpos + bitsize > try_bitpos);
4007             if (!BYTES_BIG_ENDIAN)
4008               {
4009                 if (prec <= try_bitpos - info->bitpos)
4010                     continue;
4011                 prec -= try_bitpos - info->bitpos;
4012               }
4013             bitsize -= try_bitpos - info->bitpos;
4014             if (BYTES_BIG_ENDIAN && prec > bitsize)
4015               prec = bitsize;
4016           }
4017       else
4018           pos_in_buffer = info->bitpos - try_bitpos;
4019       if (prec < bitsize)
4020           {
4021             /* If this is a bool inversion, invert just the least significant
4022                prec bits rather than all bits of it.  */
4023             if (BYTES_BIG_ENDIAN)
4024               {
4025                 pos_in_buffer += bitsize - prec;
4026                 if (pos_in_buffer >= split_store->size)
4027                     continue;
4028               }
4029             bitsize = prec;
4030           }
4031       if (pos_in_buffer + bitsize > split_store->size)
4032           bitsize = split_store->size - pos_in_buffer;
4033       unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
4034       if (BYTES_BIG_ENDIAN)
4035           clear_bit_region_be (p, (BITS_PER_UNIT - 1
4036                                          - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
4037       else
4038           clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
4039     }
4040   for (unsigned int i = 0; i < buf_size; ++i)
4041     buf[i] = ~buf[i];
4042   mask = native_interpret_expr (int_type, buf, buf_size);
4043   return BIT_XOR_EXPR;
4044 }
4045 
4046 /* Given a merged store group GROUP output the widened version of it.
4047    The store chain is against the base object BASE.
4048    Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
4049    unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
4050    Make sure that the number of statements output is less than the number of
4051    original statements.  If a better sequence is possible emit it and
4052    return true.  */
4053 
4054 bool
output_merged_store(merged_store_group * group)4055 imm_store_chain_info::output_merged_store (merged_store_group *group)
4056 {
4057   const unsigned HOST_WIDE_INT start_byte_pos
4058     = group->bitregion_start / BITS_PER_UNIT;
4059   unsigned int orig_num_stmts = group->stores.length ();
4060   if (orig_num_stmts < 2)
4061     return false;
4062 
4063   bool allow_unaligned_store
4064     = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
4065   bool allow_unaligned_load = allow_unaligned_store;
4066   bool bzero_first = false;
4067   store_immediate_info *store;
4068   unsigned int num_clobber_stmts = 0;
4069   if (group->stores[0]->rhs_code == INTEGER_CST)
4070     {
4071       unsigned int i;
4072       FOR_EACH_VEC_ELT (group->stores, i, store)
4073           if (gimple_clobber_p (store->stmt))
4074             num_clobber_stmts++;
4075           else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
4076                      && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
4077                      && group->start == store->bitpos
4078                      && group->width == store->bitsize
4079                      && (group->start % BITS_PER_UNIT) == 0
4080                      && (group->width % BITS_PER_UNIT) == 0)
4081             {
4082               bzero_first = true;
4083               break;
4084             }
4085           else
4086             break;
4087       FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
4088           if (gimple_clobber_p (store->stmt))
4089             num_clobber_stmts++;
4090       if (num_clobber_stmts == orig_num_stmts)
4091           return false;
4092       orig_num_stmts -= num_clobber_stmts;
4093     }
4094   if (allow_unaligned_store || bzero_first)
4095     {
4096       /* If unaligned stores are allowed, see how many stores we'd emit
4097            for unaligned and how many stores we'd emit for aligned stores.
4098            Only use unaligned stores if it allows fewer stores than aligned.
4099            Similarly, if there is a whole region clear first, prefer expanding
4100            it together compared to expanding clear first followed by merged
4101            further stores.  */
4102       unsigned cnt[4] = { ~0U, ~0U, ~0U, ~0U };
4103       int pass_min = 0;
4104       for (int pass = 0; pass < 4; ++pass)
4105           {
4106             if (!allow_unaligned_store && (pass & 1) != 0)
4107               continue;
4108             if (!bzero_first && (pass & 2) != 0)
4109               continue;
4110             cnt[pass] = split_group (group, (pass & 1) != 0,
4111                                            allow_unaligned_load, (pass & 2) != 0,
4112                                            NULL, NULL, NULL);
4113             if (cnt[pass] < cnt[pass_min])
4114               pass_min = pass;
4115           }
4116       if ((pass_min & 1) == 0)
4117           allow_unaligned_store = false;
4118       if ((pass_min & 2) == 0)
4119           bzero_first = false;
4120     }
4121 
4122   auto_vec<class split_store *, 32> split_stores;
4123   split_store *split_store;
4124   unsigned total_orig, total_new, i;
4125   split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
4126                  &split_stores, &total_orig, &total_new);
4127 
4128   /* Determine if there is a clobber covering the whole group at the start,
4129      followed by proposed split stores that cover the whole group.  In that
4130      case, prefer the transformation even if
4131      split_stores.length () == orig_num_stmts.  */
4132   bool clobber_first = false;
4133   if (num_clobber_stmts
4134       && gimple_clobber_p (group->stores[0]->stmt)
4135       && group->start == group->stores[0]->bitpos
4136       && group->width == group->stores[0]->bitsize
4137       && (group->start % BITS_PER_UNIT) == 0
4138       && (group->width % BITS_PER_UNIT) == 0)
4139     {
4140       clobber_first = true;
4141       unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
4142       FOR_EACH_VEC_ELT (split_stores, i, split_store)
4143           if (split_store->bytepos != pos)
4144             {
4145               clobber_first = false;
4146               break;
4147             }
4148           else
4149             pos += split_store->size / BITS_PER_UNIT;
4150       if (pos != (group->start + group->width) / BITS_PER_UNIT)
4151           clobber_first = false;
4152     }
4153 
4154   if (split_stores.length () >= orig_num_stmts + clobber_first)
4155     {
4156 
4157       /* We didn't manage to reduce the number of statements.  Bail out.  */
4158       if (dump_file && (dump_flags & TDF_DETAILS))
4159           fprintf (dump_file, "Exceeded original number of stmts (%u)."
4160                                   "  Not profitable to emit new sequence.\n",
4161                      orig_num_stmts);
4162       FOR_EACH_VEC_ELT (split_stores, i, split_store)
4163           delete split_store;
4164       return false;
4165     }
4166   if (total_orig <= total_new)
4167     {
4168       /* If number of estimated new statements is above estimated original
4169            statements, bail out too.  */
4170       if (dump_file && (dump_flags & TDF_DETAILS))
4171           fprintf (dump_file, "Estimated number of original stmts (%u)"
4172                                   " not larger than estimated number of new"
4173                                   " stmts (%u).\n",
4174                      total_orig, total_new);
4175       FOR_EACH_VEC_ELT (split_stores, i, split_store)
4176           delete split_store;
4177       return false;
4178     }
4179   if (group->stores[0]->rhs_code == INTEGER_CST)
4180     {
4181       bool all_orig = true;
4182       FOR_EACH_VEC_ELT (split_stores, i, split_store)
4183           if (!split_store->orig)
4184             {
4185               all_orig = false;
4186               break;
4187             }
4188       if (all_orig)
4189           {
4190             unsigned int cnt = split_stores.length ();
4191             store_immediate_info *store;
4192             FOR_EACH_VEC_ELT (group->stores, i, store)
4193               if (gimple_clobber_p (store->stmt))
4194                 ++cnt;
4195             /* Punt if we wouldn't make any real changes, i.e. keep all
4196                orig stmts + all clobbers.  */
4197             if (cnt == group->stores.length ())
4198               {
4199                 if (dump_file && (dump_flags & TDF_DETAILS))
4200                     fprintf (dump_file, "Exceeded original number of stmts (%u)."
4201                                             "  Not profitable to emit new sequence.\n",
4202                                orig_num_stmts);
4203                 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4204                     delete split_store;
4205                 return false;
4206               }
4207           }
4208     }
4209 
4210   gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
4211   gimple_seq seq = NULL;
4212   tree last_vdef, new_vuse;
4213   last_vdef = gimple_vdef (group->last_stmt);
4214   new_vuse = gimple_vuse (group->last_stmt);
4215   tree bswap_res = NULL_TREE;
4216 
4217   /* Clobbers are not removed.  */
4218   if (gimple_clobber_p (group->last_stmt))
4219     {
4220       new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
4221       gimple_set_vdef (group->last_stmt, new_vuse);
4222     }
4223 
4224   if (group->stores[0]->rhs_code == LROTATE_EXPR
4225       || group->stores[0]->rhs_code == NOP_EXPR)
4226     {
4227       tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
4228       gimple *ins_stmt = group->stores[0]->ins_stmt;
4229       struct symbolic_number *n = &group->stores[0]->n;
4230       bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
4231 
4232       switch (n->range)
4233           {
4234           case 16:
4235             load_type = bswap_type = uint16_type_node;
4236             break;
4237           case 32:
4238             load_type = uint32_type_node;
4239             if (bswap)
4240               {
4241                 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
4242                 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4243               }
4244             break;
4245           case 64:
4246             load_type = uint64_type_node;
4247             if (bswap)
4248               {
4249                 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
4250                 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4251               }
4252             break;
4253           default:
4254             gcc_unreachable ();
4255           }
4256 
4257       /* If the loads have each vuse of the corresponding store,
4258            we've checked the aliasing already in try_coalesce_bswap and
4259            we want to sink the need load into seq.  So need to use new_vuse
4260            on the load.  */
4261       if (n->base_addr)
4262           {
4263             if (n->vuse == NULL)
4264               {
4265                 n->vuse = new_vuse;
4266                 ins_stmt = NULL;
4267               }
4268             else
4269               /* Update vuse in case it has changed by output_merged_stores.  */
4270               n->vuse = gimple_vuse (ins_stmt);
4271           }
4272       bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
4273                                          bswap_type, load_type, n, bswap,
4274                                          ~(uint64_t) 0);
4275       gcc_assert (bswap_res);
4276     }
4277 
4278   gimple *stmt = NULL;
4279   auto_vec<gimple *, 32> orig_stmts;
4280   gimple_seq this_seq;
4281   tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
4282                                               is_gimple_mem_ref_addr, NULL_TREE);
4283   gimple_seq_add_seq_without_update (&seq, this_seq);
4284 
4285   tree load_addr[2] = { NULL_TREE, NULL_TREE };
4286   gimple_seq load_seq[2] = { NULL, NULL };
4287   gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
4288   for (int j = 0; j < 2; ++j)
4289     {
4290       store_operand_info &op = group->stores[0]->ops[j];
4291       if (op.base_addr == NULL_TREE)
4292           continue;
4293 
4294       store_immediate_info *infol = group->stores.last ();
4295       if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
4296           {
4297             /* We can't pick the location randomly; while we've verified
4298                all the loads have the same vuse, they can be still in different
4299                basic blocks and we need to pick the one from the last bb:
4300                int x = q[0];
4301                if (x == N) return;
4302                int y = q[1];
4303                p[0] = x;
4304                p[1] = y;
4305                otherwise if we put the wider load at the q[0] load, we might
4306                segfault if q[1] is not mapped.  */
4307             basic_block bb = gimple_bb (op.stmt);
4308             gimple *ostmt = op.stmt;
4309             store_immediate_info *info;
4310             FOR_EACH_VEC_ELT (group->stores, i, info)
4311               {
4312                 gimple *tstmt = info->ops[j].stmt;
4313                 basic_block tbb = gimple_bb (tstmt);
4314                 if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
4315                     {
4316                       ostmt = tstmt;
4317                       bb = tbb;
4318                     }
4319               }
4320             load_gsi[j] = gsi_for_stmt (ostmt);
4321             load_addr[j]
4322               = force_gimple_operand_1 (unshare_expr (op.base_addr),
4323                                               &load_seq[j], is_gimple_mem_ref_addr,
4324                                               NULL_TREE);
4325           }
4326       else if (operand_equal_p (base_addr, op.base_addr, 0))
4327           load_addr[j] = addr;
4328       else
4329           {
4330             load_addr[j]
4331               = force_gimple_operand_1 (unshare_expr (op.base_addr),
4332                                               &this_seq, is_gimple_mem_ref_addr,
4333                                               NULL_TREE);
4334             gimple_seq_add_seq_without_update (&seq, this_seq);
4335           }
4336     }
4337 
4338   FOR_EACH_VEC_ELT (split_stores, i, split_store)
4339     {
4340       const unsigned HOST_WIDE_INT try_size = split_store->size;
4341       const unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
4342       const unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
4343       const unsigned HOST_WIDE_INT try_align = split_store->align;
4344       const unsigned HOST_WIDE_INT try_offset = try_pos - start_byte_pos;
4345       tree dest, src;
4346       location_t loc;
4347 
4348       if (split_store->orig)
4349           {
4350             /* If there is just a single non-clobber constituent store
4351                which covers the whole area, just reuse the lhs and rhs.  */
4352             gimple *orig_stmt = NULL;
4353             store_immediate_info *store;
4354             unsigned int j;
4355             FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
4356               if (!gimple_clobber_p (store->stmt))
4357                 {
4358                     orig_stmt = store->stmt;
4359                     break;
4360                 }
4361             dest = gimple_assign_lhs (orig_stmt);
4362             src = gimple_assign_rhs1 (orig_stmt);
4363             loc = gimple_location (orig_stmt);
4364           }
4365       else
4366           {
4367             store_immediate_info *info;
4368             unsigned short clique, base;
4369             unsigned int k;
4370             FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4371               orig_stmts.safe_push (info->stmt);
4372             tree offset_type
4373               = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
4374             tree dest_type;
4375             loc = get_location_for_stmts (orig_stmts);
4376             orig_stmts.truncate (0);
4377 
4378             if (group->string_concatenation)
4379               dest_type
4380                 = build_array_type_nelts (char_type_node,
4381                                                   try_size / BITS_PER_UNIT);
4382             else
4383               {
4384                 dest_type = build_nonstandard_integer_type (try_size, UNSIGNED);
4385                 dest_type = build_aligned_type (dest_type, try_align);
4386               }
4387             dest = fold_build2 (MEM_REF, dest_type, addr,
4388                                     build_int_cst (offset_type, try_pos));
4389             if (TREE_CODE (dest) == MEM_REF)
4390               {
4391                 MR_DEPENDENCE_CLIQUE (dest) = clique;
4392                 MR_DEPENDENCE_BASE (dest) = base;
4393               }
4394 
4395             tree mask;
4396             if (bswap_res || group->string_concatenation)
4397               mask = integer_zero_node;
4398             else
4399               mask = native_interpret_expr (dest_type,
4400                                                     group->mask + try_offset,
4401                                                     group->buf_size);
4402 
4403             tree ops[2];
4404             for (int j = 0;
4405                  j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
4406                  ++j)
4407               {
4408                 store_operand_info &op = split_store->orig_stores[0]->ops[j];
4409                 if (bswap_res)
4410                     ops[j] = bswap_res;
4411                 else if (group->string_concatenation)
4412                     {
4413                       ops[j] = build_string (try_size / BITS_PER_UNIT,
4414                                                    (const char *) group->val + try_offset);
4415                       TREE_TYPE (ops[j]) = dest_type;
4416                     }
4417                 else if (op.base_addr)
4418                     {
4419                       FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4420                         orig_stmts.safe_push (info->ops[j].stmt);
4421 
4422                       offset_type = get_alias_type_for_stmts (orig_stmts, true,
4423                                                                         &clique, &base);
4424                       location_t load_loc = get_location_for_stmts (orig_stmts);
4425                       orig_stmts.truncate (0);
4426 
4427                       unsigned HOST_WIDE_INT load_align = group->load_align[j];
4428                       unsigned HOST_WIDE_INT align_bitpos
4429                         = known_alignment (try_bitpos
4430                                                - split_store->orig_stores[0]->bitpos
4431                                                + op.bitpos);
4432                       if (align_bitpos & (load_align - 1))
4433                         load_align = least_bit_hwi (align_bitpos);
4434 
4435                       tree load_int_type
4436                         = build_nonstandard_integer_type (try_size, UNSIGNED);
4437                       load_int_type
4438                         = build_aligned_type (load_int_type, load_align);
4439 
4440                       poly_uint64 load_pos
4441                         = exact_div (try_bitpos
4442                                          - split_store->orig_stores[0]->bitpos
4443                                          + op.bitpos,
4444                                          BITS_PER_UNIT);
4445                       ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4446                                                   build_int_cst (offset_type, load_pos));
4447                       if (TREE_CODE (ops[j]) == MEM_REF)
4448                         {
4449                           MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4450                           MR_DEPENDENCE_BASE (ops[j]) = base;
4451                         }
4452                       if (!integer_zerop (mask))
4453                         {
4454                           /* The load might load some bits (that will be masked
4455                                off later on) uninitialized, avoid -W*uninitialized
4456                                warnings in that case.  */
4457                           suppress_warning (ops[j], OPT_Wuninitialized);
4458                         }
4459 
4460                       stmt = gimple_build_assign (make_ssa_name (dest_type), ops[j]);
4461                       gimple_set_location (stmt, load_loc);
4462                       if (gsi_bb (load_gsi[j]))
4463                         {
4464                           gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4465                           gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4466                         }
4467                       else
4468                         {
4469                           gimple_set_vuse (stmt, new_vuse);
4470                           gimple_seq_add_stmt_without_update (&seq, stmt);
4471                         }
4472                       ops[j] = gimple_assign_lhs (stmt);
4473                       tree xor_mask;
4474                       enum tree_code inv_op
4475                         = invert_op (split_store, j, dest_type, xor_mask);
4476                       if (inv_op != NOP_EXPR)
4477                         {
4478                           stmt = gimple_build_assign (make_ssa_name (dest_type),
4479                                                               inv_op, ops[j], xor_mask);
4480                           gimple_set_location (stmt, load_loc);
4481                           ops[j] = gimple_assign_lhs (stmt);
4482 
4483                           if (gsi_bb (load_gsi[j]))
4484                               gimple_seq_add_stmt_without_update (&load_seq[j],
4485                                                                           stmt);
4486                           else
4487                               gimple_seq_add_stmt_without_update (&seq, stmt);
4488                         }
4489                     }
4490                 else
4491                     ops[j] = native_interpret_expr (dest_type,
4492                                                             group->val + try_offset,
4493                                                             group->buf_size);
4494               }
4495 
4496             switch (split_store->orig_stores[0]->rhs_code)
4497               {
4498               case BIT_AND_EXPR:
4499               case BIT_IOR_EXPR:
4500               case BIT_XOR_EXPR:
4501                 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4502                     {
4503                       tree rhs1 = gimple_assign_rhs1 (info->stmt);
4504                       orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4505                     }
4506                 location_t bit_loc;
4507                 bit_loc = get_location_for_stmts (orig_stmts);
4508                 orig_stmts.truncate (0);
4509 
4510                 stmt
4511                     = gimple_build_assign (make_ssa_name (dest_type),
4512                                                split_store->orig_stores[0]->rhs_code,
4513                                                ops[0], ops[1]);
4514                 gimple_set_location (stmt, bit_loc);
4515                 /* If there is just one load and there is a separate
4516                      load_seq[0], emit the bitwise op right after it.  */
4517                 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4518                     gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4519                 /* Otherwise, if at least one load is in seq, we need to
4520                      emit the bitwise op right before the store.  If there
4521                      are two loads and are emitted somewhere else, it would
4522                      be better to emit the bitwise op as early as possible;
4523                      we don't track where that would be possible right now
4524                      though.  */
4525                 else
4526                     gimple_seq_add_stmt_without_update (&seq, stmt);
4527                 src = gimple_assign_lhs (stmt);
4528                 tree xor_mask;
4529                 enum tree_code inv_op;
4530                 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
4531                 if (inv_op != NOP_EXPR)
4532                     {
4533                       stmt = gimple_build_assign (make_ssa_name (dest_type),
4534                                                         inv_op, src, xor_mask);
4535                       gimple_set_location (stmt, bit_loc);
4536                       if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4537                         gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4538                       else
4539                         gimple_seq_add_stmt_without_update (&seq, stmt);
4540                       src = gimple_assign_lhs (stmt);
4541                     }
4542                 break;
4543               case LROTATE_EXPR:
4544               case NOP_EXPR:
4545                 src = ops[0];
4546                 if (!is_gimple_val (src))
4547                     {
4548                       stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4549                                                         src);
4550                       gimple_seq_add_stmt_without_update (&seq, stmt);
4551                       src = gimple_assign_lhs (stmt);
4552                     }
4553                 if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
4554                     {
4555                       stmt = gimple_build_assign (make_ssa_name (dest_type),
4556                                                         NOP_EXPR, src);
4557                       gimple_seq_add_stmt_without_update (&seq, stmt);
4558                       src = gimple_assign_lhs (stmt);
4559                     }
4560                 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
4561                 if (inv_op != NOP_EXPR)
4562                     {
4563                       stmt = gimple_build_assign (make_ssa_name (dest_type),
4564                                                         inv_op, src, xor_mask);
4565                       gimple_set_location (stmt, loc);
4566                       gimple_seq_add_stmt_without_update (&seq, stmt);
4567                       src = gimple_assign_lhs (stmt);
4568                     }
4569                 break;
4570               default:
4571                 src = ops[0];
4572                 break;
4573               }
4574 
4575             /* If bit insertion is required, we use the source as an accumulator
4576                into which the successive bit-field values are manually inserted.
4577                FIXME: perhaps use BIT_INSERT_EXPR instead in some cases?  */
4578             if (group->bit_insertion)
4579               FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4580                 if (info->rhs_code == BIT_INSERT_EXPR
4581                       && info->bitpos < try_bitpos + try_size
4582                       && info->bitpos + info->bitsize > try_bitpos)
4583                     {
4584                       /* Mask, truncate, convert to final type, shift and ior into
4585                          the accumulator.  Note that every step can be a no-op.  */
4586                       const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4587                       const HOST_WIDE_INT end_gap
4588                         = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4589                       tree tem = info->ops[0].val;
4590                       if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
4591                         {
4592                           const unsigned HOST_WIDE_INT size
4593                               = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
4594                           tree integer_type
4595                               = build_nonstandard_integer_type (size, UNSIGNED);
4596                           tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
4597                                                     integer_type, tem);
4598                         }
4599                       if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4600                         {
4601                           tree bitfield_type
4602                               = build_nonstandard_integer_type (info->bitsize,
4603                                                                         UNSIGNED);
4604                           tem = gimple_convert (&seq, loc, bitfield_type, tem);
4605                         }
4606                       else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
4607                         {
4608                           wide_int imask
4609                               = wi::mask (info->bitsize, false,
4610                                             TYPE_PRECISION (TREE_TYPE (tem)));
4611                           tem = gimple_build (&seq, loc,
4612                                                     BIT_AND_EXPR, TREE_TYPE (tem), tem,
4613                                                     wide_int_to_tree (TREE_TYPE (tem),
4614                                                                           imask));
4615                         }
4616                       const HOST_WIDE_INT shift
4617                         = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4618                       if (shift < 0)
4619                         tem = gimple_build (&seq, loc,
4620                                                   RSHIFT_EXPR, TREE_TYPE (tem), tem,
4621                                                   build_int_cst (NULL_TREE, -shift));
4622                       tem = gimple_convert (&seq, loc, dest_type, tem);
4623                       if (shift > 0)
4624                         tem = gimple_build (&seq, loc,
4625                                                   LSHIFT_EXPR, dest_type, tem,
4626                                                   build_int_cst (NULL_TREE, shift));
4627                       src = gimple_build (&seq, loc,
4628                                               BIT_IOR_EXPR, dest_type, tem, src);
4629                     }
4630 
4631             if (!integer_zerop (mask))
4632               {
4633                 tree tem = make_ssa_name (dest_type);
4634                 tree load_src = unshare_expr (dest);
4635                 /* The load might load some or all bits uninitialized,
4636                      avoid -W*uninitialized warnings in that case.
4637                      As optimization, it would be nice if all the bits are
4638                      provably uninitialized (no stores at all yet or previous
4639                      store a CLOBBER) we'd optimize away the load and replace
4640                      it e.g. with 0.  */
4641                 suppress_warning (load_src, OPT_Wuninitialized);
4642                 stmt = gimple_build_assign (tem, load_src);
4643                 gimple_set_location (stmt, loc);
4644                 gimple_set_vuse (stmt, new_vuse);
4645                 gimple_seq_add_stmt_without_update (&seq, stmt);
4646 
4647                 /* FIXME: If there is a single chunk of zero bits in mask,
4648                      perhaps use BIT_INSERT_EXPR instead?  */
4649                 stmt = gimple_build_assign (make_ssa_name (dest_type),
4650                                                     BIT_AND_EXPR, tem, mask);
4651                 gimple_set_location (stmt, loc);
4652                 gimple_seq_add_stmt_without_update (&seq, stmt);
4653                 tem = gimple_assign_lhs (stmt);
4654 
4655                 if (TREE_CODE (src) == INTEGER_CST)
4656                     src = wide_int_to_tree (dest_type,
4657                                                   wi::bit_and_not (wi::to_wide (src),
4658                                                                        wi::to_wide (mask)));
4659                 else
4660                     {
4661                       tree nmask
4662                         = wide_int_to_tree (dest_type,
4663                                                   wi::bit_not (wi::to_wide (mask)));
4664                       stmt = gimple_build_assign (make_ssa_name (dest_type),
4665                                                         BIT_AND_EXPR, src, nmask);
4666                       gimple_set_location (stmt, loc);
4667                       gimple_seq_add_stmt_without_update (&seq, stmt);
4668                       src = gimple_assign_lhs (stmt);
4669                     }
4670                 stmt = gimple_build_assign (make_ssa_name (dest_type),
4671                                                     BIT_IOR_EXPR, tem, src);
4672                 gimple_set_location (stmt, loc);
4673                 gimple_seq_add_stmt_without_update (&seq, stmt);
4674                 src = gimple_assign_lhs (stmt);
4675               }
4676           }
4677 
4678       stmt = gimple_build_assign (dest, src);
4679       gimple_set_location (stmt, loc);
4680       gimple_set_vuse (stmt, new_vuse);
4681       gimple_seq_add_stmt_without_update (&seq, stmt);
4682 
4683       if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4684           add_stmt_to_eh_lp (stmt, group->lp_nr);
4685 
4686       tree new_vdef;
4687       if (i < split_stores.length () - 1)
4688           new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
4689       else
4690           new_vdef = last_vdef;
4691 
4692       gimple_set_vdef (stmt, new_vdef);
4693       SSA_NAME_DEF_STMT (new_vdef) = stmt;
4694       new_vuse = new_vdef;
4695     }
4696 
4697   FOR_EACH_VEC_ELT (split_stores, i, split_store)
4698     delete split_store;
4699 
4700   gcc_assert (seq);
4701   if (dump_file)
4702     {
4703       fprintf (dump_file,
4704                  "New sequence of %u stores to replace old one of %u stores\n",
4705                  split_stores.length (), orig_num_stmts);
4706       if (dump_flags & TDF_DETAILS)
4707           print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4708     }
4709 
4710   if (gimple_clobber_p (group->last_stmt))
4711     update_stmt (group->last_stmt);
4712 
4713   if (group->lp_nr > 0)
4714     {
4715       /* We're going to insert a sequence of (potentially) throwing stores
4716            into an active EH region.  This means that we're going to create
4717            new basic blocks with EH edges pointing to the post landing pad
4718            and, therefore, to have to update its PHI nodes, if any.  For the
4719            virtual PHI node, we're going to use the VDEFs created above, but
4720            for the other nodes, we need to record the original reaching defs.  */
4721       eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4722       basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4723       basic_block last_bb = gimple_bb (group->last_stmt);
4724       edge last_edge = find_edge (last_bb, lp_bb);
4725       auto_vec<tree, 16> last_defs;
4726       gphi_iterator gpi;
4727       for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4728           {
4729             gphi *phi = gpi.phi ();
4730             tree last_def;
4731             if (virtual_operand_p (gimple_phi_result (phi)))
4732               last_def = NULL_TREE;
4733             else
4734               last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4735             last_defs.safe_push (last_def);
4736           }
4737 
4738       /* Do the insertion.  Then, if new basic blocks have been created in the
4739            process, rewind the chain of VDEFs create above to walk the new basic
4740            blocks and update the corresponding arguments of the PHI nodes.  */
4741       update_modified_stmts (seq);
4742       if (gimple_find_sub_bbs (seq, &last_gsi))
4743           while (last_vdef != gimple_vuse (group->last_stmt))
4744             {
4745               gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4746               if (stmt_could_throw_p (cfun, stmt))
4747                 {
4748                     edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4749                     unsigned int i;
4750                     for (gpi = gsi_start_phis (lp_bb), i = 0;
4751                          !gsi_end_p (gpi);
4752                          gsi_next (&gpi), i++)
4753                       {
4754                         gphi *phi = gpi.phi ();
4755                         tree new_def;
4756                         if (virtual_operand_p (gimple_phi_result (phi)))
4757                           new_def = last_vdef;
4758                         else
4759                           new_def = last_defs[i];
4760                         add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4761                       }
4762                 }
4763               last_vdef = gimple_vuse (stmt);
4764             }
4765     }
4766   else
4767     gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4768 
4769   for (int j = 0; j < 2; ++j)
4770     if (load_seq[j])
4771       gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
4772 
4773   return true;
4774 }
4775 
4776 /* Process the merged_store_group objects created in the coalescing phase.
4777    The stores are all against the base object BASE.
4778    Try to output the widened stores and delete the original statements if
4779    successful.  Return true iff any changes were made.  */
4780 
4781 bool
output_merged_stores()4782 imm_store_chain_info::output_merged_stores ()
4783 {
4784   unsigned int i;
4785   merged_store_group *merged_store;
4786   bool ret = false;
4787   FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4788     {
4789       if (dbg_cnt (store_merging)
4790             && output_merged_store (merged_store))
4791           {
4792             unsigned int j;
4793             store_immediate_info *store;
4794             FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4795               {
4796                 gimple *stmt = store->stmt;
4797                 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4798                 /* Don't remove clobbers, they are still useful even if
4799                      everything is overwritten afterwards.  */
4800                 if (gimple_clobber_p (stmt))
4801                     continue;
4802                 gsi_remove (&gsi, true);
4803                 if (store->lp_nr)
4804                     remove_stmt_from_eh_lp (stmt);
4805                 if (stmt != merged_store->last_stmt)
4806                     {
4807                       unlink_stmt_vdef (stmt);
4808                       release_defs (stmt);
4809                     }
4810               }
4811             ret = true;
4812           }
4813     }
4814   if (ret && dump_file)
4815     fprintf (dump_file, "Merging successful!\n");
4816 
4817   return ret;
4818 }
4819 
4820 /* Coalesce the store_immediate_info objects recorded against the base object
4821    BASE in the first phase and output them.
4822    Delete the allocated structures.
4823    Return true if any changes were made.  */
4824 
4825 bool
terminate_and_process_chain()4826 imm_store_chain_info::terminate_and_process_chain ()
4827 {
4828   if (dump_file && (dump_flags & TDF_DETAILS))
4829     fprintf (dump_file, "Terminating chain with %u stores\n",
4830                m_store_info.length ());
4831   /* Process store chain.  */
4832   bool ret = false;
4833   if (m_store_info.length () > 1)
4834     {
4835       ret = coalesce_immediate_stores ();
4836       if (ret)
4837           ret = output_merged_stores ();
4838     }
4839 
4840   /* Delete all the entries we allocated ourselves.  */
4841   store_immediate_info *info;
4842   unsigned int i;
4843   FOR_EACH_VEC_ELT (m_store_info, i, info)
4844     delete info;
4845 
4846   merged_store_group *merged_info;
4847   FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4848     delete merged_info;
4849 
4850   return ret;
4851 }
4852 
4853 /* Return true iff LHS is a destination potentially interesting for
4854    store merging.  In practice these are the codes that get_inner_reference
4855    can process.  */
4856 
4857 static bool
lhs_valid_for_store_merging_p(tree lhs)4858 lhs_valid_for_store_merging_p (tree lhs)
4859 {
4860   if (DECL_P (lhs))
4861     return true;
4862 
4863   switch (TREE_CODE (lhs))
4864     {
4865     case ARRAY_REF:
4866     case ARRAY_RANGE_REF:
4867     case BIT_FIELD_REF:
4868     case COMPONENT_REF:
4869     case MEM_REF:
4870     case VIEW_CONVERT_EXPR:
4871       return true;
4872     default:
4873       return false;
4874     }
4875 }
4876 
4877 /* Return true if the tree RHS is a constant we want to consider
4878    during store merging.  In practice accept all codes that
4879    native_encode_expr accepts.  */
4880 
4881 static bool
rhs_valid_for_store_merging_p(tree rhs)4882 rhs_valid_for_store_merging_p (tree rhs)
4883 {
4884   unsigned HOST_WIDE_INT size;
4885   if (TREE_CODE (rhs) == CONSTRUCTOR
4886       && CONSTRUCTOR_NELTS (rhs) == 0
4887       && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4888       && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4889     return true;
4890   return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4891             && native_encode_expr (rhs, NULL, size) != 0);
4892 }
4893 
4894 /* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4895    and return true on success or false on failure.  */
4896 
4897 static bool
adjust_bit_pos(poly_offset_int byte_off,poly_int64 * pbitpos,poly_uint64 * pbitregion_start,poly_uint64 * pbitregion_end)4898 adjust_bit_pos (poly_offset_int byte_off,
4899                     poly_int64  *pbitpos,
4900                     poly_uint64 *pbitregion_start,
4901                     poly_uint64 *pbitregion_end)
4902 {
4903   poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4904   bit_off += *pbitpos;
4905 
4906   if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4907     {
4908       if (maybe_ne (*pbitregion_end, 0U))
4909           {
4910             bit_off = byte_off << LOG2_BITS_PER_UNIT;
4911             bit_off += *pbitregion_start;
4912             if (bit_off.to_uhwi (pbitregion_start))
4913               {
4914                 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4915                 bit_off += *pbitregion_end;
4916                 if (!bit_off.to_uhwi (pbitregion_end))
4917                     *pbitregion_end = 0;
4918               }
4919             else
4920               *pbitregion_end = 0;
4921           }
4922       return true;
4923     }
4924   else
4925     return false;
4926 }
4927 
4928 /* If MEM is a memory reference usable for store merging (either as
4929    store destination or for loads), return the non-NULL base_addr
4930    and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4931    Otherwise return NULL, *PBITPOS should be still valid even for that
4932    case.  */
4933 
4934 static tree
mem_valid_for_store_merging(tree mem,poly_uint64 * pbitsize,poly_uint64 * pbitpos,poly_uint64 * pbitregion_start,poly_uint64 * pbitregion_end)4935 mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4936                                    poly_uint64 *pbitpos,
4937                                    poly_uint64 *pbitregion_start,
4938                                    poly_uint64 *pbitregion_end)
4939 {
4940   poly_int64 bitsize, bitpos;
4941   poly_uint64 bitregion_start = 0, bitregion_end = 0;
4942   machine_mode mode;
4943   int unsignedp = 0, reversep = 0, volatilep = 0;
4944   tree offset;
4945   tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4946                                                   &unsignedp, &reversep, &volatilep);
4947   *pbitsize = bitsize;
4948   if (known_le (bitsize, 0))
4949     return NULL_TREE;
4950 
4951   if (TREE_CODE (mem) == COMPONENT_REF
4952       && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4953     {
4954       get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
4955       if (maybe_ne (bitregion_end, 0U))
4956           bitregion_end += 1;
4957     }
4958 
4959   if (reversep)
4960     return NULL_TREE;
4961 
4962   /* We do not want to rewrite TARGET_MEM_REFs.  */
4963   if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4964     return NULL_TREE;
4965   /* In some cases get_inner_reference may return a
4966      MEM_REF [ptr + byteoffset].  For the purposes of this pass
4967      canonicalize the base_addr to MEM_REF [ptr] and take
4968      byteoffset into account in the bitpos.  This occurs in
4969      PR 23684 and this way we can catch more chains.  */
4970   else if (TREE_CODE (base_addr) == MEM_REF)
4971     {
4972       if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
4973                                  &bitregion_start, &bitregion_end))
4974           return NULL_TREE;
4975       base_addr = TREE_OPERAND (base_addr, 0);
4976     }
4977   /* get_inner_reference returns the base object, get at its
4978      address now.  */
4979   else
4980     {
4981       if (maybe_lt (bitpos, 0))
4982           return NULL_TREE;
4983       base_addr = build_fold_addr_expr (base_addr);
4984     }
4985 
4986   if (offset)
4987     {
4988       /* If the access is variable offset then a base decl has to be
4989            address-taken to be able to emit pointer-based stores to it.
4990            ???  We might be able to get away with re-using the original
4991            base up to the first variable part and then wrapping that inside
4992            a BIT_FIELD_REF.  */
4993       tree base = get_base_address (base_addr);
4994       if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
4995           return NULL_TREE;
4996 
4997       /* Similarly to above for the base, remove constant from the offset.  */
4998       if (TREE_CODE (offset) == PLUS_EXPR
4999             && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
5000             && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
5001                                    &bitpos, &bitregion_start, &bitregion_end))
5002           offset = TREE_OPERAND (offset, 0);
5003 
5004       base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
5005                                 base_addr, offset);
5006     }
5007 
5008   if (known_eq (bitregion_end, 0U))
5009     {
5010       bitregion_start = round_down_to_byte_boundary (bitpos);
5011       bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
5012     }
5013 
5014   *pbitsize = bitsize;
5015   *pbitpos = bitpos;
5016   *pbitregion_start = bitregion_start;
5017   *pbitregion_end = bitregion_end;
5018   return base_addr;
5019 }
5020 
5021 /* Return true if STMT is a load that can be used for store merging.
5022    In that case fill in *OP.  BITSIZE, BITPOS, BITREGION_START and
5023    BITREGION_END are properties of the corresponding store.  */
5024 
5025 static bool
handled_load(gimple * stmt,store_operand_info * op,poly_uint64 bitsize,poly_uint64 bitpos,poly_uint64 bitregion_start,poly_uint64 bitregion_end)5026 handled_load (gimple *stmt, store_operand_info *op,
5027                 poly_uint64 bitsize, poly_uint64 bitpos,
5028                 poly_uint64 bitregion_start, poly_uint64 bitregion_end)
5029 {
5030   if (!is_gimple_assign (stmt))
5031     return false;
5032   if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
5033     {
5034       tree rhs1 = gimple_assign_rhs1 (stmt);
5035       if (TREE_CODE (rhs1) == SSA_NAME
5036             && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
5037                                  bitregion_start, bitregion_end))
5038           {
5039             /* Don't allow _1 = load; _2 = ~1; _3 = ~_2; which should have
5040                been optimized earlier, but if allowed here, would confuse the
5041                multiple uses counting.  */
5042             if (op->bit_not_p)
5043               return false;
5044             op->bit_not_p = !op->bit_not_p;
5045             return true;
5046           }
5047       return false;
5048     }
5049   if (gimple_vuse (stmt)
5050       && gimple_assign_load_p (stmt)
5051       && !stmt_can_throw_internal (cfun, stmt)
5052       && !gimple_has_volatile_ops (stmt))
5053     {
5054       tree mem = gimple_assign_rhs1 (stmt);
5055       op->base_addr
5056           = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
5057                                                &op->bitregion_start,
5058                                                &op->bitregion_end);
5059       if (op->base_addr != NULL_TREE
5060             && known_eq (op->bitsize, bitsize)
5061             && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
5062             && known_ge (op->bitpos - op->bitregion_start,
5063                            bitpos - bitregion_start)
5064             && known_ge (op->bitregion_end - op->bitpos,
5065                            bitregion_end - bitpos))
5066           {
5067             op->stmt = stmt;
5068             op->val = mem;
5069             op->bit_not_p = false;
5070             return true;
5071           }
5072     }
5073   return false;
5074 }
5075 
5076 /* Return the index number of the landing pad for STMT, if any.  */
5077 
5078 static int
lp_nr_for_store(gimple * stmt)5079 lp_nr_for_store (gimple *stmt)
5080 {
5081   if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
5082     return 0;
5083 
5084   if (!stmt_could_throw_p (cfun, stmt))
5085     return 0;
5086 
5087   return lookup_stmt_eh_lp (stmt);
5088 }
5089 
5090 /* Record the store STMT for store merging optimization if it can be
5091    optimized.  Return true if any changes were made.  */
5092 
5093 bool
process_store(gimple * stmt)5094 pass_store_merging::process_store (gimple *stmt)
5095 {
5096   tree lhs = gimple_assign_lhs (stmt);
5097   tree rhs = gimple_assign_rhs1 (stmt);
5098   poly_uint64 bitsize, bitpos = 0;
5099   poly_uint64 bitregion_start = 0, bitregion_end = 0;
5100   tree base_addr
5101     = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
5102                                            &bitregion_start, &bitregion_end);
5103   if (known_eq (bitsize, 0U))
5104     return false;
5105 
5106   bool invalid = (base_addr == NULL_TREE
5107                       || (maybe_gt (bitsize,
5108                                         (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
5109                           && TREE_CODE (rhs) != INTEGER_CST
5110                           && (TREE_CODE (rhs) != CONSTRUCTOR
5111                                 || CONSTRUCTOR_NELTS (rhs) != 0)));
5112   enum tree_code rhs_code = ERROR_MARK;
5113   bool bit_not_p = false;
5114   struct symbolic_number n;
5115   gimple *ins_stmt = NULL;
5116   store_operand_info ops[2];
5117   if (invalid)
5118     ;
5119   else if (TREE_CODE (rhs) == STRING_CST)
5120     {
5121       rhs_code = STRING_CST;
5122       ops[0].val = rhs;
5123     }
5124   else if (rhs_valid_for_store_merging_p (rhs))
5125     {
5126       rhs_code = INTEGER_CST;
5127       ops[0].val = rhs;
5128     }
5129   else if (TREE_CODE (rhs) == SSA_NAME)
5130     {
5131       gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
5132       if (!is_gimple_assign (def_stmt))
5133           invalid = true;
5134       else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
5135                                    bitregion_start, bitregion_end))
5136           rhs_code = MEM_REF;
5137       else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
5138           {
5139             tree rhs1 = gimple_assign_rhs1 (def_stmt);
5140             if (TREE_CODE (rhs1) == SSA_NAME
5141                 && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
5142               {
5143                 bit_not_p = true;
5144                 def_stmt = SSA_NAME_DEF_STMT (rhs1);
5145               }
5146           }
5147 
5148       if (rhs_code == ERROR_MARK && !invalid)
5149           switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
5150             {
5151             case BIT_AND_EXPR:
5152             case BIT_IOR_EXPR:
5153             case BIT_XOR_EXPR:
5154               tree rhs1, rhs2;
5155               rhs1 = gimple_assign_rhs1 (def_stmt);
5156               rhs2 = gimple_assign_rhs2 (def_stmt);
5157               invalid = true;
5158               if (TREE_CODE (rhs1) != SSA_NAME)
5159                 break;
5160               def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
5161               if (!is_gimple_assign (def_stmt1)
5162                     || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
5163                                           bitregion_start, bitregion_end))
5164                 break;
5165               if (rhs_valid_for_store_merging_p (rhs2))
5166                 ops[1].val = rhs2;
5167               else if (TREE_CODE (rhs2) != SSA_NAME)
5168                 break;
5169               else
5170                 {
5171                     def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
5172                     if (!is_gimple_assign (def_stmt2))
5173                       break;
5174                     else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
5175                                                   bitregion_start, bitregion_end))
5176                       break;
5177                 }
5178               invalid = false;
5179               break;
5180             default:
5181               invalid = true;
5182               break;
5183             }
5184 
5185       unsigned HOST_WIDE_INT const_bitsize;
5186       if (bitsize.is_constant (&const_bitsize)
5187             && (const_bitsize % BITS_PER_UNIT) == 0
5188             && const_bitsize <= 64
5189             && multiple_p (bitpos, BITS_PER_UNIT))
5190           {
5191             ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
5192             if (ins_stmt)
5193               {
5194                 uint64_t nn = n.n;
5195                 for (unsigned HOST_WIDE_INT i = 0;
5196                        i < const_bitsize;
5197                        i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
5198                     if ((nn & MARKER_MASK) == 0
5199                         || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
5200                       {
5201                         ins_stmt = NULL;
5202                         break;
5203                       }
5204                 if (ins_stmt)
5205                     {
5206                       if (invalid)
5207                         {
5208                           rhs_code = LROTATE_EXPR;
5209                           ops[0].base_addr = NULL_TREE;
5210                           ops[1].base_addr = NULL_TREE;
5211                         }
5212                       invalid = false;
5213                     }
5214               }
5215           }
5216 
5217       if (invalid
5218             && bitsize.is_constant (&const_bitsize)
5219             && ((const_bitsize % BITS_PER_UNIT) != 0
5220                 || !multiple_p (bitpos, BITS_PER_UNIT))
5221             && const_bitsize <= MAX_FIXED_MODE_SIZE)
5222           {
5223             /* Bypass a conversion to the bit-field type.  */
5224             if (!bit_not_p
5225                 && is_gimple_assign (def_stmt)
5226                 && CONVERT_EXPR_CODE_P (rhs_code))
5227               {
5228                 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5229                 if (TREE_CODE (rhs1) == SSA_NAME
5230                       && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
5231                     rhs = rhs1;
5232               }
5233             rhs_code = BIT_INSERT_EXPR;
5234             bit_not_p = false;
5235             ops[0].val = rhs;
5236             ops[0].base_addr = NULL_TREE;
5237             ops[1].base_addr = NULL_TREE;
5238             invalid = false;
5239           }
5240     }
5241   else
5242     invalid = true;
5243 
5244   unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
5245   unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
5246   if (invalid
5247       || !bitsize.is_constant (&const_bitsize)
5248       || !bitpos.is_constant (&const_bitpos)
5249       || !bitregion_start.is_constant (&const_bitregion_start)
5250       || !bitregion_end.is_constant (&const_bitregion_end))
5251     return terminate_all_aliasing_chains (NULL, stmt);
5252 
5253   if (!ins_stmt)
5254     memset (&n, 0, sizeof (n));
5255 
5256   class imm_store_chain_info **chain_info = NULL;
5257   bool ret = false;
5258   if (base_addr)
5259     chain_info = m_stores.get (base_addr);
5260 
5261   store_immediate_info *info;
5262   if (chain_info)
5263     {
5264       unsigned int ord = (*chain_info)->m_store_info.length ();
5265       info = new store_immediate_info (const_bitsize, const_bitpos,
5266                                                const_bitregion_start,
5267                                                const_bitregion_end,
5268                                                stmt, ord, rhs_code, n, ins_stmt,
5269                                                bit_not_p, lp_nr_for_store (stmt),
5270                                                ops[0], ops[1]);
5271       if (dump_file && (dump_flags & TDF_DETAILS))
5272           {
5273             fprintf (dump_file, "Recording immediate store from stmt:\n");
5274             print_gimple_stmt (dump_file, stmt, 0);
5275           }
5276       (*chain_info)->m_store_info.safe_push (info);
5277       m_n_stores++;
5278       ret |= terminate_all_aliasing_chains (chain_info, stmt);
5279       /* If we reach the limit of stores to merge in a chain terminate and
5280            process the chain now.  */
5281       if ((*chain_info)->m_store_info.length ()
5282             == (unsigned int) param_max_stores_to_merge)
5283           {
5284             if (dump_file && (dump_flags & TDF_DETAILS))
5285               fprintf (dump_file,
5286                          "Reached maximum number of statements to merge:\n");
5287             ret |= terminate_and_process_chain (*chain_info);
5288           }
5289     }
5290   else
5291     {
5292       /* Store aliases any existing chain?  */
5293       ret |= terminate_all_aliasing_chains (NULL, stmt);
5294 
5295       /* Start a new chain.  */
5296       class imm_store_chain_info *new_chain
5297             = new imm_store_chain_info (m_stores_head, base_addr);
5298       info = new store_immediate_info (const_bitsize, const_bitpos,
5299                                                const_bitregion_start,
5300                                                const_bitregion_end,
5301                                                stmt, 0, rhs_code, n, ins_stmt,
5302                                                bit_not_p, lp_nr_for_store (stmt),
5303                                                ops[0], ops[1]);
5304       new_chain->m_store_info.safe_push (info);
5305       m_n_stores++;
5306       m_stores.put (base_addr, new_chain);
5307       m_n_chains++;
5308       if (dump_file && (dump_flags & TDF_DETAILS))
5309           {
5310             fprintf (dump_file, "Starting active chain number %u with statement:\n",
5311                        m_n_chains);
5312             print_gimple_stmt (dump_file, stmt, 0);
5313             fprintf (dump_file, "The base object is:\n");
5314             print_generic_expr (dump_file, base_addr);
5315             fprintf (dump_file, "\n");
5316           }
5317     }
5318 
5319   /* Prune oldest chains so that after adding the chain or store above
5320      we're again within the limits set by the params.  */
5321   if (m_n_chains > (unsigned)param_max_store_chains_to_track
5322       || m_n_stores > (unsigned)param_max_stores_to_track)
5323     {
5324       if (dump_file && (dump_flags & TDF_DETAILS))
5325           fprintf (dump_file, "Too many chains (%u > %d) or stores (%u > %d), "
5326                      "terminating oldest chain(s).\n", m_n_chains,
5327                      param_max_store_chains_to_track, m_n_stores,
5328                      param_max_stores_to_track);
5329       imm_store_chain_info **e = &m_stores_head;
5330       unsigned idx = 0;
5331       unsigned n_stores = 0;
5332       while (*e)
5333           {
5334             if (idx >= (unsigned)param_max_store_chains_to_track
5335                 || (n_stores + (*e)->m_store_info.length ()
5336                       > (unsigned)param_max_stores_to_track))
5337               ret |= terminate_and_process_chain (*e);
5338             else
5339               {
5340                 n_stores += (*e)->m_store_info.length ();
5341                 e = &(*e)->next;
5342                 ++idx;
5343               }
5344           }
5345     }
5346 
5347   return ret;
5348 }
5349 
5350 /* Return true if STMT is a store valid for store merging.  */
5351 
5352 static bool
store_valid_for_store_merging_p(gimple * stmt)5353 store_valid_for_store_merging_p (gimple *stmt)
5354 {
5355   return gimple_assign_single_p (stmt)
5356            && gimple_vdef (stmt)
5357            && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
5358            && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
5359 }
5360 
5361 enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
5362 
5363 /* Return the status of basic block BB wrt store merging.  */
5364 
5365 static enum basic_block_status
get_status_for_store_merging(basic_block bb)5366 get_status_for_store_merging (basic_block bb)
5367 {
5368   unsigned int num_statements = 0;
5369   unsigned int num_constructors = 0;
5370   gimple_stmt_iterator gsi;
5371   edge e;
5372   gimple *last_stmt = NULL;
5373 
5374   for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5375     {
5376       gimple *stmt = gsi_stmt (gsi);
5377 
5378       if (is_gimple_debug (stmt))
5379           continue;
5380 
5381       last_stmt = stmt;
5382 
5383       if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
5384           break;
5385 
5386       if (is_gimple_assign (stmt)
5387             && gimple_assign_rhs_code (stmt) == CONSTRUCTOR)
5388           {
5389             tree rhs = gimple_assign_rhs1 (stmt);
5390             if (VECTOR_TYPE_P (TREE_TYPE (rhs))
5391                 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
5392                 && gimple_assign_lhs (stmt) != NULL_TREE)
5393               {
5394                 HOST_WIDE_INT sz
5395                     = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
5396                 if (sz == 16 || sz == 32 || sz == 64)
5397                     {
5398                       num_constructors = 1;
5399                       break;
5400                     }
5401               }
5402           }
5403     }
5404 
5405   if (num_statements == 0 && num_constructors == 0)
5406     return BB_INVALID;
5407 
5408   if (cfun->can_throw_non_call_exceptions && cfun->eh
5409       && store_valid_for_store_merging_p (last_stmt)
5410       && (e = find_fallthru_edge (bb->succs))
5411       && e->dest == bb->next_bb)
5412     return BB_EXTENDED_VALID;
5413 
5414   return (num_statements >= 2 || num_constructors) ? BB_VALID : BB_INVALID;
5415 }
5416 
5417 /* Entry point for the pass.  Go over each basic block recording chains of
5418    immediate stores.  Upon encountering a terminating statement (as defined
5419    by stmt_terminates_chain_p) process the recorded stores and emit the widened
5420    variants.  */
5421 
5422 unsigned int
execute(function * fun)5423 pass_store_merging::execute (function *fun)
5424 {
5425   basic_block bb;
5426   hash_set<gimple *> orig_stmts;
5427   bool changed = false, open_chains = false;
5428 
5429   /* If the function can throw and catch non-call exceptions, we'll be trying
5430      to merge stores across different basic blocks so we need to first unsplit
5431      the EH edges in order to streamline the CFG of the function.  */
5432   if (cfun->can_throw_non_call_exceptions && cfun->eh)
5433     unsplit_eh_edges ();
5434 
5435   calculate_dominance_info (CDI_DOMINATORS);
5436 
5437   FOR_EACH_BB_FN (bb, fun)
5438     {
5439       const basic_block_status bb_status = get_status_for_store_merging (bb);
5440       gimple_stmt_iterator gsi;
5441 
5442       if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
5443           {
5444             changed |= terminate_and_process_all_chains ();
5445             open_chains = false;
5446           }
5447 
5448       if (bb_status == BB_INVALID)
5449           continue;
5450 
5451       if (dump_file && (dump_flags & TDF_DETAILS))
5452           fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
5453 
5454       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); )
5455           {
5456             gimple *stmt = gsi_stmt (gsi);
5457             gsi_next (&gsi);
5458 
5459             if (is_gimple_debug (stmt))
5460               continue;
5461 
5462             if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
5463               {
5464                 /* Terminate all chains.  */
5465                 if (dump_file && (dump_flags & TDF_DETAILS))
5466                     fprintf (dump_file, "Volatile access terminates "
5467                                             "all chains\n");
5468                 changed |= terminate_and_process_all_chains ();
5469                 open_chains = false;
5470                 continue;
5471               }
5472 
5473             if (is_gimple_assign (stmt)
5474                 && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
5475                 && maybe_optimize_vector_constructor (stmt))
5476               continue;
5477 
5478             if (store_valid_for_store_merging_p (stmt))
5479               changed |= process_store (stmt);
5480             else
5481               changed |= terminate_all_aliasing_chains (NULL, stmt);
5482           }
5483 
5484       if (bb_status == BB_EXTENDED_VALID)
5485           open_chains = true;
5486       else
5487           {
5488             changed |= terminate_and_process_all_chains ();
5489             open_chains = false;
5490           }
5491     }
5492 
5493   if (open_chains)
5494     changed |= terminate_and_process_all_chains ();
5495 
5496   /* If the function can throw and catch non-call exceptions and something
5497      changed during the pass, then the CFG has (very likely) changed too.  */
5498   if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
5499     {
5500       free_dominance_info (CDI_DOMINATORS);
5501       return TODO_cleanup_cfg;
5502     }
5503 
5504   return 0;
5505 }
5506 
5507 } // anon namespace
5508 
5509 /* Construct and return a store merging pass object.  */
5510 
5511 gimple_opt_pass *
make_pass_store_merging(gcc::context * ctxt)5512 make_pass_store_merging (gcc::context *ctxt)
5513 {
5514   return new pass_store_merging (ctxt);
5515 }
5516 
5517 #if CHECKING_P
5518 
5519 namespace selftest {
5520 
5521 /* Selftests for store merging helpers.  */
5522 
5523 /* Assert that all elements of the byte arrays X and Y, both of length N
5524    are equal.  */
5525 
5526 static void
verify_array_eq(unsigned char * x,unsigned char * y,unsigned int n)5527 verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
5528 {
5529   for (unsigned int i = 0; i < n; i++)
5530     {
5531       if (x[i] != y[i])
5532           {
5533             fprintf (stderr, "Arrays do not match.  X:\n");
5534             dump_char_array (stderr, x, n);
5535             fprintf (stderr, "Y:\n");
5536             dump_char_array (stderr, y, n);
5537           }
5538       ASSERT_EQ (x[i], y[i]);
5539     }
5540 }
5541 
5542 /* Test shift_bytes_in_array_left and that it carries bits across between
5543    bytes correctly.  */
5544 
5545 static void
verify_shift_bytes_in_array_left(void)5546 verify_shift_bytes_in_array_left (void)
5547 {
5548    /* byte 1   | byte 0
5549       00011111 | 11100000.  */
5550   unsigned char orig[2] = { 0xe0, 0x1f };
5551   unsigned char in[2];
5552   memcpy (in, orig, sizeof orig);
5553 
5554   unsigned char expected[2] = { 0x80, 0x7f };
5555   shift_bytes_in_array_left (in, sizeof (in), 2);
5556   verify_array_eq (in, expected, sizeof (in));
5557 
5558   memcpy (in, orig, sizeof orig);
5559   memcpy (expected, orig, sizeof orig);
5560   /* Check that shifting by zero doesn't change anything.  */
5561   shift_bytes_in_array_left (in, sizeof (in), 0);
5562   verify_array_eq (in, expected, sizeof (in));
5563 
5564 }
5565 
5566 /* Test shift_bytes_in_array_right and that it carries bits across between
5567    bytes correctly.  */
5568 
5569 static void
verify_shift_bytes_in_array_right(void)5570 verify_shift_bytes_in_array_right (void)
5571 {
5572    /* byte 1   | byte 0
5573       00011111 | 11100000.  */
5574   unsigned char orig[2] = { 0x1f, 0xe0};
5575   unsigned char in[2];
5576   memcpy (in, orig, sizeof orig);
5577   unsigned char expected[2] = { 0x07, 0xf8};
5578   shift_bytes_in_array_right (in, sizeof (in), 2);
5579   verify_array_eq (in, expected, sizeof (in));
5580 
5581   memcpy (in, orig, sizeof orig);
5582   memcpy (expected, orig, sizeof orig);
5583   /* Check that shifting by zero doesn't change anything.  */
5584   shift_bytes_in_array_right (in, sizeof (in), 0);
5585   verify_array_eq (in, expected, sizeof (in));
5586 }
5587 
5588 /* Test clear_bit_region that it clears exactly the bits asked and
5589    nothing more.  */
5590 
5591 static void
verify_clear_bit_region(void)5592 verify_clear_bit_region (void)
5593 {
5594   /* Start with all bits set and test clearing various patterns in them.  */
5595   unsigned char orig[3] = { 0xff, 0xff, 0xff};
5596   unsigned char in[3];
5597   unsigned char expected[3];
5598   memcpy (in, orig, sizeof in);
5599 
5600   /* Check zeroing out all the bits.  */
5601   clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5602   expected[0] = expected[1] = expected[2] = 0;
5603   verify_array_eq (in, expected, sizeof in);
5604 
5605   memcpy (in, orig, sizeof in);
5606   /* Leave the first and last bits intact.  */
5607   clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
5608   expected[0] = 0x1;
5609   expected[1] = 0;
5610   expected[2] = 0x80;
5611   verify_array_eq (in, expected, sizeof in);
5612 }
5613 
5614 /* Test clear_bit_region_be that it clears exactly the bits asked and
5615    nothing more.  */
5616 
5617 static void
verify_clear_bit_region_be(void)5618 verify_clear_bit_region_be (void)
5619 {
5620   /* Start with all bits set and test clearing various patterns in them.  */
5621   unsigned char orig[3] = { 0xff, 0xff, 0xff};
5622   unsigned char in[3];
5623   unsigned char expected[3];
5624   memcpy (in, orig, sizeof in);
5625 
5626   /* Check zeroing out all the bits.  */
5627   clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5628   expected[0] = expected[1] = expected[2] = 0;
5629   verify_array_eq (in, expected, sizeof in);
5630 
5631   memcpy (in, orig, sizeof in);
5632   /* Leave the first and last bits intact.  */
5633   clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
5634   expected[0] = 0x80;
5635   expected[1] = 0;
5636   expected[2] = 0x1;
5637   verify_array_eq (in, expected, sizeof in);
5638 }
5639 
5640 
5641 /* Run all of the selftests within this file.  */
5642 
5643 void
store_merging_cc_tests(void)5644 store_merging_cc_tests (void)
5645 {
5646   verify_shift_bytes_in_array_left ();
5647   verify_shift_bytes_in_array_right ();
5648   verify_clear_bit_region ();
5649   verify_clear_bit_region_be ();
5650 }
5651 
5652 } // namespace selftest
5653 #endif /* CHECKING_P.  */
5654