1 /* Medium-level subroutines: convert bit-field store and extract
2    and shifts, multiplies and divides to rtl instructions.
3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
5    Free Software Foundation, Inc.
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 2, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING.  If not, write to the Free
21 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
22 02110-1301, USA.  */
23 
24 
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "toplev.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "real.h"
38 #include "recog.h"
39 #include "langhooks.h"
40 
41 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
42 				   unsigned HOST_WIDE_INT,
43 				   unsigned HOST_WIDE_INT, rtx);
44 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
45 				   unsigned HOST_WIDE_INT, rtx);
46 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
47 				    unsigned HOST_WIDE_INT,
48 				    unsigned HOST_WIDE_INT,
49 				    unsigned HOST_WIDE_INT, rtx, int);
50 static rtx mask_rtx (enum machine_mode, int, int, int);
51 static rtx lshift_value (enum machine_mode, rtx, int, int);
52 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
53 				    unsigned HOST_WIDE_INT, int);
54 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
55 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
56 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
57 
58 /* Test whether a value is zero of a power of two.  */
59 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
60 
61 /* Nonzero means divides or modulus operations are relatively cheap for
62    powers of two, so don't use branches; emit the operation instead.
63    Usually, this will mean that the MD file will emit non-branch
64    sequences.  */
65 
66 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
67 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
68 
69 #ifndef SLOW_UNALIGNED_ACCESS
70 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
71 #endif
72 
73 /* For compilers that support multiple targets with different word sizes,
74    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
75    is the H8/300(H) compiler.  */
76 
77 #ifndef MAX_BITS_PER_WORD
78 #define MAX_BITS_PER_WORD BITS_PER_WORD
79 #endif
80 
81 /* Reduce conditional compilation elsewhere.  */
82 #ifndef HAVE_insv
83 #define HAVE_insv	0
84 #define CODE_FOR_insv	CODE_FOR_nothing
85 #define gen_insv(a,b,c,d) NULL_RTX
86 #endif
87 #ifndef HAVE_extv
88 #define HAVE_extv	0
89 #define CODE_FOR_extv	CODE_FOR_nothing
90 #define gen_extv(a,b,c,d) NULL_RTX
91 #endif
92 #ifndef HAVE_extzv
93 #define HAVE_extzv	0
94 #define CODE_FOR_extzv	CODE_FOR_nothing
95 #define gen_extzv(a,b,c,d) NULL_RTX
96 #endif
97 
98 /* Cost of various pieces of RTL.  Note that some of these are indexed by
99    shift count and some by mode.  */
100 static int zero_cost;
101 static int add_cost[NUM_MACHINE_MODES];
102 static int neg_cost[NUM_MACHINE_MODES];
103 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
104 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
105 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
106 static int mul_cost[NUM_MACHINE_MODES];
107 static int sdiv_cost[NUM_MACHINE_MODES];
108 static int udiv_cost[NUM_MACHINE_MODES];
109 static int mul_widen_cost[NUM_MACHINE_MODES];
110 static int mul_highpart_cost[NUM_MACHINE_MODES];
111 
112 void
init_expmed(void)113 init_expmed (void)
114 {
115   struct
116   {
117     struct rtx_def reg;		rtunion reg_fld[2];
118     struct rtx_def plus;	rtunion plus_fld1;
119     struct rtx_def neg;
120     struct rtx_def mult;	rtunion mult_fld1;
121     struct rtx_def sdiv;	rtunion sdiv_fld1;
122     struct rtx_def udiv;	rtunion udiv_fld1;
123     struct rtx_def zext;
124     struct rtx_def sdiv_32;	rtunion sdiv_32_fld1;
125     struct rtx_def smod_32;	rtunion smod_32_fld1;
126     struct rtx_def wide_mult;	rtunion wide_mult_fld1;
127     struct rtx_def wide_lshr;	rtunion wide_lshr_fld1;
128     struct rtx_def wide_trunc;
129     struct rtx_def shift;	rtunion shift_fld1;
130     struct rtx_def shift_mult;	rtunion shift_mult_fld1;
131     struct rtx_def shift_add;	rtunion shift_add_fld1;
132     struct rtx_def shift_sub;	rtunion shift_sub_fld1;
133   } all;
134 
135   rtx pow2[MAX_BITS_PER_WORD];
136   rtx cint[MAX_BITS_PER_WORD];
137   int m, n;
138   enum machine_mode mode, wider_mode;
139 
140   zero_cost = rtx_cost (const0_rtx, 0);
141 
142   for (m = 1; m < MAX_BITS_PER_WORD; m++)
143     {
144       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
145       cint[m] = GEN_INT (m);
146     }
147 
148   memset (&all, 0, sizeof all);
149 
150   PUT_CODE (&all.reg, REG);
151   /* Avoid using hard regs in ways which may be unsupported.  */
152   REGNO (&all.reg) = LAST_VIRTUAL_REGISTER + 1;
153 
154   PUT_CODE (&all.plus, PLUS);
155   XEXP (&all.plus, 0) = &all.reg;
156   XEXP (&all.plus, 1) = &all.reg;
157 
158   PUT_CODE (&all.neg, NEG);
159   XEXP (&all.neg, 0) = &all.reg;
160 
161   PUT_CODE (&all.mult, MULT);
162   XEXP (&all.mult, 0) = &all.reg;
163   XEXP (&all.mult, 1) = &all.reg;
164 
165   PUT_CODE (&all.sdiv, DIV);
166   XEXP (&all.sdiv, 0) = &all.reg;
167   XEXP (&all.sdiv, 1) = &all.reg;
168 
169   PUT_CODE (&all.udiv, UDIV);
170   XEXP (&all.udiv, 0) = &all.reg;
171   XEXP (&all.udiv, 1) = &all.reg;
172 
173   PUT_CODE (&all.sdiv_32, DIV);
174   XEXP (&all.sdiv_32, 0) = &all.reg;
175   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
176 
177   PUT_CODE (&all.smod_32, MOD);
178   XEXP (&all.smod_32, 0) = &all.reg;
179   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
180 
181   PUT_CODE (&all.zext, ZERO_EXTEND);
182   XEXP (&all.zext, 0) = &all.reg;
183 
184   PUT_CODE (&all.wide_mult, MULT);
185   XEXP (&all.wide_mult, 0) = &all.zext;
186   XEXP (&all.wide_mult, 1) = &all.zext;
187 
188   PUT_CODE (&all.wide_lshr, LSHIFTRT);
189   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
190 
191   PUT_CODE (&all.wide_trunc, TRUNCATE);
192   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
193 
194   PUT_CODE (&all.shift, ASHIFT);
195   XEXP (&all.shift, 0) = &all.reg;
196 
197   PUT_CODE (&all.shift_mult, MULT);
198   XEXP (&all.shift_mult, 0) = &all.reg;
199 
200   PUT_CODE (&all.shift_add, PLUS);
201   XEXP (&all.shift_add, 0) = &all.shift_mult;
202   XEXP (&all.shift_add, 1) = &all.reg;
203 
204   PUT_CODE (&all.shift_sub, MINUS);
205   XEXP (&all.shift_sub, 0) = &all.shift_mult;
206   XEXP (&all.shift_sub, 1) = &all.reg;
207 
208   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
209        mode != VOIDmode;
210        mode = GET_MODE_WIDER_MODE (mode))
211     {
212       PUT_MODE (&all.reg, mode);
213       PUT_MODE (&all.plus, mode);
214       PUT_MODE (&all.neg, mode);
215       PUT_MODE (&all.mult, mode);
216       PUT_MODE (&all.sdiv, mode);
217       PUT_MODE (&all.udiv, mode);
218       PUT_MODE (&all.sdiv_32, mode);
219       PUT_MODE (&all.smod_32, mode);
220       PUT_MODE (&all.wide_trunc, mode);
221       PUT_MODE (&all.shift, mode);
222       PUT_MODE (&all.shift_mult, mode);
223       PUT_MODE (&all.shift_add, mode);
224       PUT_MODE (&all.shift_sub, mode);
225 
226       add_cost[mode] = rtx_cost (&all.plus, SET);
227       neg_cost[mode] = rtx_cost (&all.neg, SET);
228       mul_cost[mode] = rtx_cost (&all.mult, SET);
229       sdiv_cost[mode] = rtx_cost (&all.sdiv, SET);
230       udiv_cost[mode] = rtx_cost (&all.udiv, SET);
231 
232       sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET)
233 			       <= 2 * add_cost[mode]);
234       smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET)
235 			       <= 4 * add_cost[mode]);
236 
237       wider_mode = GET_MODE_WIDER_MODE (mode);
238       if (wider_mode != VOIDmode)
239 	{
240 	  PUT_MODE (&all.zext, wider_mode);
241 	  PUT_MODE (&all.wide_mult, wider_mode);
242 	  PUT_MODE (&all.wide_lshr, wider_mode);
243 	  XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
244 
245 	  mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
246 	  mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
247 	}
248 
249       shift_cost[mode][0] = 0;
250       shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
251 
252       n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
253       for (m = 1; m < n; m++)
254 	{
255 	  XEXP (&all.shift, 1) = cint[m];
256 	  XEXP (&all.shift_mult, 1) = pow2[m];
257 
258 	  shift_cost[mode][m] = rtx_cost (&all.shift, SET);
259 	  shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
260 	  shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
261 	}
262     }
263 }
264 
265 /* Return an rtx representing minus the value of X.
266    MODE is the intended mode of the result,
267    useful if X is a CONST_INT.  */
268 
269 rtx
negate_rtx(enum machine_mode mode,rtx x)270 negate_rtx (enum machine_mode mode, rtx x)
271 {
272   rtx result = simplify_unary_operation (NEG, mode, x, mode);
273 
274   if (result == 0)
275     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
276 
277   return result;
278 }
279 
280 /* Report on the availability of insv/extv/extzv and the desired mode
281    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
282    is false; else the mode of the specified operand.  If OPNO is -1,
283    all the caller cares about is whether the insn is available.  */
284 enum machine_mode
mode_for_extraction(enum extraction_pattern pattern,int opno)285 mode_for_extraction (enum extraction_pattern pattern, int opno)
286 {
287   const struct insn_data *data;
288 
289   switch (pattern)
290     {
291     case EP_insv:
292       if (HAVE_insv)
293 	{
294 	  data = &insn_data[CODE_FOR_insv];
295 	  break;
296 	}
297       return MAX_MACHINE_MODE;
298 
299     case EP_extv:
300       if (HAVE_extv)
301 	{
302 	  data = &insn_data[CODE_FOR_extv];
303 	  break;
304 	}
305       return MAX_MACHINE_MODE;
306 
307     case EP_extzv:
308       if (HAVE_extzv)
309 	{
310 	  data = &insn_data[CODE_FOR_extzv];
311 	  break;
312 	}
313       return MAX_MACHINE_MODE;
314 
315     default:
316       gcc_unreachable ();
317     }
318 
319   if (opno == -1)
320     return VOIDmode;
321 
322   /* Everyone who uses this function used to follow it with
323      if (result == VOIDmode) result = word_mode; */
324   if (data->operand[opno].mode == VOIDmode)
325     return word_mode;
326   return data->operand[opno].mode;
327 }
328 
329 
330 /* Generate code to store value from rtx VALUE
331    into a bit-field within structure STR_RTX
332    containing BITSIZE bits starting at bit BITNUM.
333    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
334    ALIGN is the alignment that STR_RTX is known to have.
335    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
336 
337 /* ??? Note that there are two different ideas here for how
338    to determine the size to count bits within, for a register.
339    One is BITS_PER_WORD, and the other is the size of operand 3
340    of the insv pattern.
341 
342    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
343    else, we use the mode of operand 3.  */
344 
345 rtx
store_bit_field(rtx str_rtx,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,enum machine_mode fieldmode,rtx value)346 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
347 		 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
348 		 rtx value)
349 {
350   unsigned int unit
351     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
352   unsigned HOST_WIDE_INT offset, bitpos;
353   rtx op0 = str_rtx;
354   int byte_offset;
355   rtx orig_value;
356 
357   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
358 
359   while (GET_CODE (op0) == SUBREG)
360     {
361       /* The following line once was done only if WORDS_BIG_ENDIAN,
362 	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
363 	 meaningful at a much higher level; when structures are copied
364 	 between memory and regs, the higher-numbered regs
365 	 always get higher addresses.  */
366       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
367       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
368 
369       byte_offset = 0;
370 
371       /* Paradoxical subregs need special handling on big endian machines.  */
372       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
373 	{
374 	  int difference = inner_mode_size - outer_mode_size;
375 
376 	  if (WORDS_BIG_ENDIAN)
377 	    byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
378 	  if (BYTES_BIG_ENDIAN)
379 	    byte_offset += difference % UNITS_PER_WORD;
380 	}
381       else
382 	byte_offset = SUBREG_BYTE (op0);
383 
384       bitnum += byte_offset * BITS_PER_UNIT;
385       op0 = SUBREG_REG (op0);
386     }
387 
388   /* No action is needed if the target is a register and if the field
389      lies completely outside that register.  This can occur if the source
390      code contains an out-of-bounds access to a small array.  */
391   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
392     return value;
393 
394   /* Use vec_set patterns for inserting parts of vectors whenever
395      available.  */
396   if (VECTOR_MODE_P (GET_MODE (op0))
397       && !MEM_P (op0)
398       && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
399 	  != CODE_FOR_nothing)
400       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
401       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
402       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
403     {
404       enum machine_mode outermode = GET_MODE (op0);
405       enum machine_mode innermode = GET_MODE_INNER (outermode);
406       int icode = (int) vec_set_optab->handlers[outermode].insn_code;
407       int pos = bitnum / GET_MODE_BITSIZE (innermode);
408       rtx rtxpos = GEN_INT (pos);
409       rtx src = value;
410       rtx dest = op0;
411       rtx pat, seq;
412       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
413       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
414       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
415 
416       start_sequence ();
417 
418       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
419 	src = copy_to_mode_reg (mode1, src);
420 
421       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
422 	rtxpos = copy_to_mode_reg (mode1, rtxpos);
423 
424       /* We could handle this, but we should always be called with a pseudo
425 	 for our targets and all insns should take them as outputs.  */
426       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
427 		  && (*insn_data[icode].operand[1].predicate) (src, mode1)
428 		  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
429       pat = GEN_FCN (icode) (dest, src, rtxpos);
430       seq = get_insns ();
431       end_sequence ();
432       if (pat)
433 	{
434 	  emit_insn (seq);
435 	  emit_insn (pat);
436 	  return dest;
437 	}
438     }
439 
440   /* If the target is a register, overwriting the entire object, or storing
441      a full-word or multi-word field can be done with just a SUBREG.
442 
443      If the target is memory, storing any naturally aligned field can be
444      done with a simple store.  For targets that support fast unaligned
445      memory, any naturally sized, unit aligned field can be done directly.  */
446 
447   offset = bitnum / unit;
448   bitpos = bitnum % unit;
449   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
450                 + (offset * UNITS_PER_WORD);
451 
452   if (bitpos == 0
453       && bitsize == GET_MODE_BITSIZE (fieldmode)
454       && (!MEM_P (op0)
455 	  ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
456 	     || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
457 	     && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
458 	  : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
459 	     || (offset * BITS_PER_UNIT % bitsize == 0
460 		 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
461     {
462       if (MEM_P (op0))
463 	op0 = adjust_address (op0, fieldmode, offset);
464       else if (GET_MODE (op0) != fieldmode)
465 	op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
466 				   byte_offset);
467       emit_move_insn (op0, value);
468       return value;
469     }
470 
471   /* Make sure we are playing with integral modes.  Pun with subregs
472      if we aren't.  This must come after the entire register case above,
473      since that case is valid for any mode.  The following cases are only
474      valid for integral modes.  */
475   {
476     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
477     if (imode != GET_MODE (op0))
478       {
479 	if (MEM_P (op0))
480 	  op0 = adjust_address (op0, imode, 0);
481 	else
482 	  {
483 	    gcc_assert (imode != BLKmode);
484 	    op0 = gen_lowpart (imode, op0);
485 	  }
486       }
487   }
488 
489   /* We may be accessing data outside the field, which means
490      we can alias adjacent data.  */
491   if (MEM_P (op0))
492     {
493       op0 = shallow_copy_rtx (op0);
494       set_mem_alias_set (op0, 0);
495       set_mem_expr (op0, 0);
496     }
497 
498   /* If OP0 is a register, BITPOS must count within a word.
499      But as we have it, it counts within whatever size OP0 now has.
500      On a bigendian machine, these are not the same, so convert.  */
501   if (BYTES_BIG_ENDIAN
502       && !MEM_P (op0)
503       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
504     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
505 
506   /* Storing an lsb-aligned field in a register
507      can be done with a movestrict instruction.  */
508 
509   if (!MEM_P (op0)
510       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
511       && bitsize == GET_MODE_BITSIZE (fieldmode)
512       && (movstrict_optab->handlers[fieldmode].insn_code
513 	  != CODE_FOR_nothing))
514     {
515       int icode = movstrict_optab->handlers[fieldmode].insn_code;
516 
517       /* Get appropriate low part of the value being stored.  */
518       if (GET_CODE (value) == CONST_INT || REG_P (value))
519 	value = gen_lowpart (fieldmode, value);
520       else if (!(GET_CODE (value) == SYMBOL_REF
521 		 || GET_CODE (value) == LABEL_REF
522 		 || GET_CODE (value) == CONST))
523 	value = convert_to_mode (fieldmode, value, 0);
524 
525       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
526 	value = copy_to_mode_reg (fieldmode, value);
527 
528       if (GET_CODE (op0) == SUBREG)
529 	{
530 	  /* Else we've got some float mode source being extracted into
531 	     a different float mode destination -- this combination of
532 	     subregs results in Severe Tire Damage.  */
533 	  gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
534 		      || GET_MODE_CLASS (fieldmode) == MODE_INT
535 		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
536 	  op0 = SUBREG_REG (op0);
537 	}
538 
539       emit_insn (GEN_FCN (icode)
540 		 (gen_rtx_SUBREG (fieldmode, op0,
541 				  (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
542 				  + (offset * UNITS_PER_WORD)),
543 				  value));
544 
545       return value;
546     }
547 
548   /* Handle fields bigger than a word.  */
549 
550   if (bitsize > BITS_PER_WORD)
551     {
552       /* Here we transfer the words of the field
553 	 in the order least significant first.
554 	 This is because the most significant word is the one which may
555 	 be less than full.
556 	 However, only do that if the value is not BLKmode.  */
557 
558       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
559       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
560       unsigned int i;
561 
562       /* This is the mode we must force value to, so that there will be enough
563 	 subwords to extract.  Note that fieldmode will often (always?) be
564 	 VOIDmode, because that is what store_field uses to indicate that this
565 	 is a bit field, but passing VOIDmode to operand_subword_force
566 	 is not allowed.  */
567       fieldmode = GET_MODE (value);
568       if (fieldmode == VOIDmode)
569 	fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
570 
571       for (i = 0; i < nwords; i++)
572 	{
573 	  /* If I is 0, use the low-order word in both field and target;
574 	     if I is 1, use the next to lowest word; and so on.  */
575 	  unsigned int wordnum = (backwards ? nwords - i - 1 : i);
576 	  unsigned int bit_offset = (backwards
577 				     ? MAX ((int) bitsize - ((int) i + 1)
578 					    * BITS_PER_WORD,
579 					    0)
580 				     : (int) i * BITS_PER_WORD);
581 
582 	  store_bit_field (op0, MIN (BITS_PER_WORD,
583 				     bitsize - i * BITS_PER_WORD),
584 			   bitnum + bit_offset, word_mode,
585 			   operand_subword_force (value, wordnum, fieldmode));
586 	}
587       return value;
588     }
589 
590   /* From here on we can assume that the field to be stored in is
591      a full-word (whatever type that is), since it is shorter than a word.  */
592 
593   /* OFFSET is the number of words or bytes (UNIT says which)
594      from STR_RTX to the first word or byte containing part of the field.  */
595 
596   if (!MEM_P (op0))
597     {
598       if (offset != 0
599 	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
600 	{
601 	  if (!REG_P (op0))
602 	    {
603 	      /* Since this is a destination (lvalue), we can't copy
604 		 it to a pseudo.  We can remove a SUBREG that does not
605 		 change the size of the operand.  Such a SUBREG may
606 		 have been added above.  */
607 	      gcc_assert (GET_CODE (op0) == SUBREG
608 			  && (GET_MODE_SIZE (GET_MODE (op0))
609 			      == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
610 	      op0 = SUBREG_REG (op0);
611 	    }
612 	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
613 		                op0, (offset * UNITS_PER_WORD));
614 	}
615       offset = 0;
616     }
617 
618   /* If VALUE has a floating-point or complex mode, access it as an
619      integer of the corresponding size.  This can occur on a machine
620      with 64 bit registers that uses SFmode for float.  It can also
621      occur for unaligned float or complex fields.  */
622   orig_value = value;
623   if (GET_MODE (value) != VOIDmode
624       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
625       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
626     {
627       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
628       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
629     }
630 
631   /* Now OFFSET is nonzero only if OP0 is memory
632      and is therefore always measured in bytes.  */
633 
634   if (HAVE_insv
635       && GET_MODE (value) != BLKmode
636       && bitsize > 0
637       && GET_MODE_BITSIZE (op_mode) >= bitsize
638       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
639 	    && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
640       && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
641 							VOIDmode))
642     {
643       int xbitpos = bitpos;
644       rtx value1;
645       rtx xop0 = op0;
646       rtx last = get_last_insn ();
647       rtx pat;
648       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
649       int save_volatile_ok = volatile_ok;
650 
651       volatile_ok = 1;
652 
653       /* If this machine's insv can only insert into a register, copy OP0
654 	 into a register and save it back later.  */
655       if (MEM_P (op0)
656 	  && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
657 		(op0, VOIDmode)))
658 	{
659 	  rtx tempreg;
660 	  enum machine_mode bestmode;
661 
662 	  /* Get the mode to use for inserting into this field.  If OP0 is
663 	     BLKmode, get the smallest mode consistent with the alignment. If
664 	     OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
665 	     mode. Otherwise, use the smallest mode containing the field.  */
666 
667 	  if (GET_MODE (op0) == BLKmode
668 	      || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
669 	    bestmode
670 	      = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
671 			       MEM_VOLATILE_P (op0));
672 	  else
673 	    bestmode = GET_MODE (op0);
674 
675 	  if (bestmode == VOIDmode
676 	      || GET_MODE_SIZE (bestmode) < GET_MODE_SIZE (fieldmode)
677 	      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
678 		  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
679 	    goto insv_loses;
680 
681 	  /* Adjust address to point to the containing unit of that mode.
682 	     Compute offset as multiple of this unit, counting in bytes.  */
683 	  unit = GET_MODE_BITSIZE (bestmode);
684 	  offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
685 	  bitpos = bitnum % unit;
686 	  op0 = adjust_address (op0, bestmode,  offset);
687 
688 	  /* Fetch that unit, store the bitfield in it, then store
689 	     the unit.  */
690 	  tempreg = copy_to_reg (op0);
691 	  store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
692 	  emit_move_insn (op0, tempreg);
693 	  return value;
694 	}
695       volatile_ok = save_volatile_ok;
696 
697       /* Add OFFSET into OP0's address.  */
698       if (MEM_P (xop0))
699 	xop0 = adjust_address (xop0, byte_mode, offset);
700 
701       /* If xop0 is a register, we need it in MAXMODE
702 	 to make it acceptable to the format of insv.  */
703       if (GET_CODE (xop0) == SUBREG)
704 	/* We can't just change the mode, because this might clobber op0,
705 	   and we will need the original value of op0 if insv fails.  */
706 	xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
707       if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
708 	xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
709 
710       /* On big-endian machines, we count bits from the most significant.
711 	 If the bit field insn does not, we must invert.  */
712 
713       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
714 	xbitpos = unit - bitsize - xbitpos;
715 
716       /* We have been counting XBITPOS within UNIT.
717 	 Count instead within the size of the register.  */
718       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
719 	xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
720 
721       unit = GET_MODE_BITSIZE (maxmode);
722 
723       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
724       value1 = value;
725       if (GET_MODE (value) != maxmode)
726 	{
727 	  if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
728 	    {
729 	      /* Optimization: Don't bother really extending VALUE
730 		 if it has all the bits we will actually use.  However,
731 		 if we must narrow it, be sure we do it correctly.  */
732 
733 	      if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
734 		{
735 		  rtx tmp;
736 
737 		  tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
738 		  if (! tmp)
739 		    tmp = simplify_gen_subreg (maxmode,
740 					       force_reg (GET_MODE (value),
741 							  value1),
742 					       GET_MODE (value), 0);
743 		  value1 = tmp;
744 		}
745 	      else
746 		value1 = gen_lowpart (maxmode, value1);
747 	    }
748 	  else if (GET_CODE (value) == CONST_INT)
749 	    value1 = gen_int_mode (INTVAL (value), maxmode);
750 	  else
751 	    /* Parse phase is supposed to make VALUE's data type
752 	       match that of the component reference, which is a type
753 	       at least as wide as the field; so VALUE should have
754 	       a mode that corresponds to that type.  */
755 	    gcc_assert (CONSTANT_P (value));
756 	}
757 
758       /* If this machine's insv insists on a register,
759 	 get VALUE1 into a register.  */
760       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
761 	     (value1, maxmode)))
762 	value1 = force_reg (maxmode, value1);
763 
764       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
765       if (pat)
766 	emit_insn (pat);
767       else
768 	{
769 	  delete_insns_since (last);
770 	  store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
771 	}
772     }
773   else
774     insv_loses:
775     /* Insv is not available; store using shifts and boolean ops.  */
776     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
777   return value;
778 }
779 
780 /* Use shifts and boolean operations to store VALUE
781    into a bit field of width BITSIZE
782    in a memory location specified by OP0 except offset by OFFSET bytes.
783      (OFFSET must be 0 if OP0 is a register.)
784    The field starts at position BITPOS within the byte.
785     (If OP0 is a register, it may be a full word or a narrower mode,
786      but BITPOS still counts within a full word,
787      which is significant on bigendian machines.)  */
788 
789 static void
store_fixed_bit_field(rtx op0,unsigned HOST_WIDE_INT offset,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitpos,rtx value)790 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
791 		       unsigned HOST_WIDE_INT bitsize,
792 		       unsigned HOST_WIDE_INT bitpos, rtx value)
793 {
794   enum machine_mode mode;
795   unsigned int total_bits = BITS_PER_WORD;
796   rtx temp;
797   int all_zero = 0;
798   int all_one = 0;
799 
800   /* There is a case not handled here:
801      a structure with a known alignment of just a halfword
802      and a field split across two aligned halfwords within the structure.
803      Or likewise a structure with a known alignment of just a byte
804      and a field split across two bytes.
805      Such cases are not supposed to be able to occur.  */
806 
807   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
808     {
809       gcc_assert (!offset);
810       /* Special treatment for a bit field split across two registers.  */
811       if (bitsize + bitpos > BITS_PER_WORD)
812 	{
813 	  store_split_bit_field (op0, bitsize, bitpos, value);
814 	  return;
815 	}
816     }
817   else
818     {
819       /* Get the proper mode to use for this field.  We want a mode that
820 	 includes the entire field.  If such a mode would be larger than
821 	 a word, we won't be doing the extraction the normal way.
822 	 We don't want a mode bigger than the destination.  */
823 
824       mode = GET_MODE (op0);
825       if (GET_MODE_BITSIZE (mode) == 0
826 	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
827 	mode = word_mode;
828       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
829 			    MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
830 
831       if (mode == VOIDmode)
832 	{
833 	  /* The only way this should occur is if the field spans word
834 	     boundaries.  */
835 	  store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
836 				 value);
837 	  return;
838 	}
839 
840       total_bits = GET_MODE_BITSIZE (mode);
841 
842       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
843 	 be in the range 0 to total_bits-1, and put any excess bytes in
844 	 OFFSET.  */
845       if (bitpos >= total_bits)
846 	{
847 	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
848 	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
849 		     * BITS_PER_UNIT);
850 	}
851 
852       /* Get ref to an aligned byte, halfword, or word containing the field.
853 	 Adjust BITPOS to be position within a word,
854 	 and OFFSET to be the offset of that word.
855 	 Then alter OP0 to refer to that word.  */
856       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
857       offset -= (offset % (total_bits / BITS_PER_UNIT));
858       op0 = adjust_address (op0, mode, offset);
859     }
860 
861   mode = GET_MODE (op0);
862 
863   /* Now MODE is either some integral mode for a MEM as OP0,
864      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
865      The bit field is contained entirely within OP0.
866      BITPOS is the starting bit number within OP0.
867      (OP0's mode may actually be narrower than MODE.)  */
868 
869   if (BYTES_BIG_ENDIAN)
870       /* BITPOS is the distance between our msb
871 	 and that of the containing datum.
872 	 Convert it to the distance from the lsb.  */
873       bitpos = total_bits - bitsize - bitpos;
874 
875   /* Now BITPOS is always the distance between our lsb
876      and that of OP0.  */
877 
878   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
879      we must first convert its mode to MODE.  */
880 
881   if (GET_CODE (value) == CONST_INT)
882     {
883       HOST_WIDE_INT v = INTVAL (value);
884 
885       if (bitsize < HOST_BITS_PER_WIDE_INT)
886 	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
887 
888       if (v == 0)
889 	all_zero = 1;
890       else if ((bitsize < HOST_BITS_PER_WIDE_INT
891 		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
892 	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
893 	all_one = 1;
894 
895       value = lshift_value (mode, value, bitpos, bitsize);
896     }
897   else
898     {
899       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
900 		      && bitpos + bitsize != GET_MODE_BITSIZE (mode));
901 
902       if (GET_MODE (value) != mode)
903 	{
904 	  if ((REG_P (value) || GET_CODE (value) == SUBREG)
905 	      && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
906 	    value = gen_lowpart (mode, value);
907 	  else
908 	    value = convert_to_mode (mode, value, 1);
909 	}
910 
911       if (must_and)
912 	value = expand_binop (mode, and_optab, value,
913 			      mask_rtx (mode, 0, bitsize, 0),
914 			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
915       if (bitpos > 0)
916 	value = expand_shift (LSHIFT_EXPR, mode, value,
917 			      build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
918     }
919 
920   /* Now clear the chosen bits in OP0,
921      except that if VALUE is -1 we need not bother.  */
922   /* We keep the intermediates in registers to allow CSE to combine
923      consecutive bitfield assignments.  */
924 
925   temp = force_reg (mode, op0);
926 
927   if (! all_one)
928     {
929       temp = expand_binop (mode, and_optab, temp,
930 			   mask_rtx (mode, bitpos, bitsize, 1),
931 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
932       temp = force_reg (mode, temp);
933     }
934 
935   /* Now logical-or VALUE into OP0, unless it is zero.  */
936 
937   if (! all_zero)
938     {
939       temp = expand_binop (mode, ior_optab, temp, value,
940 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
941       temp = force_reg (mode, temp);
942     }
943 
944   if (op0 != temp)
945     emit_move_insn (op0, temp);
946 }
947 
948 /* Store a bit field that is split across multiple accessible memory objects.
949 
950    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
951    BITSIZE is the field width; BITPOS the position of its first bit
952    (within the word).
953    VALUE is the value to store.
954 
955    This does not yet handle fields wider than BITS_PER_WORD.  */
956 
957 static void
store_split_bit_field(rtx op0,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitpos,rtx value)958 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
959 		       unsigned HOST_WIDE_INT bitpos, rtx value)
960 {
961   unsigned int unit;
962   unsigned int bitsdone = 0;
963 
964   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
965      much at a time.  */
966   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
967     unit = BITS_PER_WORD;
968   else
969     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
970 
971   /* If VALUE is a constant other than a CONST_INT, get it into a register in
972      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
973      that VALUE might be a floating-point constant.  */
974   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
975     {
976       rtx word = gen_lowpart_common (word_mode, value);
977 
978       if (word && (value != word))
979 	value = word;
980       else
981 	value = gen_lowpart_common (word_mode,
982 				    force_reg (GET_MODE (value) != VOIDmode
983 					       ? GET_MODE (value)
984 					       : word_mode, value));
985     }
986 
987   while (bitsdone < bitsize)
988     {
989       unsigned HOST_WIDE_INT thissize;
990       rtx part, word;
991       unsigned HOST_WIDE_INT thispos;
992       unsigned HOST_WIDE_INT offset;
993 
994       offset = (bitpos + bitsdone) / unit;
995       thispos = (bitpos + bitsdone) % unit;
996 
997       /* THISSIZE must not overrun a word boundary.  Otherwise,
998 	 store_fixed_bit_field will call us again, and we will mutually
999 	 recurse forever.  */
1000       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1001       thissize = MIN (thissize, unit - thispos);
1002 
1003       if (BYTES_BIG_ENDIAN)
1004 	{
1005 	  int total_bits;
1006 
1007 	  /* We must do an endian conversion exactly the same way as it is
1008 	     done in extract_bit_field, so that the two calls to
1009 	     extract_fixed_bit_field will have comparable arguments.  */
1010 	  if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1011 	    total_bits = BITS_PER_WORD;
1012 	  else
1013 	    total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1014 
1015 	  /* Fetch successively less significant portions.  */
1016 	  if (GET_CODE (value) == CONST_INT)
1017 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1018 			     >> (bitsize - bitsdone - thissize))
1019 			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
1020 	  else
1021 	    /* The args are chosen so that the last part includes the
1022 	       lsb.  Give extract_bit_field the value it needs (with
1023 	       endianness compensation) to fetch the piece we want.  */
1024 	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1025 					    total_bits - bitsize + bitsdone,
1026 					    NULL_RTX, 1);
1027 	}
1028       else
1029 	{
1030 	  /* Fetch successively more significant portions.  */
1031 	  if (GET_CODE (value) == CONST_INT)
1032 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1033 			     >> bitsdone)
1034 			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
1035 	  else
1036 	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1037 					    bitsdone, NULL_RTX, 1);
1038 	}
1039 
1040       /* If OP0 is a register, then handle OFFSET here.
1041 
1042 	 When handling multiword bitfields, extract_bit_field may pass
1043 	 down a word_mode SUBREG of a larger REG for a bitfield that actually
1044 	 crosses a word boundary.  Thus, for a SUBREG, we must find
1045 	 the current word starting from the base register.  */
1046       if (GET_CODE (op0) == SUBREG)
1047 	{
1048 	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1049 	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
1050 					GET_MODE (SUBREG_REG (op0)));
1051 	  offset = 0;
1052 	}
1053       else if (REG_P (op0))
1054 	{
1055 	  word = operand_subword_force (op0, offset, GET_MODE (op0));
1056 	  offset = 0;
1057 	}
1058       else
1059 	word = op0;
1060 
1061       /* OFFSET is in UNITs, and UNIT is in bits.
1062          store_fixed_bit_field wants offset in bytes.  */
1063       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1064 			     thispos, part);
1065       bitsdone += thissize;
1066     }
1067 }
1068 
1069 /* Generate code to extract a byte-field from STR_RTX
1070    containing BITSIZE bits, starting at BITNUM,
1071    and put it in TARGET if possible (if TARGET is nonzero).
1072    Regardless of TARGET, we return the rtx for where the value is placed.
1073 
1074    STR_RTX is the structure containing the byte (a REG or MEM).
1075    UNSIGNEDP is nonzero if this is an unsigned bit field.
1076    MODE is the natural mode of the field value once extracted.
1077    TMODE is the mode the caller would like the value to have;
1078    but the value may be returned with type MODE instead.
1079 
1080    TOTAL_SIZE is the size in bytes of the containing structure,
1081    or -1 if varying.
1082 
1083    If a TARGET is specified and we can store in it at no extra cost,
1084    we do so, and return TARGET.
1085    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1086    if they are equally easy.  */
1087 
1088 rtx
extract_bit_field(rtx str_rtx,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,int unsignedp,rtx target,enum machine_mode mode,enum machine_mode tmode)1089 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1090 		   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1091 		   enum machine_mode mode, enum machine_mode tmode)
1092 {
1093   unsigned int unit
1094     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1095   unsigned HOST_WIDE_INT offset, bitpos;
1096   rtx op0 = str_rtx;
1097   rtx spec_target = target;
1098   rtx spec_target_subreg = 0;
1099   enum machine_mode int_mode;
1100   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1101   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1102   enum machine_mode mode1;
1103   int byte_offset;
1104 
1105   if (tmode == VOIDmode)
1106     tmode = mode;
1107 
1108   while (GET_CODE (op0) == SUBREG)
1109     {
1110       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1111       op0 = SUBREG_REG (op0);
1112     }
1113 
1114   /* If we have an out-of-bounds access to a register, just return an
1115      uninitialized register of the required mode.  This can occur if the
1116      source code contains an out-of-bounds access to a small array.  */
1117   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1118     return gen_reg_rtx (tmode);
1119 
1120   if (REG_P (op0)
1121       && mode == GET_MODE (op0)
1122       && bitnum == 0
1123       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1124     {
1125       /* We're trying to extract a full register from itself.  */
1126       return op0;
1127     }
1128 
1129   /* Use vec_extract patterns for extracting parts of vectors whenever
1130      available.  */
1131   if (VECTOR_MODE_P (GET_MODE (op0))
1132       && !MEM_P (op0)
1133       && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1134 	  != CODE_FOR_nothing)
1135       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1136 	  == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1137     {
1138       enum machine_mode outermode = GET_MODE (op0);
1139       enum machine_mode innermode = GET_MODE_INNER (outermode);
1140       int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1141       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1142       rtx rtxpos = GEN_INT (pos);
1143       rtx src = op0;
1144       rtx dest = NULL, pat, seq;
1145       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1146       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1147       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1148 
1149       if (innermode == tmode || innermode == mode)
1150 	dest = target;
1151 
1152       if (!dest)
1153 	dest = gen_reg_rtx (innermode);
1154 
1155       start_sequence ();
1156 
1157       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1158 	dest = copy_to_mode_reg (mode0, dest);
1159 
1160       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1161 	src = copy_to_mode_reg (mode1, src);
1162 
1163       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1164 	rtxpos = copy_to_mode_reg (mode1, rtxpos);
1165 
1166       /* We could handle this, but we should always be called with a pseudo
1167 	 for our targets and all insns should take them as outputs.  */
1168       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1169 		  && (*insn_data[icode].operand[1].predicate) (src, mode1)
1170 		  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1171 
1172       pat = GEN_FCN (icode) (dest, src, rtxpos);
1173       seq = get_insns ();
1174       end_sequence ();
1175       if (pat)
1176 	{
1177 	  emit_insn (seq);
1178 	  emit_insn (pat);
1179 	  return dest;
1180 	}
1181     }
1182 
1183   /* Make sure we are playing with integral modes.  Pun with subregs
1184      if we aren't.  */
1185   {
1186     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1187     if (imode != GET_MODE (op0))
1188       {
1189 	if (MEM_P (op0))
1190 	  op0 = adjust_address (op0, imode, 0);
1191 	else
1192 	  {
1193 	    gcc_assert (imode != BLKmode);
1194 	    op0 = gen_lowpart (imode, op0);
1195 
1196 	    /* If we got a SUBREG, force it into a register since we
1197 	       aren't going to be able to do another SUBREG on it.  */
1198 	    if (GET_CODE (op0) == SUBREG)
1199 	      op0 = force_reg (imode, op0);
1200 	  }
1201       }
1202   }
1203 
1204   /* We may be accessing data outside the field, which means
1205      we can alias adjacent data.  */
1206   if (MEM_P (op0))
1207     {
1208       op0 = shallow_copy_rtx (op0);
1209       set_mem_alias_set (op0, 0);
1210       set_mem_expr (op0, 0);
1211     }
1212 
1213   /* Extraction of a full-word or multi-word value from a structure
1214      in a register or aligned memory can be done with just a SUBREG.
1215      A subword value in the least significant part of a register
1216      can also be extracted with a SUBREG.  For this, we need the
1217      byte offset of the value in op0.  */
1218 
1219   bitpos = bitnum % unit;
1220   offset = bitnum / unit;
1221   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1222 
1223   /* If OP0 is a register, BITPOS must count within a word.
1224      But as we have it, it counts within whatever size OP0 now has.
1225      On a bigendian machine, these are not the same, so convert.  */
1226   if (BYTES_BIG_ENDIAN
1227       && !MEM_P (op0)
1228       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1229     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1230 
1231   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1232      If that's wrong, the solution is to test for it and set TARGET to 0
1233      if needed.  */
1234 
1235   /* Only scalar integer modes can be converted via subregs.  There is an
1236      additional problem for FP modes here in that they can have a precision
1237      which is different from the size.  mode_for_size uses precision, but
1238      we want a mode based on the size, so we must avoid calling it for FP
1239      modes.  */
1240   mode1  = (SCALAR_INT_MODE_P (tmode)
1241 	    ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1242 	    : mode);
1243 
1244   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1245 	&& bitpos % BITS_PER_WORD == 0)
1246        || (mode1 != BLKmode
1247 	   /* ??? The big endian test here is wrong.  This is correct
1248 	      if the value is in a register, and if mode_for_size is not
1249 	      the same mode as op0.  This causes us to get unnecessarily
1250 	      inefficient code from the Thumb port when -mbig-endian.  */
1251 	   && (BYTES_BIG_ENDIAN
1252 	       ? bitpos + bitsize == BITS_PER_WORD
1253 	       : bitpos == 0)))
1254       && ((!MEM_P (op0)
1255 	   && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1256 				     GET_MODE_BITSIZE (GET_MODE (op0)))
1257 	   && GET_MODE_SIZE (mode1) != 0
1258 	   && byte_offset % GET_MODE_SIZE (mode1) == 0)
1259 	  || (MEM_P (op0)
1260 	      && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1261 		  || (offset * BITS_PER_UNIT % bitsize == 0
1262 		      && MEM_ALIGN (op0) % bitsize == 0)))))
1263     {
1264       if (mode1 != GET_MODE (op0))
1265 	{
1266 	  if (MEM_P (op0))
1267 	    op0 = adjust_address (op0, mode1, offset);
1268 	  else
1269 	    {
1270 	      rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1271 					     byte_offset);
1272 	      if (sub == NULL)
1273 		goto no_subreg_mode_swap;
1274 	      op0 = sub;
1275 	    }
1276 	}
1277       if (mode1 != mode)
1278 	return convert_to_mode (tmode, op0, unsignedp);
1279       return op0;
1280     }
1281  no_subreg_mode_swap:
1282 
1283   /* Handle fields bigger than a word.  */
1284 
1285   if (bitsize > BITS_PER_WORD)
1286     {
1287       /* Here we transfer the words of the field
1288 	 in the order least significant first.
1289 	 This is because the most significant word is the one which may
1290 	 be less than full.  */
1291 
1292       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1293       unsigned int i;
1294 
1295       if (target == 0 || !REG_P (target))
1296 	target = gen_reg_rtx (mode);
1297 
1298       /* Indicate for flow that the entire target reg is being set.  */
1299       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1300 
1301       for (i = 0; i < nwords; i++)
1302 	{
1303 	  /* If I is 0, use the low-order word in both field and target;
1304 	     if I is 1, use the next to lowest word; and so on.  */
1305 	  /* Word number in TARGET to use.  */
1306 	  unsigned int wordnum
1307 	    = (WORDS_BIG_ENDIAN
1308 	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1309 	       : i);
1310 	  /* Offset from start of field in OP0.  */
1311 	  unsigned int bit_offset = (WORDS_BIG_ENDIAN
1312 				     ? MAX (0, ((int) bitsize - ((int) i + 1)
1313 						* (int) BITS_PER_WORD))
1314 				     : (int) i * BITS_PER_WORD);
1315 	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1316 	  rtx result_part
1317 	    = extract_bit_field (op0, MIN (BITS_PER_WORD,
1318 					   bitsize - i * BITS_PER_WORD),
1319 				 bitnum + bit_offset, 1, target_part, mode,
1320 				 word_mode);
1321 
1322 	  gcc_assert (target_part);
1323 
1324 	  if (result_part != target_part)
1325 	    emit_move_insn (target_part, result_part);
1326 	}
1327 
1328       if (unsignedp)
1329 	{
1330 	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
1331 	     need to be zero'd out.  */
1332 	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1333 	    {
1334 	      unsigned int i, total_words;
1335 
1336 	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1337 	      for (i = nwords; i < total_words; i++)
1338 		emit_move_insn
1339 		  (operand_subword (target,
1340 				    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1341 				    1, VOIDmode),
1342 		   const0_rtx);
1343 	    }
1344 	  return target;
1345 	}
1346 
1347       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1348       target = expand_shift (LSHIFT_EXPR, mode, target,
1349 			     build_int_cst (NULL_TREE,
1350 					    GET_MODE_BITSIZE (mode) - bitsize),
1351 			     NULL_RTX, 0);
1352       return expand_shift (RSHIFT_EXPR, mode, target,
1353 			   build_int_cst (NULL_TREE,
1354 					  GET_MODE_BITSIZE (mode) - bitsize),
1355 			   NULL_RTX, 0);
1356     }
1357 
1358   /* From here on we know the desired field is smaller than a word.  */
1359 
1360   /* Check if there is a correspondingly-sized integer field, so we can
1361      safely extract it as one size of integer, if necessary; then
1362      truncate or extend to the size that is wanted; then use SUBREGs or
1363      convert_to_mode to get one of the modes we really wanted.  */
1364 
1365   int_mode = int_mode_for_mode (tmode);
1366   if (int_mode == BLKmode)
1367     int_mode = int_mode_for_mode (mode);
1368   /* Should probably push op0 out to memory and then do a load.  */
1369   gcc_assert (int_mode != BLKmode);
1370 
1371   /* OFFSET is the number of words or bytes (UNIT says which)
1372      from STR_RTX to the first word or byte containing part of the field.  */
1373   if (!MEM_P (op0))
1374     {
1375       if (offset != 0
1376 	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1377 	{
1378 	  if (!REG_P (op0))
1379 	    op0 = copy_to_reg (op0);
1380 	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1381 		                op0, (offset * UNITS_PER_WORD));
1382 	}
1383       offset = 0;
1384     }
1385 
1386   /* Now OFFSET is nonzero only for memory operands.  */
1387 
1388   if (unsignedp)
1389     {
1390       if (HAVE_extzv
1391 	  && bitsize > 0
1392 	  && GET_MODE_BITSIZE (extzv_mode) >= bitsize
1393 	  && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1394 		&& (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1395 	{
1396 	  unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1397 	  rtx bitsize_rtx, bitpos_rtx;
1398 	  rtx last = get_last_insn ();
1399 	  rtx xop0 = op0;
1400 	  rtx xtarget = target;
1401 	  rtx xspec_target = spec_target;
1402 	  rtx xspec_target_subreg = spec_target_subreg;
1403 	  rtx pat;
1404 	  enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1405 
1406 	  if (MEM_P (xop0))
1407 	    {
1408 	      int save_volatile_ok = volatile_ok;
1409 	      volatile_ok = 1;
1410 
1411 	      /* Is the memory operand acceptable?  */
1412 	      if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1413 		     (xop0, GET_MODE (xop0))))
1414 		{
1415 		  /* No, load into a reg and extract from there.  */
1416 		  enum machine_mode bestmode;
1417 
1418 		  /* Get the mode to use for inserting into this field.  If
1419 		     OP0 is BLKmode, get the smallest mode consistent with the
1420 		     alignment. If OP0 is a non-BLKmode object that is no
1421 		     wider than MAXMODE, use its mode. Otherwise, use the
1422 		     smallest mode containing the field.  */
1423 
1424 		  if (GET_MODE (xop0) == BLKmode
1425 		      || (GET_MODE_SIZE (GET_MODE (op0))
1426 			  > GET_MODE_SIZE (maxmode)))
1427 		    bestmode = get_best_mode (bitsize, bitnum,
1428 					      MEM_ALIGN (xop0), maxmode,
1429 					      MEM_VOLATILE_P (xop0));
1430 		  else
1431 		    bestmode = GET_MODE (xop0);
1432 
1433 		  if (bestmode == VOIDmode
1434 		      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1435 			  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1436 		    goto extzv_loses;
1437 
1438 		  /* Compute offset as multiple of this unit,
1439 		     counting in bytes.  */
1440 		  unit = GET_MODE_BITSIZE (bestmode);
1441 		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1442 		  xbitpos = bitnum % unit;
1443 		  xop0 = adjust_address (xop0, bestmode, xoffset);
1444 
1445 		  /* Make sure register is big enough for the whole field. */
1446 		  if (xoffset * BITS_PER_UNIT + unit
1447 		      < offset * BITS_PER_UNIT + bitsize)
1448 		    goto extzv_loses;
1449 
1450 		  /* Fetch it to a register in that size.  */
1451 		  xop0 = force_reg (bestmode, xop0);
1452 
1453 		  /* XBITPOS counts within UNIT, which is what is expected.  */
1454 		}
1455 	      else
1456 		/* Get ref to first byte containing part of the field.  */
1457 		xop0 = adjust_address (xop0, byte_mode, xoffset);
1458 
1459 	      volatile_ok = save_volatile_ok;
1460 	    }
1461 
1462 	  /* If op0 is a register, we need it in MAXMODE (which is usually
1463 	     SImode). to make it acceptable to the format of extzv.  */
1464 	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1465 	    goto extzv_loses;
1466 	  if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1467 	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1468 
1469 	  /* On big-endian machines, we count bits from the most significant.
1470 	     If the bit field insn does not, we must invert.  */
1471 	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1472 	    xbitpos = unit - bitsize - xbitpos;
1473 
1474 	  /* Now convert from counting within UNIT to counting in MAXMODE.  */
1475 	  if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1476 	    xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1477 
1478 	  unit = GET_MODE_BITSIZE (maxmode);
1479 
1480 	  if (xtarget == 0)
1481 	    xtarget = xspec_target = gen_reg_rtx (tmode);
1482 
1483 	  if (GET_MODE (xtarget) != maxmode)
1484 	    {
1485 	      if (REG_P (xtarget))
1486 		{
1487 		  int wider = (GET_MODE_SIZE (maxmode)
1488 			       > GET_MODE_SIZE (GET_MODE (xtarget)));
1489 		  xtarget = gen_lowpart (maxmode, xtarget);
1490 		  if (wider)
1491 		    xspec_target_subreg = xtarget;
1492 		}
1493 	      else
1494 		xtarget = gen_reg_rtx (maxmode);
1495 	    }
1496 
1497 	  /* If this machine's extzv insists on a register target,
1498 	     make sure we have one.  */
1499 	  if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1500 		 (xtarget, maxmode)))
1501 	    xtarget = gen_reg_rtx (maxmode);
1502 
1503 	  bitsize_rtx = GEN_INT (bitsize);
1504 	  bitpos_rtx = GEN_INT (xbitpos);
1505 
1506 	  pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1507 	  if (pat)
1508 	    {
1509 	      emit_insn (pat);
1510 	      target = xtarget;
1511 	      spec_target = xspec_target;
1512 	      spec_target_subreg = xspec_target_subreg;
1513 	    }
1514 	  else
1515 	    {
1516 	      delete_insns_since (last);
1517 	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1518 						bitpos, target, 1);
1519 	    }
1520 	}
1521       else
1522       extzv_loses:
1523 	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1524 					  bitpos, target, 1);
1525     }
1526   else
1527     {
1528       if (HAVE_extv
1529 	  && bitsize > 0
1530 	  && GET_MODE_BITSIZE (extv_mode) >= bitsize
1531 	  && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1532 		&& (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1533 	{
1534 	  int xbitpos = bitpos, xoffset = offset;
1535 	  rtx bitsize_rtx, bitpos_rtx;
1536 	  rtx last = get_last_insn ();
1537 	  rtx xop0 = op0, xtarget = target;
1538 	  rtx xspec_target = spec_target;
1539 	  rtx xspec_target_subreg = spec_target_subreg;
1540 	  rtx pat;
1541 	  enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1542 
1543 	  if (MEM_P (xop0))
1544 	    {
1545 	      /* Is the memory operand acceptable?  */
1546 	      if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1547 		     (xop0, GET_MODE (xop0))))
1548 		{
1549 		  /* No, load into a reg and extract from there.  */
1550 		  enum machine_mode bestmode;
1551 
1552 		  /* Get the mode to use for inserting into this field.  If
1553 		     OP0 is BLKmode, get the smallest mode consistent with the
1554 		     alignment. If OP0 is a non-BLKmode object that is no
1555 		     wider than MAXMODE, use its mode. Otherwise, use the
1556 		     smallest mode containing the field.  */
1557 
1558 		  if (GET_MODE (xop0) == BLKmode
1559 		      || (GET_MODE_SIZE (GET_MODE (op0))
1560 			  > GET_MODE_SIZE (maxmode)))
1561 		    bestmode = get_best_mode (bitsize, bitnum,
1562 					      MEM_ALIGN (xop0), maxmode,
1563 					      MEM_VOLATILE_P (xop0));
1564 		  else
1565 		    bestmode = GET_MODE (xop0);
1566 
1567 		  if (bestmode == VOIDmode
1568 		      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1569 			  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1570 		    goto extv_loses;
1571 
1572 		  /* Compute offset as multiple of this unit,
1573 		     counting in bytes.  */
1574 		  unit = GET_MODE_BITSIZE (bestmode);
1575 		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1576 		  xbitpos = bitnum % unit;
1577 		  xop0 = adjust_address (xop0, bestmode, xoffset);
1578 
1579 		  /* Make sure register is big enough for the whole field. */
1580 		  if (xoffset * BITS_PER_UNIT + unit
1581 		      < offset * BITS_PER_UNIT + bitsize)
1582 		    goto extv_loses;
1583 
1584 		  /* Fetch it to a register in that size.  */
1585 		  xop0 = force_reg (bestmode, xop0);
1586 
1587 		  /* XBITPOS counts within UNIT, which is what is expected.  */
1588 		}
1589 	      else
1590 		/* Get ref to first byte containing part of the field.  */
1591 		xop0 = adjust_address (xop0, byte_mode, xoffset);
1592 	    }
1593 
1594 	  /* If op0 is a register, we need it in MAXMODE (which is usually
1595 	     SImode) to make it acceptable to the format of extv.  */
1596 	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1597 	    goto extv_loses;
1598 	  if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1599 	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1600 
1601 	  /* On big-endian machines, we count bits from the most significant.
1602 	     If the bit field insn does not, we must invert.  */
1603 	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1604 	    xbitpos = unit - bitsize - xbitpos;
1605 
1606 	  /* XBITPOS counts within a size of UNIT.
1607 	     Adjust to count within a size of MAXMODE.  */
1608 	  if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1609 	    xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1610 
1611 	  unit = GET_MODE_BITSIZE (maxmode);
1612 
1613 	  if (xtarget == 0)
1614 	    xtarget = xspec_target = gen_reg_rtx (tmode);
1615 
1616 	  if (GET_MODE (xtarget) != maxmode)
1617 	    {
1618 	      if (REG_P (xtarget))
1619 		{
1620 		  int wider = (GET_MODE_SIZE (maxmode)
1621 			       > GET_MODE_SIZE (GET_MODE (xtarget)));
1622 		  xtarget = gen_lowpart (maxmode, xtarget);
1623 		  if (wider)
1624 		    xspec_target_subreg = xtarget;
1625 		}
1626 	      else
1627 		xtarget = gen_reg_rtx (maxmode);
1628 	    }
1629 
1630 	  /* If this machine's extv insists on a register target,
1631 	     make sure we have one.  */
1632 	  if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1633 		 (xtarget, maxmode)))
1634 	    xtarget = gen_reg_rtx (maxmode);
1635 
1636 	  bitsize_rtx = GEN_INT (bitsize);
1637 	  bitpos_rtx = GEN_INT (xbitpos);
1638 
1639 	  pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1640 	  if (pat)
1641 	    {
1642 	      emit_insn (pat);
1643 	      target = xtarget;
1644 	      spec_target = xspec_target;
1645 	      spec_target_subreg = xspec_target_subreg;
1646 	    }
1647 	  else
1648 	    {
1649 	      delete_insns_since (last);
1650 	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1651 						bitpos, target, 0);
1652 	    }
1653 	}
1654       else
1655       extv_loses:
1656 	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1657 					  bitpos, target, 0);
1658     }
1659   if (target == spec_target)
1660     return target;
1661   if (target == spec_target_subreg)
1662     return spec_target;
1663   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1664     {
1665       /* If the target mode is not a scalar integral, first convert to the
1666 	 integer mode of that size and then access it as a floating-point
1667 	 value via a SUBREG.  */
1668       if (!SCALAR_INT_MODE_P (tmode))
1669 	{
1670 	  enum machine_mode smode
1671 	    = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1672 	  target = convert_to_mode (smode, target, unsignedp);
1673 	  target = force_reg (smode, target);
1674 	  return gen_lowpart (tmode, target);
1675 	}
1676 
1677       return convert_to_mode (tmode, target, unsignedp);
1678     }
1679   return target;
1680 }
1681 
1682 /* Extract a bit field using shifts and boolean operations
1683    Returns an rtx to represent the value.
1684    OP0 addresses a register (word) or memory (byte).
1685    BITPOS says which bit within the word or byte the bit field starts in.
1686    OFFSET says how many bytes farther the bit field starts;
1687     it is 0 if OP0 is a register.
1688    BITSIZE says how many bits long the bit field is.
1689     (If OP0 is a register, it may be narrower than a full word,
1690      but BITPOS still counts within a full word,
1691      which is significant on bigendian machines.)
1692 
1693    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1694    If TARGET is nonzero, attempts to store the value there
1695    and return TARGET, but this is not guaranteed.
1696    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1697 
1698 static rtx
extract_fixed_bit_field(enum machine_mode tmode,rtx op0,unsigned HOST_WIDE_INT offset,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitpos,rtx target,int unsignedp)1699 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1700 			 unsigned HOST_WIDE_INT offset,
1701 			 unsigned HOST_WIDE_INT bitsize,
1702 			 unsigned HOST_WIDE_INT bitpos, rtx target,
1703 			 int unsignedp)
1704 {
1705   unsigned int total_bits = BITS_PER_WORD;
1706   enum machine_mode mode;
1707 
1708   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1709     {
1710       /* Special treatment for a bit field split across two registers.  */
1711       if (bitsize + bitpos > BITS_PER_WORD)
1712 	return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1713     }
1714   else
1715     {
1716       /* Get the proper mode to use for this field.  We want a mode that
1717 	 includes the entire field.  If such a mode would be larger than
1718 	 a word, we won't be doing the extraction the normal way.  */
1719 
1720       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1721 			    MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1722 
1723       if (mode == VOIDmode)
1724 	/* The only way this should occur is if the field spans word
1725 	   boundaries.  */
1726 	return extract_split_bit_field (op0, bitsize,
1727 					bitpos + offset * BITS_PER_UNIT,
1728 					unsignedp);
1729 
1730       total_bits = GET_MODE_BITSIZE (mode);
1731 
1732       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1733 	 be in the range 0 to total_bits-1, and put any excess bytes in
1734 	 OFFSET.  */
1735       if (bitpos >= total_bits)
1736 	{
1737 	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1738 	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1739 		     * BITS_PER_UNIT);
1740 	}
1741 
1742       /* Get ref to an aligned byte, halfword, or word containing the field.
1743 	 Adjust BITPOS to be position within a word,
1744 	 and OFFSET to be the offset of that word.
1745 	 Then alter OP0 to refer to that word.  */
1746       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1747       offset -= (offset % (total_bits / BITS_PER_UNIT));
1748       op0 = adjust_address (op0, mode, offset);
1749     }
1750 
1751   mode = GET_MODE (op0);
1752 
1753   if (BYTES_BIG_ENDIAN)
1754     /* BITPOS is the distance between our msb and that of OP0.
1755        Convert it to the distance from the lsb.  */
1756     bitpos = total_bits - bitsize - bitpos;
1757 
1758   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1759      We have reduced the big-endian case to the little-endian case.  */
1760 
1761   if (unsignedp)
1762     {
1763       if (bitpos)
1764 	{
1765 	  /* If the field does not already start at the lsb,
1766 	     shift it so it does.  */
1767 	  tree amount = build_int_cst (NULL_TREE, bitpos);
1768 	  /* Maybe propagate the target for the shift.  */
1769 	  /* But not if we will return it--could confuse integrate.c.  */
1770 	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1771 	  if (tmode != mode) subtarget = 0;
1772 	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1773 	}
1774       /* Convert the value to the desired mode.  */
1775       if (mode != tmode)
1776 	op0 = convert_to_mode (tmode, op0, 1);
1777 
1778       /* Unless the msb of the field used to be the msb when we shifted,
1779 	 mask out the upper bits.  */
1780 
1781       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1782 	return expand_binop (GET_MODE (op0), and_optab, op0,
1783 			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1784 			     target, 1, OPTAB_LIB_WIDEN);
1785       return op0;
1786     }
1787 
1788   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1789      then arithmetic-shift its lsb to the lsb of the word.  */
1790   op0 = force_reg (mode, op0);
1791   if (mode != tmode)
1792     target = 0;
1793 
1794   /* Find the narrowest integer mode that contains the field.  */
1795 
1796   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1797        mode = GET_MODE_WIDER_MODE (mode))
1798     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1799       {
1800 	op0 = convert_to_mode (mode, op0, 0);
1801 	break;
1802       }
1803 
1804   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1805     {
1806       tree amount
1807 	= build_int_cst (NULL_TREE,
1808 			 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1809       /* Maybe propagate the target for the shift.  */
1810       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1811       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1812     }
1813 
1814   return expand_shift (RSHIFT_EXPR, mode, op0,
1815 		       build_int_cst (NULL_TREE,
1816 				      GET_MODE_BITSIZE (mode) - bitsize),
1817 		       target, 0);
1818 }
1819 
1820 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1821    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1822    complement of that if COMPLEMENT.  The mask is truncated if
1823    necessary to the width of mode MODE.  The mask is zero-extended if
1824    BITSIZE+BITPOS is too small for MODE.  */
1825 
1826 static rtx
mask_rtx(enum machine_mode mode,int bitpos,int bitsize,int complement)1827 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1828 {
1829   HOST_WIDE_INT masklow, maskhigh;
1830 
1831   if (bitsize == 0)
1832     masklow = 0;
1833   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1834     masklow = (HOST_WIDE_INT) -1 << bitpos;
1835   else
1836     masklow = 0;
1837 
1838   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1839     masklow &= ((unsigned HOST_WIDE_INT) -1
1840 		>> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1841 
1842   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1843     maskhigh = -1;
1844   else
1845     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1846 
1847   if (bitsize == 0)
1848     maskhigh = 0;
1849   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1850     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1851 		 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1852   else
1853     maskhigh = 0;
1854 
1855   if (complement)
1856     {
1857       maskhigh = ~maskhigh;
1858       masklow = ~masklow;
1859     }
1860 
1861   return immed_double_const (masklow, maskhigh, mode);
1862 }
1863 
1864 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1865    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1866 
1867 static rtx
lshift_value(enum machine_mode mode,rtx value,int bitpos,int bitsize)1868 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1869 {
1870   unsigned HOST_WIDE_INT v = INTVAL (value);
1871   HOST_WIDE_INT low, high;
1872 
1873   if (bitsize < HOST_BITS_PER_WIDE_INT)
1874     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1875 
1876   if (bitpos < HOST_BITS_PER_WIDE_INT)
1877     {
1878       low = v << bitpos;
1879       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1880     }
1881   else
1882     {
1883       low = 0;
1884       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1885     }
1886 
1887   return immed_double_const (low, high, mode);
1888 }
1889 
1890 /* Extract a bit field from a memory by forcing the alignment of the
1891    memory.  This efficient only if the field spans at least 4 boundaries.
1892 
1893    OP0 is the MEM.
1894    BITSIZE is the field width; BITPOS is the position of the first bit.
1895    UNSIGNEDP is true if the result should be zero-extended.  */
1896 
1897 static rtx
extract_force_align_mem_bit_field(rtx op0,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitpos,int unsignedp)1898 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1899 				   unsigned HOST_WIDE_INT bitpos,
1900 				   int unsignedp)
1901 {
1902   enum machine_mode mode, dmode;
1903   unsigned int m_bitsize, m_size;
1904   unsigned int sign_shift_up, sign_shift_dn;
1905   rtx base, a1, a2, v1, v2, comb, shift, result, start;
1906 
1907   /* Choose a mode that will fit BITSIZE.  */
1908   mode = smallest_mode_for_size (bitsize, MODE_INT);
1909   m_size = GET_MODE_SIZE (mode);
1910   m_bitsize = GET_MODE_BITSIZE (mode);
1911 
1912   /* Choose a mode twice as wide.  Fail if no such mode exists.  */
1913   dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1914   if (dmode == BLKmode)
1915     return NULL;
1916 
1917   do_pending_stack_adjust ();
1918   start = get_last_insn ();
1919 
1920   /* At the end, we'll need an additional shift to deal with sign/zero
1921      extension.  By default this will be a left+right shift of the
1922      appropriate size.  But we may be able to eliminate one of them.  */
1923   sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1924 
1925   if (STRICT_ALIGNMENT)
1926     {
1927       base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1928       bitpos %= BITS_PER_UNIT;
1929 
1930       /* We load two values to be concatenate.  There's an edge condition
1931 	 that bears notice -- an aligned value at the end of a page can
1932 	 only load one value lest we segfault.  So the two values we load
1933 	 are at "base & -size" and "(base + size - 1) & -size".  If base
1934 	 is unaligned, the addresses will be aligned and sequential; if
1935 	 base is aligned, the addresses will both be equal to base.  */
1936 
1937       a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1938 				GEN_INT (-(HOST_WIDE_INT)m_size),
1939 				NULL, true, OPTAB_LIB_WIDEN);
1940       mark_reg_pointer (a1, m_bitsize);
1941       v1 = gen_rtx_MEM (mode, a1);
1942       set_mem_align (v1, m_bitsize);
1943       v1 = force_reg (mode, validize_mem (v1));
1944 
1945       a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1946       a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1947 				GEN_INT (-(HOST_WIDE_INT)m_size),
1948 				NULL, true, OPTAB_LIB_WIDEN);
1949       v2 = gen_rtx_MEM (mode, a2);
1950       set_mem_align (v2, m_bitsize);
1951       v2 = force_reg (mode, validize_mem (v2));
1952 
1953       /* Combine these two values into a double-word value.  */
1954       if (m_bitsize == BITS_PER_WORD)
1955 	{
1956 	  comb = gen_reg_rtx (dmode);
1957 	  emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1958 	  emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1959 	  emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1960 	}
1961       else
1962 	{
1963 	  if (BYTES_BIG_ENDIAN)
1964 	    comb = v1, v1 = v2, v2 = comb;
1965 	  v1 = convert_modes (dmode, mode, v1, true);
1966 	  if (v1 == NULL)
1967 	    goto fail;
1968 	  v2 = convert_modes (dmode, mode, v2, true);
1969 	  v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1970 				    NULL, true, OPTAB_LIB_WIDEN);
1971 	  if (v2 == NULL)
1972 	    goto fail;
1973 	  comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1974 				      true, OPTAB_LIB_WIDEN);
1975 	  if (comb == NULL)
1976 	    goto fail;
1977 	}
1978 
1979       shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1980 				   NULL, true, OPTAB_LIB_WIDEN);
1981       shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1982 
1983       if (bitpos != 0)
1984 	{
1985 	  if (sign_shift_up <= bitpos)
1986 	    bitpos -= sign_shift_up, sign_shift_up = 0;
1987 	  shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1988 				       NULL, true, OPTAB_LIB_WIDEN);
1989 	}
1990     }
1991   else
1992     {
1993       unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1994       bitpos %= BITS_PER_UNIT;
1995 
1996       /* When strict alignment is not required, we can just load directly
1997 	 from memory without masking.  If the remaining BITPOS offset is
1998 	 small enough, we may be able to do all operations in MODE as
1999 	 opposed to DMODE.  */
2000       if (bitpos + bitsize <= m_bitsize)
2001 	dmode = mode;
2002       comb = adjust_address (op0, dmode, offset);
2003 
2004       if (sign_shift_up <= bitpos)
2005 	bitpos -= sign_shift_up, sign_shift_up = 0;
2006       shift = GEN_INT (bitpos);
2007     }
2008 
2009   /* Shift down the double-word such that the requested value is at bit 0.  */
2010   if (shift != const0_rtx)
2011     comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
2012 				comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
2013   if (comb == NULL)
2014     goto fail;
2015 
2016   /* If the field exactly matches MODE, then all we need to do is return the
2017      lowpart.  Otherwise, shift to get the sign bits set properly.  */
2018   result = force_reg (mode, gen_lowpart (mode, comb));
2019 
2020   if (sign_shift_up)
2021     result = expand_simple_binop (mode, ASHIFT, result,
2022 				  GEN_INT (sign_shift_up),
2023 				  NULL_RTX, 0, OPTAB_LIB_WIDEN);
2024   if (sign_shift_dn)
2025     result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
2026 				  result, GEN_INT (sign_shift_dn),
2027 				  NULL_RTX, 0, OPTAB_LIB_WIDEN);
2028 
2029   return result;
2030 
2031  fail:
2032   delete_insns_since (start);
2033   return NULL;
2034 }
2035 
2036 /* Extract a bit field that is split across two words
2037    and return an RTX for the result.
2038 
2039    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2040    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2041    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
2042 
2043 static rtx
extract_split_bit_field(rtx op0,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitpos,int unsignedp)2044 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2045 			 unsigned HOST_WIDE_INT bitpos, int unsignedp)
2046 {
2047   unsigned int unit;
2048   unsigned int bitsdone = 0;
2049   rtx result = NULL_RTX;
2050   int first = 1;
2051 
2052   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2053      much at a time.  */
2054   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2055     unit = BITS_PER_WORD;
2056   else
2057     {
2058       unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2059       if (0 && bitsize / unit > 2)
2060 	{
2061 	  rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2062 						       unsignedp);
2063 	  if (tmp)
2064 	    return tmp;
2065 	}
2066     }
2067 
2068   while (bitsdone < bitsize)
2069     {
2070       unsigned HOST_WIDE_INT thissize;
2071       rtx part, word;
2072       unsigned HOST_WIDE_INT thispos;
2073       unsigned HOST_WIDE_INT offset;
2074 
2075       offset = (bitpos + bitsdone) / unit;
2076       thispos = (bitpos + bitsdone) % unit;
2077 
2078       /* THISSIZE must not overrun a word boundary.  Otherwise,
2079 	 extract_fixed_bit_field will call us again, and we will mutually
2080 	 recurse forever.  */
2081       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2082       thissize = MIN (thissize, unit - thispos);
2083 
2084       /* If OP0 is a register, then handle OFFSET here.
2085 
2086 	 When handling multiword bitfields, extract_bit_field may pass
2087 	 down a word_mode SUBREG of a larger REG for a bitfield that actually
2088 	 crosses a word boundary.  Thus, for a SUBREG, we must find
2089 	 the current word starting from the base register.  */
2090       if (GET_CODE (op0) == SUBREG)
2091 	{
2092 	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2093 	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
2094 					GET_MODE (SUBREG_REG (op0)));
2095 	  offset = 0;
2096 	}
2097       else if (REG_P (op0))
2098 	{
2099 	  word = operand_subword_force (op0, offset, GET_MODE (op0));
2100 	  offset = 0;
2101 	}
2102       else
2103 	word = op0;
2104 
2105       /* Extract the parts in bit-counting order,
2106 	 whose meaning is determined by BYTES_PER_UNIT.
2107 	 OFFSET is in UNITs, and UNIT is in bits.
2108 	 extract_fixed_bit_field wants offset in bytes.  */
2109       part = extract_fixed_bit_field (word_mode, word,
2110 				      offset * unit / BITS_PER_UNIT,
2111 				      thissize, thispos, 0, 1);
2112       bitsdone += thissize;
2113 
2114       /* Shift this part into place for the result.  */
2115       if (BYTES_BIG_ENDIAN)
2116 	{
2117 	  if (bitsize != bitsdone)
2118 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2119 				 build_int_cst (NULL_TREE, bitsize - bitsdone),
2120 				 0, 1);
2121 	}
2122       else
2123 	{
2124 	  if (bitsdone != thissize)
2125 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2126 				 build_int_cst (NULL_TREE,
2127 						bitsdone - thissize), 0, 1);
2128 	}
2129 
2130       if (first)
2131 	result = part;
2132       else
2133 	/* Combine the parts with bitwise or.  This works
2134 	   because we extracted each part as an unsigned bit field.  */
2135 	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2136 			       OPTAB_LIB_WIDEN);
2137 
2138       first = 0;
2139     }
2140 
2141   /* Unsigned bit field: we are done.  */
2142   if (unsignedp)
2143     return result;
2144   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2145   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2146 			 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2147 			 NULL_RTX, 0);
2148   return expand_shift (RSHIFT_EXPR, word_mode, result,
2149 		       build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2150 		       NULL_RTX, 0);
2151 }
2152 
2153 /* Add INC into TARGET.  */
2154 
2155 void
expand_inc(rtx target,rtx inc)2156 expand_inc (rtx target, rtx inc)
2157 {
2158   rtx value = expand_binop (GET_MODE (target), add_optab,
2159 			    target, inc,
2160 			    target, 0, OPTAB_LIB_WIDEN);
2161   if (value != target)
2162     emit_move_insn (target, value);
2163 }
2164 
2165 /* Subtract DEC from TARGET.  */
2166 
2167 void
expand_dec(rtx target,rtx dec)2168 expand_dec (rtx target, rtx dec)
2169 {
2170   rtx value = expand_binop (GET_MODE (target), sub_optab,
2171 			    target, dec,
2172 			    target, 0, OPTAB_LIB_WIDEN);
2173   if (value != target)
2174     emit_move_insn (target, value);
2175 }
2176 
2177 /* Output a shift instruction for expression code CODE,
2178    with SHIFTED being the rtx for the value to shift,
2179    and AMOUNT the tree for the amount to shift by.
2180    Store the result in the rtx TARGET, if that is convenient.
2181    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2182    Return the rtx for where the value is.  */
2183 
2184 rtx
expand_shift(enum tree_code code,enum machine_mode mode,rtx shifted,tree amount,rtx target,int unsignedp)2185 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2186 	      tree amount, rtx target, int unsignedp)
2187 {
2188   rtx op1, temp = 0;
2189   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2190   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2191   int try;
2192 
2193   /* Previously detected shift-counts computed by NEGATE_EXPR
2194      and shifted in the other direction; but that does not work
2195      on all machines.  */
2196 
2197   op1 = expand_normal (amount);
2198 
2199   if (SHIFT_COUNT_TRUNCATED)
2200     {
2201       if (GET_CODE (op1) == CONST_INT
2202 	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2203 	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2204 	op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2205 		       % GET_MODE_BITSIZE (mode));
2206       else if (GET_CODE (op1) == SUBREG
2207 	       && subreg_lowpart_p (op1))
2208 	op1 = SUBREG_REG (op1);
2209     }
2210 
2211   if (op1 == const0_rtx)
2212     return shifted;
2213 
2214   /* Check whether its cheaper to implement a left shift by a constant
2215      bit count by a sequence of additions.  */
2216   if (code == LSHIFT_EXPR
2217       && GET_CODE (op1) == CONST_INT
2218       && INTVAL (op1) > 0
2219       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2220       && INTVAL (op1) < MAX_BITS_PER_WORD
2221       && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode]
2222       && shift_cost[mode][INTVAL (op1)] != MAX_COST)
2223     {
2224       int i;
2225       for (i = 0; i < INTVAL (op1); i++)
2226 	{
2227 	  temp = force_reg (mode, shifted);
2228 	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2229 				  unsignedp, OPTAB_LIB_WIDEN);
2230 	}
2231       return shifted;
2232     }
2233 
2234   for (try = 0; temp == 0 && try < 3; try++)
2235     {
2236       enum optab_methods methods;
2237 
2238       if (try == 0)
2239 	methods = OPTAB_DIRECT;
2240       else if (try == 1)
2241 	methods = OPTAB_WIDEN;
2242       else
2243 	methods = OPTAB_LIB_WIDEN;
2244 
2245       if (rotate)
2246 	{
2247 	  /* Widening does not work for rotation.  */
2248 	  if (methods == OPTAB_WIDEN)
2249 	    continue;
2250 	  else if (methods == OPTAB_LIB_WIDEN)
2251 	    {
2252 	      /* If we have been unable to open-code this by a rotation,
2253 		 do it as the IOR of two shifts.  I.e., to rotate A
2254 		 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2255 		 where C is the bitsize of A.
2256 
2257 		 It is theoretically possible that the target machine might
2258 		 not be able to perform either shift and hence we would
2259 		 be making two libcalls rather than just the one for the
2260 		 shift (similarly if IOR could not be done).  We will allow
2261 		 this extremely unlikely lossage to avoid complicating the
2262 		 code below.  */
2263 
2264 	      rtx subtarget = target == shifted ? 0 : target;
2265 	      tree new_amount, other_amount;
2266 	      rtx temp1;
2267 	      tree type = TREE_TYPE (amount);
2268 	      if (GET_MODE (op1) != TYPE_MODE (type)
2269 		  && GET_MODE (op1) != VOIDmode)
2270 		op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2271 	      new_amount = make_tree (type, op1);
2272 	      other_amount
2273 		= fold_build2 (MINUS_EXPR, type,
2274 			       build_int_cst (type, GET_MODE_BITSIZE (mode)),
2275 			       new_amount);
2276 
2277 	      shifted = force_reg (mode, shifted);
2278 
2279 	      temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2280 				   mode, shifted, new_amount, 0, 1);
2281 	      temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2282 				    mode, shifted, other_amount, subtarget, 1);
2283 	      return expand_binop (mode, ior_optab, temp, temp1, target,
2284 				   unsignedp, methods);
2285 	    }
2286 
2287 	  temp = expand_binop (mode,
2288 			       left ? rotl_optab : rotr_optab,
2289 			       shifted, op1, target, unsignedp, methods);
2290 	}
2291       else if (unsignedp)
2292 	temp = expand_binop (mode,
2293 			     left ? ashl_optab : lshr_optab,
2294 			     shifted, op1, target, unsignedp, methods);
2295 
2296       /* Do arithmetic shifts.
2297 	 Also, if we are going to widen the operand, we can just as well
2298 	 use an arithmetic right-shift instead of a logical one.  */
2299       if (temp == 0 && ! rotate
2300 	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2301 	{
2302 	  enum optab_methods methods1 = methods;
2303 
2304 	  /* If trying to widen a log shift to an arithmetic shift,
2305 	     don't accept an arithmetic shift of the same size.  */
2306 	  if (unsignedp)
2307 	    methods1 = OPTAB_MUST_WIDEN;
2308 
2309 	  /* Arithmetic shift */
2310 
2311 	  temp = expand_binop (mode,
2312 			       left ? ashl_optab : ashr_optab,
2313 			       shifted, op1, target, unsignedp, methods1);
2314 	}
2315 
2316       /* We used to try extzv here for logical right shifts, but that was
2317 	 only useful for one machine, the VAX, and caused poor code
2318 	 generation there for lshrdi3, so the code was deleted and a
2319 	 define_expand for lshrsi3 was added to vax.md.  */
2320     }
2321 
2322   gcc_assert (temp);
2323   return temp;
2324 }
2325 
2326 enum alg_code {
2327   alg_unknown,
2328   alg_zero,
2329   alg_m, alg_shift,
2330   alg_add_t_m2,
2331   alg_sub_t_m2,
2332   alg_add_factor,
2333   alg_sub_factor,
2334   alg_add_t2_m,
2335   alg_sub_t2_m,
2336   alg_impossible
2337 };
2338 
2339 /* This structure holds the "cost" of a multiply sequence.  The
2340    "cost" field holds the total rtx_cost of every operator in the
2341    synthetic multiplication sequence, hence cost(a op b) is defined
2342    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2343    The "latency" field holds the minimum possible latency of the
2344    synthetic multiply, on a hypothetical infinitely parallel CPU.
2345    This is the critical path, or the maximum height, of the expression
2346    tree which is the sum of rtx_costs on the most expensive path from
2347    any leaf to the root.  Hence latency(a op b) is defined as zero for
2348    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2349 
2350 struct mult_cost {
2351   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2352   short latency;  /* The latency of the multiplication sequence.  */
2353 };
2354 
2355 /* This macro is used to compare a pointer to a mult_cost against an
2356    single integer "rtx_cost" value.  This is equivalent to the macro
2357    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2358 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)	\
2359 			     || ((X)->cost == (Y) && (X)->latency < (Y)))
2360 
2361 /* This macro is used to compare two pointers to mult_costs against
2362    each other.  The macro returns true if X is cheaper than Y.
2363    Currently, the cheaper of two mult_costs is the one with the
2364    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2365 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost		\
2366 				 || ((X)->cost == (Y)->cost	\
2367 				     && (X)->latency < (Y)->latency))
2368 
2369 /* This structure records a sequence of operations.
2370    `ops' is the number of operations recorded.
2371    `cost' is their total cost.
2372    The operations are stored in `op' and the corresponding
2373    logarithms of the integer coefficients in `log'.
2374 
2375    These are the operations:
2376    alg_zero		total := 0;
2377    alg_m		total := multiplicand;
2378    alg_shift		total := total * coeff
2379    alg_add_t_m2		total := total + multiplicand * coeff;
2380    alg_sub_t_m2		total := total - multiplicand * coeff;
2381    alg_add_factor	total := total * coeff + total;
2382    alg_sub_factor	total := total * coeff - total;
2383    alg_add_t2_m		total := total * coeff + multiplicand;
2384    alg_sub_t2_m		total := total * coeff - multiplicand;
2385 
2386    The first operand must be either alg_zero or alg_m.  */
2387 
2388 struct algorithm
2389 {
2390   struct mult_cost cost;
2391   short ops;
2392   /* The size of the OP and LOG fields are not directly related to the
2393      word size, but the worst-case algorithms will be if we have few
2394      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2395      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2396      in total wordsize operations.  */
2397   enum alg_code op[MAX_BITS_PER_WORD];
2398   char log[MAX_BITS_PER_WORD];
2399 };
2400 
2401 /* The entry for our multiplication cache/hash table.  */
2402 struct alg_hash_entry {
2403   /* The number we are multiplying by.  */
2404   unsigned HOST_WIDE_INT t;
2405 
2406   /* The mode in which we are multiplying something by T.  */
2407   enum machine_mode mode;
2408 
2409   /* The best multiplication algorithm for t.  */
2410   enum alg_code alg;
2411 
2412   /* The cost of multiplication if ALG_CODE is not alg_impossible.
2413      Otherwise, the cost within which multiplication by T is
2414      impossible.  */
2415   struct mult_cost cost;
2416 };
2417 
2418 /* The number of cache/hash entries.  */
2419 #if HOST_BITS_PER_WIDE_INT == 64
2420 #define NUM_ALG_HASH_ENTRIES 1031
2421 #else
2422 #define NUM_ALG_HASH_ENTRIES 307
2423 #endif
2424 
2425 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2426    actually a hash table.  If we have a collision, that the older
2427    entry is kicked out.  */
2428 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2429 
2430 /* Indicates the type of fixup needed after a constant multiplication.
2431    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2432    the result should be negated, and ADD_VARIANT means that the
2433    multiplicand should be added to the result.  */
2434 enum mult_variant {basic_variant, negate_variant, add_variant};
2435 
2436 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2437 			const struct mult_cost *, enum machine_mode mode);
2438 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2439 				 struct algorithm *, enum mult_variant *, int);
2440 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2441 			      const struct algorithm *, enum mult_variant);
2442 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2443 						 int, rtx *, int *, int *);
2444 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2445 static rtx extract_high_half (enum machine_mode, rtx);
2446 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2447 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2448 				       int, int);
2449 /* Compute and return the best algorithm for multiplying by T.
2450    The algorithm must cost less than cost_limit
2451    If retval.cost >= COST_LIMIT, no algorithm was found and all
2452    other field of the returned struct are undefined.
2453    MODE is the machine mode of the multiplication.  */
2454 
2455 static void
synth_mult(struct algorithm * alg_out,unsigned HOST_WIDE_INT t,const struct mult_cost * cost_limit,enum machine_mode mode)2456 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2457 	    const struct mult_cost *cost_limit, enum machine_mode mode)
2458 {
2459   int m;
2460   struct algorithm *alg_in, *best_alg;
2461   struct mult_cost best_cost;
2462   struct mult_cost new_limit;
2463   int op_cost, op_latency;
2464   unsigned HOST_WIDE_INT q;
2465   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2466   int hash_index;
2467   bool cache_hit = false;
2468   enum alg_code cache_alg = alg_zero;
2469 
2470   /* Indicate that no algorithm is yet found.  If no algorithm
2471      is found, this value will be returned and indicate failure.  */
2472   alg_out->cost.cost = cost_limit->cost + 1;
2473   alg_out->cost.latency = cost_limit->latency + 1;
2474 
2475   if (cost_limit->cost < 0
2476       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2477     return;
2478 
2479   /* Restrict the bits of "t" to the multiplication's mode.  */
2480   t &= GET_MODE_MASK (mode);
2481 
2482   /* t == 1 can be done in zero cost.  */
2483   if (t == 1)
2484     {
2485       alg_out->ops = 1;
2486       alg_out->cost.cost = 0;
2487       alg_out->cost.latency = 0;
2488       alg_out->op[0] = alg_m;
2489       return;
2490     }
2491 
2492   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2493      fail now.  */
2494   if (t == 0)
2495     {
2496       if (MULT_COST_LESS (cost_limit, zero_cost))
2497 	return;
2498       else
2499 	{
2500 	  alg_out->ops = 1;
2501 	  alg_out->cost.cost = zero_cost;
2502 	  alg_out->cost.latency = zero_cost;
2503 	  alg_out->op[0] = alg_zero;
2504 	  return;
2505 	}
2506     }
2507 
2508   /* We'll be needing a couple extra algorithm structures now.  */
2509 
2510   alg_in = alloca (sizeof (struct algorithm));
2511   best_alg = alloca (sizeof (struct algorithm));
2512   best_cost = *cost_limit;
2513 
2514   /* Compute the hash index.  */
2515   hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2516 
2517   /* See if we already know what to do for T.  */
2518   if (alg_hash[hash_index].t == t
2519       && alg_hash[hash_index].mode == mode
2520       && alg_hash[hash_index].alg != alg_unknown)
2521     {
2522       cache_alg = alg_hash[hash_index].alg;
2523 
2524       if (cache_alg == alg_impossible)
2525 	{
2526 	  /* The cache tells us that it's impossible to synthesize
2527 	     multiplication by T within alg_hash[hash_index].cost.  */
2528 	  if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2529 	    /* COST_LIMIT is at least as restrictive as the one
2530 	       recorded in the hash table, in which case we have no
2531 	       hope of synthesizing a multiplication.  Just
2532 	       return.  */
2533 	    return;
2534 
2535 	  /* If we get here, COST_LIMIT is less restrictive than the
2536 	     one recorded in the hash table, so we may be able to
2537 	     synthesize a multiplication.  Proceed as if we didn't
2538 	     have the cache entry.  */
2539 	}
2540       else
2541 	{
2542 	  if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2543 	    /* The cached algorithm shows that this multiplication
2544 	       requires more cost than COST_LIMIT.  Just return.  This
2545 	       way, we don't clobber this cache entry with
2546 	       alg_impossible but retain useful information.  */
2547 	    return;
2548 
2549 	  cache_hit = true;
2550 
2551 	  switch (cache_alg)
2552 	    {
2553 	    case alg_shift:
2554 	      goto do_alg_shift;
2555 
2556 	    case alg_add_t_m2:
2557 	    case alg_sub_t_m2:
2558 	      goto do_alg_addsub_t_m2;
2559 
2560 	    case alg_add_factor:
2561 	    case alg_sub_factor:
2562 	      goto do_alg_addsub_factor;
2563 
2564 	    case alg_add_t2_m:
2565 	      goto do_alg_add_t2_m;
2566 
2567 	    case alg_sub_t2_m:
2568 	      goto do_alg_sub_t2_m;
2569 
2570 	    default:
2571 	      gcc_unreachable ();
2572 	    }
2573 	}
2574     }
2575 
2576   /* If we have a group of zero bits at the low-order part of T, try
2577      multiplying by the remaining bits and then doing a shift.  */
2578 
2579   if ((t & 1) == 0)
2580     {
2581     do_alg_shift:
2582       m = floor_log2 (t & -t);	/* m = number of low zero bits */
2583       if (m < maxm)
2584 	{
2585 	  q = t >> m;
2586 	  /* The function expand_shift will choose between a shift and
2587 	     a sequence of additions, so the observed cost is given as
2588 	     MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2589 	  op_cost = m * add_cost[mode];
2590 	  if (shift_cost[mode][m] < op_cost)
2591 	    op_cost = shift_cost[mode][m];
2592 	  new_limit.cost = best_cost.cost - op_cost;
2593 	  new_limit.latency = best_cost.latency - op_cost;
2594 	  synth_mult (alg_in, q, &new_limit, mode);
2595 
2596 	  alg_in->cost.cost += op_cost;
2597 	  alg_in->cost.latency += op_cost;
2598 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2599 	    {
2600 	      struct algorithm *x;
2601 	      best_cost = alg_in->cost;
2602 	      x = alg_in, alg_in = best_alg, best_alg = x;
2603 	      best_alg->log[best_alg->ops] = m;
2604 	      best_alg->op[best_alg->ops] = alg_shift;
2605 	    }
2606 	}
2607       if (cache_hit)
2608 	goto done;
2609     }
2610 
2611   /* If we have an odd number, add or subtract one.  */
2612   if ((t & 1) != 0)
2613     {
2614       unsigned HOST_WIDE_INT w;
2615 
2616     do_alg_addsub_t_m2:
2617       for (w = 1; (w & t) != 0; w <<= 1)
2618 	;
2619       /* APPLE LOCAL begin 7744816 DImode multiply by 0xffffffffULL */
2620       if (w > 2
2621 	      /* Reject the case where t is 3.
2622 		 Thus we prefer addition in that case.  */
2623 	      && t != 3)
2624       /* APPLE LOCAL end 7744816 DImode multiply by 0xffffffffULL */
2625 	{
2626 	  /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2627 
2628 	  op_cost = add_cost[mode];
2629 	  new_limit.cost = best_cost.cost - op_cost;
2630 	  new_limit.latency = best_cost.latency - op_cost;
2631 	  synth_mult (alg_in, t + 1, &new_limit, mode);
2632 
2633 	  alg_in->cost.cost += op_cost;
2634 	  alg_in->cost.latency += op_cost;
2635 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2636 	    {
2637 	      struct algorithm *x;
2638 	      best_cost = alg_in->cost;
2639 	      x = alg_in, alg_in = best_alg, best_alg = x;
2640 	      best_alg->log[best_alg->ops] = 0;
2641 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2642 	    }
2643 	}
2644       else
2645 	{
2646 	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2647 
2648 	  op_cost = add_cost[mode];
2649 	  new_limit.cost = best_cost.cost - op_cost;
2650 	  new_limit.latency = best_cost.latency - op_cost;
2651 	  synth_mult (alg_in, t - 1, &new_limit, mode);
2652 
2653 	  alg_in->cost.cost += op_cost;
2654 	  alg_in->cost.latency += op_cost;
2655 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2656 	    {
2657 	      struct algorithm *x;
2658 	      best_cost = alg_in->cost;
2659 	      x = alg_in, alg_in = best_alg, best_alg = x;
2660 	      best_alg->log[best_alg->ops] = 0;
2661 	      best_alg->op[best_alg->ops] = alg_add_t_m2;
2662 	    }
2663 	}
2664       if (cache_hit)
2665 	goto done;
2666     }
2667 
2668   /* Look for factors of t of the form
2669      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2670      If we find such a factor, we can multiply by t using an algorithm that
2671      multiplies by q, shift the result by m and add/subtract it to itself.
2672 
2673      We search for large factors first and loop down, even if large factors
2674      are less probable than small; if we find a large factor we will find a
2675      good sequence quickly, and therefore be able to prune (by decreasing
2676      COST_LIMIT) the search.  */
2677 
2678  do_alg_addsub_factor:
2679   for (m = floor_log2 (t - 1); m >= 2; m--)
2680     {
2681       unsigned HOST_WIDE_INT d;
2682 
2683       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2684       if (t % d == 0 && t > d && m < maxm
2685 	  && (!cache_hit || cache_alg == alg_add_factor))
2686 	{
2687 	  /* If the target has a cheap shift-and-add instruction use
2688 	     that in preference to a shift insn followed by an add insn.
2689 	     Assume that the shift-and-add is "atomic" with a latency
2690 	     equal to its cost, otherwise assume that on superscalar
2691 	     hardware the shift may be executed concurrently with the
2692 	     earlier steps in the algorithm.  */
2693 	  op_cost = add_cost[mode] + shift_cost[mode][m];
2694 	  if (shiftadd_cost[mode][m] < op_cost)
2695 	    {
2696 	      op_cost = shiftadd_cost[mode][m];
2697 	      op_latency = op_cost;
2698 	    }
2699 	  else
2700 	    op_latency = add_cost[mode];
2701 
2702 	  new_limit.cost = best_cost.cost - op_cost;
2703 	  new_limit.latency = best_cost.latency - op_latency;
2704 	  synth_mult (alg_in, t / d, &new_limit, mode);
2705 
2706 	  alg_in->cost.cost += op_cost;
2707 	  alg_in->cost.latency += op_latency;
2708 	  if (alg_in->cost.latency < op_cost)
2709 	    alg_in->cost.latency = op_cost;
2710 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2711 	    {
2712 	      struct algorithm *x;
2713 	      best_cost = alg_in->cost;
2714 	      x = alg_in, alg_in = best_alg, best_alg = x;
2715 	      best_alg->log[best_alg->ops] = m;
2716 	      best_alg->op[best_alg->ops] = alg_add_factor;
2717 	    }
2718 	  /* Other factors will have been taken care of in the recursion.  */
2719 	  break;
2720 	}
2721 
2722       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2723       if (t % d == 0 && t > d && m < maxm
2724 	  && (!cache_hit || cache_alg == alg_sub_factor))
2725 	{
2726 	  /* If the target has a cheap shift-and-subtract insn use
2727 	     that in preference to a shift insn followed by a sub insn.
2728 	     Assume that the shift-and-sub is "atomic" with a latency
2729 	     equal to it's cost, otherwise assume that on superscalar
2730 	     hardware the shift may be executed concurrently with the
2731 	     earlier steps in the algorithm.  */
2732 	  op_cost = add_cost[mode] + shift_cost[mode][m];
2733 	  if (shiftsub_cost[mode][m] < op_cost)
2734 	    {
2735 	      op_cost = shiftsub_cost[mode][m];
2736 	      op_latency = op_cost;
2737 	    }
2738 	  else
2739 	    op_latency = add_cost[mode];
2740 
2741 	  new_limit.cost = best_cost.cost - op_cost;
2742 	  new_limit.latency = best_cost.latency - op_latency;
2743 	  synth_mult (alg_in, t / d, &new_limit, mode);
2744 
2745 	  alg_in->cost.cost += op_cost;
2746 	  alg_in->cost.latency += op_latency;
2747 	  if (alg_in->cost.latency < op_cost)
2748 	    alg_in->cost.latency = op_cost;
2749 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2750 	    {
2751 	      struct algorithm *x;
2752 	      best_cost = alg_in->cost;
2753 	      x = alg_in, alg_in = best_alg, best_alg = x;
2754 	      best_alg->log[best_alg->ops] = m;
2755 	      best_alg->op[best_alg->ops] = alg_sub_factor;
2756 	    }
2757 	  break;
2758 	}
2759     }
2760   if (cache_hit)
2761     goto done;
2762 
2763   /* Try shift-and-add (load effective address) instructions,
2764      i.e. do a*3, a*5, a*9.  */
2765   if ((t & 1) != 0)
2766     {
2767     do_alg_add_t2_m:
2768       q = t - 1;
2769       q = q & -q;
2770       m = exact_log2 (q);
2771       if (m >= 0 && m < maxm)
2772 	{
2773 	  op_cost = shiftadd_cost[mode][m];
2774 	  new_limit.cost = best_cost.cost - op_cost;
2775 	  new_limit.latency = best_cost.latency - op_cost;
2776 	  synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2777 
2778 	  alg_in->cost.cost += op_cost;
2779 	  alg_in->cost.latency += op_cost;
2780 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2781 	    {
2782 	      struct algorithm *x;
2783 	      best_cost = alg_in->cost;
2784 	      x = alg_in, alg_in = best_alg, best_alg = x;
2785 	      best_alg->log[best_alg->ops] = m;
2786 	      best_alg->op[best_alg->ops] = alg_add_t2_m;
2787 	    }
2788 	}
2789       if (cache_hit)
2790 	goto done;
2791 
2792     do_alg_sub_t2_m:
2793       q = t + 1;
2794       q = q & -q;
2795       m = exact_log2 (q);
2796       if (m >= 0 && m < maxm)
2797 	{
2798 	  op_cost = shiftsub_cost[mode][m];
2799 	  new_limit.cost = best_cost.cost - op_cost;
2800 	  new_limit.latency = best_cost.latency - op_cost;
2801 	  synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2802 
2803 	  alg_in->cost.cost += op_cost;
2804 	  alg_in->cost.latency += op_cost;
2805 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2806 	    {
2807 	      struct algorithm *x;
2808 	      best_cost = alg_in->cost;
2809 	      x = alg_in, alg_in = best_alg, best_alg = x;
2810 	      best_alg->log[best_alg->ops] = m;
2811 	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
2812 	    }
2813 	}
2814       if (cache_hit)
2815 	goto done;
2816     }
2817 
2818  done:
2819   /* If best_cost has not decreased, we have not found any algorithm.  */
2820   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2821     {
2822       /* We failed to find an algorithm.  Record alg_impossible for
2823 	 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2824 	 we are asked to find an algorithm for T within the same or
2825 	 lower COST_LIMIT, we can immediately return to the
2826 	 caller.  */
2827       alg_hash[hash_index].t = t;
2828       alg_hash[hash_index].mode = mode;
2829       alg_hash[hash_index].alg = alg_impossible;
2830       alg_hash[hash_index].cost = *cost_limit;
2831       return;
2832     }
2833 
2834   /* Cache the result.  */
2835   if (!cache_hit)
2836     {
2837       alg_hash[hash_index].t = t;
2838       alg_hash[hash_index].mode = mode;
2839       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2840       alg_hash[hash_index].cost.cost = best_cost.cost;
2841       alg_hash[hash_index].cost.latency = best_cost.latency;
2842     }
2843 
2844   /* If we are getting a too long sequence for `struct algorithm'
2845      to record, make this search fail.  */
2846   if (best_alg->ops == MAX_BITS_PER_WORD)
2847     return;
2848 
2849   /* Copy the algorithm from temporary space to the space at alg_out.
2850      We avoid using structure assignment because the majority of
2851      best_alg is normally undefined, and this is a critical function.  */
2852   alg_out->ops = best_alg->ops + 1;
2853   alg_out->cost = best_cost;
2854   memcpy (alg_out->op, best_alg->op,
2855 	  alg_out->ops * sizeof *alg_out->op);
2856   memcpy (alg_out->log, best_alg->log,
2857 	  alg_out->ops * sizeof *alg_out->log);
2858 }
2859 
2860 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2861    Try three variations:
2862 
2863        - a shift/add sequence based on VAL itself
2864        - a shift/add sequence based on -VAL, followed by a negation
2865        - a shift/add sequence based on VAL - 1, followed by an addition.
2866 
2867    Return true if the cheapest of these cost less than MULT_COST,
2868    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2869 
2870 static bool
choose_mult_variant(enum machine_mode mode,HOST_WIDE_INT val,struct algorithm * alg,enum mult_variant * variant,int mult_cost)2871 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2872 		     struct algorithm *alg, enum mult_variant *variant,
2873 		     int mult_cost)
2874 {
2875   struct algorithm alg2;
2876   struct mult_cost limit;
2877   int op_cost;
2878 
2879   /* Fail quickly for impossible bounds.  */
2880   if (mult_cost < 0)
2881     return false;
2882 
2883   /* Ensure that mult_cost provides a reasonable upper bound.
2884      Any constant multiplication can be performed with less
2885      than 2 * bits additions.  */
2886   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
2887   if (mult_cost > op_cost)
2888     mult_cost = op_cost;
2889 
2890   *variant = basic_variant;
2891   limit.cost = mult_cost;
2892   limit.latency = mult_cost;
2893   synth_mult (alg, val, &limit, mode);
2894 
2895   /* This works only if the inverted value actually fits in an
2896      `unsigned int' */
2897   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2898     {
2899       op_cost = neg_cost[mode];
2900       if (MULT_COST_LESS (&alg->cost, mult_cost))
2901 	{
2902 	  limit.cost = alg->cost.cost - op_cost;
2903 	  limit.latency = alg->cost.latency - op_cost;
2904 	}
2905       else
2906 	{
2907 	  limit.cost = mult_cost - op_cost;
2908 	  limit.latency = mult_cost - op_cost;
2909 	}
2910 
2911       synth_mult (&alg2, -val, &limit, mode);
2912       alg2.cost.cost += op_cost;
2913       alg2.cost.latency += op_cost;
2914       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2915 	*alg = alg2, *variant = negate_variant;
2916     }
2917 
2918   /* This proves very useful for division-by-constant.  */
2919   op_cost = add_cost[mode];
2920   if (MULT_COST_LESS (&alg->cost, mult_cost))
2921     {
2922       limit.cost = alg->cost.cost - op_cost;
2923       limit.latency = alg->cost.latency - op_cost;
2924     }
2925   else
2926     {
2927       limit.cost = mult_cost - op_cost;
2928       limit.latency = mult_cost - op_cost;
2929     }
2930 
2931   synth_mult (&alg2, val - 1, &limit, mode);
2932   alg2.cost.cost += op_cost;
2933   alg2.cost.latency += op_cost;
2934   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2935     *alg = alg2, *variant = add_variant;
2936 
2937   return MULT_COST_LESS (&alg->cost, mult_cost);
2938 }
2939 
2940 /* A subroutine of expand_mult, used for constant multiplications.
2941    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2942    convenient.  Use the shift/add sequence described by ALG and apply
2943    the final fixup specified by VARIANT.  */
2944 
2945 static rtx
expand_mult_const(enum machine_mode mode,rtx op0,HOST_WIDE_INT val,rtx target,const struct algorithm * alg,enum mult_variant variant)2946 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2947 		   rtx target, const struct algorithm *alg,
2948 		   enum mult_variant variant)
2949 {
2950   HOST_WIDE_INT val_so_far;
2951   rtx insn, accum, tem;
2952   int opno;
2953   enum machine_mode nmode;
2954 
2955   /* Avoid referencing memory over and over.
2956      For speed, but also for correctness when mem is volatile.  */
2957   if (MEM_P (op0))
2958     op0 = force_reg (mode, op0);
2959 
2960   /* ACCUM starts out either as OP0 or as a zero, depending on
2961      the first operation.  */
2962 
2963   if (alg->op[0] == alg_zero)
2964     {
2965       accum = copy_to_mode_reg (mode, const0_rtx);
2966       val_so_far = 0;
2967     }
2968   else if (alg->op[0] == alg_m)
2969     {
2970       accum = copy_to_mode_reg (mode, op0);
2971       val_so_far = 1;
2972     }
2973   else
2974     gcc_unreachable ();
2975 
2976   for (opno = 1; opno < alg->ops; opno++)
2977     {
2978       int log = alg->log[opno];
2979       rtx shift_subtarget = optimize ? 0 : accum;
2980       rtx add_target
2981 	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
2982 	   && !optimize)
2983 	  ? target : 0;
2984       rtx accum_target = optimize ? 0 : accum;
2985 
2986       switch (alg->op[opno])
2987 	{
2988 	case alg_shift:
2989 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
2990 				build_int_cst (NULL_TREE, log),
2991 				NULL_RTX, 0);
2992 	  val_so_far <<= log;
2993 	  break;
2994 
2995 	case alg_add_t_m2:
2996 	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
2997 			      build_int_cst (NULL_TREE, log),
2998 			      NULL_RTX, 0);
2999 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3000 				 add_target ? add_target : accum_target);
3001 	  val_so_far += (HOST_WIDE_INT) 1 << log;
3002 	  break;
3003 
3004 	case alg_sub_t_m2:
3005 	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
3006 			      build_int_cst (NULL_TREE, log),
3007 			      NULL_RTX, 0);
3008 	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3009 				 add_target ? add_target : accum_target);
3010 	  val_so_far -= (HOST_WIDE_INT) 1 << log;
3011 	  break;
3012 
3013 	case alg_add_t2_m:
3014 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3015 				build_int_cst (NULL_TREE, log),
3016 				shift_subtarget,
3017 				0);
3018 	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3019 				 add_target ? add_target : accum_target);
3020 	  val_so_far = (val_so_far << log) + 1;
3021 	  break;
3022 
3023 	case alg_sub_t2_m:
3024 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3025 				build_int_cst (NULL_TREE, log),
3026 				shift_subtarget, 0);
3027 	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3028 				 add_target ? add_target : accum_target);
3029 	  val_so_far = (val_so_far << log) - 1;
3030 	  break;
3031 
3032 	case alg_add_factor:
3033 	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
3034 			      build_int_cst (NULL_TREE, log),
3035 			      NULL_RTX, 0);
3036 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3037 				 add_target ? add_target : accum_target);
3038 	  val_so_far += val_so_far << log;
3039 	  break;
3040 
3041 	case alg_sub_factor:
3042 	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
3043 			      build_int_cst (NULL_TREE, log),
3044 			      NULL_RTX, 0);
3045 	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3046 				 (add_target
3047 				  ? add_target : (optimize ? 0 : tem)));
3048 	  val_so_far = (val_so_far << log) - val_so_far;
3049 	  break;
3050 
3051 	default:
3052 	  gcc_unreachable ();
3053 	}
3054 
3055       /* Write a REG_EQUAL note on the last insn so that we can cse
3056 	 multiplication sequences.  Note that if ACCUM is a SUBREG,
3057 	 we've set the inner register and must properly indicate
3058 	 that.  */
3059 
3060       tem = op0, nmode = mode;
3061       if (GET_CODE (accum) == SUBREG)
3062 	{
3063 	  nmode = GET_MODE (SUBREG_REG (accum));
3064 	  tem = gen_lowpart (nmode, op0);
3065 	}
3066 
3067       insn = get_last_insn ();
3068       set_unique_reg_note (insn, REG_EQUAL,
3069 			   gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
3070     }
3071 
3072   if (variant == negate_variant)
3073     {
3074       val_so_far = -val_so_far;
3075       accum = expand_unop (mode, neg_optab, accum, target, 0);
3076     }
3077   else if (variant == add_variant)
3078     {
3079       val_so_far = val_so_far + 1;
3080       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3081     }
3082 
3083   /* Compare only the bits of val and val_so_far that are significant
3084      in the result mode, to avoid sign-/zero-extension confusion.  */
3085   val &= GET_MODE_MASK (mode);
3086   val_so_far &= GET_MODE_MASK (mode);
3087   gcc_assert (val == val_so_far);
3088 
3089   return accum;
3090 }
3091 
3092 /* Perform a multiplication and return an rtx for the result.
3093    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3094    TARGET is a suggestion for where to store the result (an rtx).
3095 
3096    We check specially for a constant integer as OP1.
3097    If you want this check for OP0 as well, then before calling
3098    you should swap the two operands if OP0 would be constant.  */
3099 
3100 rtx
expand_mult(enum machine_mode mode,rtx op0,rtx op1,rtx target,int unsignedp)3101 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3102 	     int unsignedp)
3103 {
3104   enum mult_variant variant;
3105   struct algorithm algorithm;
3106   int max_cost;
3107 
3108   /* Handling const0_rtx here allows us to use zero as a rogue value for
3109      coeff below.  */
3110   if (op1 == const0_rtx)
3111     return const0_rtx;
3112   if (op1 == const1_rtx)
3113     return op0;
3114   if (op1 == constm1_rtx)
3115     return expand_unop (mode,
3116 			GET_MODE_CLASS (mode) == MODE_INT
3117 			&& !unsignedp && flag_trapv
3118 			? negv_optab : neg_optab,
3119 			op0, target, 0);
3120 
3121   /* These are the operations that are potentially turned into a sequence
3122      of shifts and additions.  */
3123   if (SCALAR_INT_MODE_P (mode)
3124       && (unsignedp || !flag_trapv))
3125     {
3126       HOST_WIDE_INT coeff = 0;
3127       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3128 
3129       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3130 	 less than or equal in size to `unsigned int' this doesn't matter.
3131 	 If the mode is larger than `unsigned int', then synth_mult works
3132 	 only if the constant value exactly fits in an `unsigned int' without
3133 	 any truncation.  This means that multiplying by negative values does
3134 	 not work; results are off by 2^32 on a 32 bit machine.  */
3135 
3136       if (GET_CODE (op1) == CONST_INT)
3137 	{
3138 	  /* Attempt to handle multiplication of DImode values by negative
3139 	     coefficients, by performing the multiplication by a positive
3140 	     multiplier and then inverting the result.  */
3141 	  if (INTVAL (op1) < 0
3142 	      && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3143 	    {
3144 	      /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3145 		 result is interpreted as an unsigned coefficient.
3146 		 Exclude cost of op0 from max_cost to match the cost
3147 		 calculation of the synth_mult.  */
3148 	      max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
3149 			 - neg_cost[mode];
3150 	      if (max_cost > 0
3151 		  && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3152 					  &variant, max_cost))
3153 		{
3154 		  rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3155 						NULL_RTX, &algorithm,
3156 						variant);
3157 		  return expand_unop (mode, neg_optab, temp, target, 0);
3158 		}
3159 	    }
3160 	  else coeff = INTVAL (op1);
3161 	}
3162       else if (GET_CODE (op1) == CONST_DOUBLE)
3163 	{
3164 	  /* If we are multiplying in DImode, it may still be a win
3165 	     to try to work with shifts and adds.  */
3166 	  if (CONST_DOUBLE_HIGH (op1) == 0)
3167 	    coeff = CONST_DOUBLE_LOW (op1);
3168 	  else if (CONST_DOUBLE_LOW (op1) == 0
3169 		   && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3170 	    {
3171 	      int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3172 			  + HOST_BITS_PER_WIDE_INT;
3173 	      return expand_shift (LSHIFT_EXPR, mode, op0,
3174 				   build_int_cst (NULL_TREE, shift),
3175 				   target, unsignedp);
3176 	    }
3177 	}
3178 
3179       /* We used to test optimize here, on the grounds that it's better to
3180 	 produce a smaller program when -O is not used.  But this causes
3181 	 such a terrible slowdown sometimes that it seems better to always
3182 	 use synth_mult.  */
3183       if (coeff != 0)
3184 	{
3185 	  /* Special case powers of two.  */
3186 	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3187 	    return expand_shift (LSHIFT_EXPR, mode, op0,
3188 				 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3189 				 target, unsignedp);
3190 
3191 	  /* Exclude cost of op0 from max_cost to match the cost
3192 	     calculation of the synth_mult.  */
3193 	  max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
3194 	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3195 				   max_cost))
3196 	    return expand_mult_const (mode, op0, coeff, target,
3197 				      &algorithm, variant);
3198 	}
3199     }
3200 
3201   if (GET_CODE (op0) == CONST_DOUBLE)
3202     {
3203       rtx temp = op0;
3204       op0 = op1;
3205       op1 = temp;
3206     }
3207 
3208   /* Expand x*2.0 as x+x.  */
3209   if (GET_CODE (op1) == CONST_DOUBLE
3210       && SCALAR_FLOAT_MODE_P (mode))
3211     {
3212       REAL_VALUE_TYPE d;
3213       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3214 
3215       if (REAL_VALUES_EQUAL (d, dconst2))
3216 	{
3217 	  op0 = force_reg (GET_MODE (op0), op0);
3218 	  return expand_binop (mode, add_optab, op0, op0,
3219 			       target, unsignedp, OPTAB_LIB_WIDEN);
3220 	}
3221     }
3222 
3223   /* This used to use umul_optab if unsigned, but for non-widening multiply
3224      there is no difference between signed and unsigned.  */
3225   op0 = expand_binop (mode,
3226 		      ! unsignedp
3227 		      && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3228 		      ? smulv_optab : smul_optab,
3229 		      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3230   gcc_assert (op0);
3231   return op0;
3232 }
3233 
3234 /* Return the smallest n such that 2**n >= X.  */
3235 
3236 int
ceil_log2(unsigned HOST_WIDE_INT x)3237 ceil_log2 (unsigned HOST_WIDE_INT x)
3238 {
3239   return floor_log2 (x - 1) + 1;
3240 }
3241 
3242 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3243    replace division by D, and put the least significant N bits of the result
3244    in *MULTIPLIER_PTR and return the most significant bit.
3245 
3246    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3247    needed precision is in PRECISION (should be <= N).
3248 
3249    PRECISION should be as small as possible so this function can choose
3250    multiplier more freely.
3251 
3252    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3253    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3254 
3255    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3256    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3257 
3258 static
3259 unsigned HOST_WIDE_INT
choose_multiplier(unsigned HOST_WIDE_INT d,int n,int precision,rtx * multiplier_ptr,int * post_shift_ptr,int * lgup_ptr)3260 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3261 		   rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3262 {
3263   HOST_WIDE_INT mhigh_hi, mlow_hi;
3264   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3265   int lgup, post_shift;
3266   int pow, pow2;
3267   unsigned HOST_WIDE_INT nl, dummy1;
3268   HOST_WIDE_INT nh, dummy2;
3269 
3270   /* lgup = ceil(log2(divisor)); */
3271   lgup = ceil_log2 (d);
3272 
3273   gcc_assert (lgup <= n);
3274 
3275   pow = n + lgup;
3276   pow2 = n + lgup - precision;
3277 
3278   /* We could handle this with some effort, but this case is much
3279      better handled directly with a scc insn, so rely on caller using
3280      that.  */
3281   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3282 
3283   /* mlow = 2^(N + lgup)/d */
3284  if (pow >= HOST_BITS_PER_WIDE_INT)
3285     {
3286       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3287       nl = 0;
3288     }
3289   else
3290     {
3291       nh = 0;
3292       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3293     }
3294   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3295 			&mlow_lo, &mlow_hi, &dummy1, &dummy2);
3296 
3297   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3298   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3299     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3300   else
3301     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3302   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3303 			&mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3304 
3305   gcc_assert (!mhigh_hi || nh - d < d);
3306   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3307   /* Assert that mlow < mhigh.  */
3308   gcc_assert (mlow_hi < mhigh_hi
3309 	      || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3310 
3311   /* If precision == N, then mlow, mhigh exceed 2^N
3312      (but they do not exceed 2^(N+1)).  */
3313 
3314   /* Reduce to lowest terms.  */
3315   for (post_shift = lgup; post_shift > 0; post_shift--)
3316     {
3317       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3318       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3319       if (ml_lo >= mh_lo)
3320 	break;
3321 
3322       mlow_hi = 0;
3323       mlow_lo = ml_lo;
3324       mhigh_hi = 0;
3325       mhigh_lo = mh_lo;
3326     }
3327 
3328   *post_shift_ptr = post_shift;
3329   *lgup_ptr = lgup;
3330   if (n < HOST_BITS_PER_WIDE_INT)
3331     {
3332       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3333       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3334       return mhigh_lo >= mask;
3335     }
3336   else
3337     {
3338       *multiplier_ptr = GEN_INT (mhigh_lo);
3339       return mhigh_hi;
3340     }
3341 }
3342 
3343 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3344    congruent to 1 (mod 2**N).  */
3345 
3346 static unsigned HOST_WIDE_INT
invert_mod2n(unsigned HOST_WIDE_INT x,int n)3347 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3348 {
3349   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3350 
3351   /* The algorithm notes that the choice y = x satisfies
3352      x*y == 1 mod 2^3, since x is assumed odd.
3353      Each iteration doubles the number of bits of significance in y.  */
3354 
3355   unsigned HOST_WIDE_INT mask;
3356   unsigned HOST_WIDE_INT y = x;
3357   int nbit = 3;
3358 
3359   mask = (n == HOST_BITS_PER_WIDE_INT
3360 	  ? ~(unsigned HOST_WIDE_INT) 0
3361 	  : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3362 
3363   while (nbit < n)
3364     {
3365       y = y * (2 - x*y) & mask;		/* Modulo 2^N */
3366       nbit *= 2;
3367     }
3368   return y;
3369 }
3370 
3371 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3372    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3373    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3374    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3375    become signed.
3376 
3377    The result is put in TARGET if that is convenient.
3378 
3379    MODE is the mode of operation.  */
3380 
3381 rtx
expand_mult_highpart_adjust(enum machine_mode mode,rtx adj_operand,rtx op0,rtx op1,rtx target,int unsignedp)3382 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3383 			     rtx op1, rtx target, int unsignedp)
3384 {
3385   rtx tem;
3386   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3387 
3388   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3389 		      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3390 		      NULL_RTX, 0);
3391   tem = expand_and (mode, tem, op1, NULL_RTX);
3392   adj_operand
3393     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3394 		     adj_operand);
3395 
3396   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3397 		      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3398 		      NULL_RTX, 0);
3399   tem = expand_and (mode, tem, op0, NULL_RTX);
3400   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3401 			  target);
3402 
3403   return target;
3404 }
3405 
3406 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3407 
3408 static rtx
extract_high_half(enum machine_mode mode,rtx op)3409 extract_high_half (enum machine_mode mode, rtx op)
3410 {
3411   enum machine_mode wider_mode;
3412 
3413   if (mode == word_mode)
3414     return gen_highpart (mode, op);
3415 
3416   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3417 
3418   wider_mode = GET_MODE_WIDER_MODE (mode);
3419   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3420 		     build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3421   return convert_modes (mode, wider_mode, op, 0);
3422 }
3423 
3424 /* Like expand_mult_highpart, but only consider using a multiplication
3425    optab.  OP1 is an rtx for the constant operand.  */
3426 
3427 static rtx
expand_mult_highpart_optab(enum machine_mode mode,rtx op0,rtx op1,rtx target,int unsignedp,int max_cost)3428 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3429 			    rtx target, int unsignedp, int max_cost)
3430 {
3431   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3432   enum machine_mode wider_mode;
3433   optab moptab;
3434   rtx tem;
3435   int size;
3436 
3437   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3438 
3439   wider_mode = GET_MODE_WIDER_MODE (mode);
3440   size = GET_MODE_BITSIZE (mode);
3441 
3442   /* Firstly, try using a multiplication insn that only generates the needed
3443      high part of the product, and in the sign flavor of unsignedp.  */
3444   if (mul_highpart_cost[mode] < max_cost)
3445     {
3446       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3447       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3448 			  unsignedp, OPTAB_DIRECT);
3449       if (tem)
3450 	return tem;
3451     }
3452 
3453   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3454      Need to adjust the result after the multiplication.  */
3455   if (size - 1 < BITS_PER_WORD
3456       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3457 	  + 4 * add_cost[mode] < max_cost))
3458     {
3459       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3460       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3461 			  unsignedp, OPTAB_DIRECT);
3462       if (tem)
3463 	/* We used the wrong signedness.  Adjust the result.  */
3464 	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3465 					    tem, unsignedp);
3466     }
3467 
3468   /* Try widening multiplication.  */
3469   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3470   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3471       && mul_widen_cost[wider_mode] < max_cost)
3472     {
3473       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3474 			  unsignedp, OPTAB_WIDEN);
3475       if (tem)
3476 	return extract_high_half (mode, tem);
3477     }
3478 
3479   /* Try widening the mode and perform a non-widening multiplication.  */
3480   if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3481       && size - 1 < BITS_PER_WORD
3482       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3483     {
3484       rtx insns, wop0, wop1;
3485 
3486       /* We need to widen the operands, for example to ensure the
3487 	 constant multiplier is correctly sign or zero extended.
3488 	 Use a sequence to clean-up any instructions emitted by
3489 	 the conversions if things don't work out.  */
3490       start_sequence ();
3491       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3492       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3493       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3494 			  unsignedp, OPTAB_WIDEN);
3495       insns = get_insns ();
3496       end_sequence ();
3497 
3498       if (tem)
3499 	{
3500 	  emit_insn (insns);
3501 	  return extract_high_half (mode, tem);
3502 	}
3503     }
3504 
3505   /* Try widening multiplication of opposite signedness, and adjust.  */
3506   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3507   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3508       && size - 1 < BITS_PER_WORD
3509       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3510 	  + 4 * add_cost[mode] < max_cost))
3511     {
3512       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3513 			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3514       if (tem != 0)
3515 	{
3516 	  tem = extract_high_half (mode, tem);
3517 	  /* We used the wrong signedness.  Adjust the result.  */
3518 	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3519 					      target, unsignedp);
3520 	}
3521     }
3522 
3523   return 0;
3524 }
3525 
3526 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3527    putting the high half of the result in TARGET if that is convenient,
3528    and return where the result is.  If the operation can not be performed,
3529    0 is returned.
3530 
3531    MODE is the mode of operation and result.
3532 
3533    UNSIGNEDP nonzero means unsigned multiply.
3534 
3535    MAX_COST is the total allowed cost for the expanded RTL.  */
3536 
3537 static rtx
expand_mult_highpart(enum machine_mode mode,rtx op0,rtx op1,rtx target,int unsignedp,int max_cost)3538 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3539 		      rtx target, int unsignedp, int max_cost)
3540 {
3541   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3542   unsigned HOST_WIDE_INT cnst1;
3543   int extra_cost;
3544   bool sign_adjust = false;
3545   enum mult_variant variant;
3546   struct algorithm alg;
3547   rtx tem;
3548 
3549   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3550   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3551   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3552 
3553   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3554 
3555   /* We can't optimize modes wider than BITS_PER_WORD.
3556      ??? We might be able to perform double-word arithmetic if
3557      mode == word_mode, however all the cost calculations in
3558      synth_mult etc. assume single-word operations.  */
3559   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3560     return expand_mult_highpart_optab (mode, op0, op1, target,
3561 				       unsignedp, max_cost);
3562 
3563   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3564 
3565   /* Check whether we try to multiply by a negative constant.  */
3566   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3567     {
3568       sign_adjust = true;
3569       extra_cost += add_cost[mode];
3570     }
3571 
3572   /* See whether shift/add multiplication is cheap enough.  */
3573   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3574 			   max_cost - extra_cost))
3575     {
3576       /* See whether the specialized multiplication optabs are
3577 	 cheaper than the shift/add version.  */
3578       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3579 					alg.cost.cost + extra_cost);
3580       if (tem)
3581 	return tem;
3582 
3583       tem = convert_to_mode (wider_mode, op0, unsignedp);
3584       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3585       tem = extract_high_half (mode, tem);
3586 
3587       /* Adjust result for signedness.  */
3588       if (sign_adjust)
3589 	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3590 
3591       return tem;
3592     }
3593   return expand_mult_highpart_optab (mode, op0, op1, target,
3594 				     unsignedp, max_cost);
3595 }
3596 
3597 
3598 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3599 
3600 static rtx
expand_smod_pow2(enum machine_mode mode,rtx op0,HOST_WIDE_INT d)3601 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3602 {
3603   unsigned HOST_WIDE_INT masklow, maskhigh;
3604   rtx result, temp, shift, label;
3605   int logd;
3606 
3607   logd = floor_log2 (d);
3608   result = gen_reg_rtx (mode);
3609 
3610   /* Avoid conditional branches when they're expensive.  */
3611   if (BRANCH_COST >= 2
3612       && !optimize_size)
3613     {
3614       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3615 				      mode, 0, -1);
3616       if (signmask)
3617 	{
3618 	  signmask = force_reg (mode, signmask);
3619 	  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3620 	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3621 
3622 	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
3623 	     which instruction sequence to use.  If logical right shifts
3624 	     are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3625 	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3626 
3627 	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
3628 	  if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3629 	      || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3630 	    {
3631 	      temp = expand_binop (mode, xor_optab, op0, signmask,
3632 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3633 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3634 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3635 	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3636 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3637 	      temp = expand_binop (mode, xor_optab, temp, signmask,
3638 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3639 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3640 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3641 	    }
3642 	  else
3643 	    {
3644 	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
3645 				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
3646 	      signmask = force_reg (mode, signmask);
3647 
3648 	      temp = expand_binop (mode, add_optab, op0, signmask,
3649 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3650 	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3651 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3652 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3653 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3654 	    }
3655 	  return temp;
3656 	}
3657     }
3658 
3659   /* Mask contains the mode's signbit and the significant bits of the
3660      modulus.  By including the signbit in the operation, many targets
3661      can avoid an explicit compare operation in the following comparison
3662      against zero.  */
3663 
3664   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3665   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3666     {
3667       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3668       maskhigh = -1;
3669     }
3670   else
3671     maskhigh = (HOST_WIDE_INT) -1
3672 		 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3673 
3674   temp = expand_binop (mode, and_optab, op0,
3675 		       immed_double_const (masklow, maskhigh, mode),
3676 		       result, 1, OPTAB_LIB_WIDEN);
3677   if (temp != result)
3678     emit_move_insn (result, temp);
3679 
3680   label = gen_label_rtx ();
3681   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3682 
3683   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3684 		       0, OPTAB_LIB_WIDEN);
3685   masklow = (HOST_WIDE_INT) -1 << logd;
3686   maskhigh = -1;
3687   temp = expand_binop (mode, ior_optab, temp,
3688 		       immed_double_const (masklow, maskhigh, mode),
3689 		       result, 1, OPTAB_LIB_WIDEN);
3690   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3691 		       0, OPTAB_LIB_WIDEN);
3692   if (temp != result)
3693     emit_move_insn (result, temp);
3694   emit_label (label);
3695   return result;
3696 }
3697 
3698 /* Expand signed division of OP0 by a power of two D in mode MODE.
3699    This routine is only called for positive values of D.  */
3700 
3701 static rtx
expand_sdiv_pow2(enum machine_mode mode,rtx op0,HOST_WIDE_INT d)3702 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3703 {
3704   rtx temp, label;
3705   tree shift;
3706   int logd;
3707 
3708   logd = floor_log2 (d);
3709   shift = build_int_cst (NULL_TREE, logd);
3710 
3711   if (d == 2 && BRANCH_COST >= 1)
3712     {
3713       temp = gen_reg_rtx (mode);
3714       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3715       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3716 			   0, OPTAB_LIB_WIDEN);
3717       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3718     }
3719 
3720 #ifdef HAVE_conditional_move
3721   if (BRANCH_COST >= 2)
3722     {
3723       rtx temp2;
3724 
3725       /* ??? emit_conditional_move forces a stack adjustment via
3726 	 compare_from_rtx so, if the sequence is discarded, it will
3727 	 be lost.  Do it now instead.  */
3728       do_pending_stack_adjust ();
3729 
3730       start_sequence ();
3731       temp2 = copy_to_mode_reg (mode, op0);
3732       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3733 			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
3734       temp = force_reg (mode, temp);
3735 
3736       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3737       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3738 				     mode, temp, temp2, mode, 0);
3739       if (temp2)
3740 	{
3741 	  rtx seq = get_insns ();
3742 	  end_sequence ();
3743 	  emit_insn (seq);
3744 	  return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3745 	}
3746       end_sequence ();
3747     }
3748 #endif
3749 
3750   if (BRANCH_COST >= 2)
3751     {
3752       int ushift = GET_MODE_BITSIZE (mode) - logd;
3753 
3754       temp = gen_reg_rtx (mode);
3755       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3756       if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3757 	temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3758 			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
3759       else
3760 	temp = expand_shift (RSHIFT_EXPR, mode, temp,
3761 			     build_int_cst (NULL_TREE, ushift),
3762 			     NULL_RTX, 1);
3763       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3764 			   0, OPTAB_LIB_WIDEN);
3765       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3766     }
3767 
3768   label = gen_label_rtx ();
3769   temp = copy_to_mode_reg (mode, op0);
3770   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3771   expand_inc (temp, GEN_INT (d - 1));
3772   emit_label (label);
3773   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3774 }
3775 
3776 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3777    if that is convenient, and returning where the result is.
3778    You may request either the quotient or the remainder as the result;
3779    specify REM_FLAG nonzero to get the remainder.
3780 
3781    CODE is the expression code for which kind of division this is;
3782    it controls how rounding is done.  MODE is the machine mode to use.
3783    UNSIGNEDP nonzero means do unsigned division.  */
3784 
3785 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3786    and then correct it by or'ing in missing high bits
3787    if result of ANDI is nonzero.
3788    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3789    This could optimize to a bfexts instruction.
3790    But C doesn't use these operations, so their optimizations are
3791    left for later.  */
3792 /* ??? For modulo, we don't actually need the highpart of the first product,
3793    the low part will do nicely.  And for small divisors, the second multiply
3794    can also be a low-part only multiply or even be completely left out.
3795    E.g. to calculate the remainder of a division by 3 with a 32 bit
3796    multiply, multiply with 0x55555556 and extract the upper two bits;
3797    the result is exact for inputs up to 0x1fffffff.
3798    The input range can be reduced by using cross-sum rules.
3799    For odd divisors >= 3, the following table gives right shift counts
3800    so that if a number is shifted by an integer multiple of the given
3801    amount, the remainder stays the same:
3802    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3803    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3804    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3805    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3806    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3807 
3808    Cross-sum rules for even numbers can be derived by leaving as many bits
3809    to the right alone as the divisor has zeros to the right.
3810    E.g. if x is an unsigned 32 bit number:
3811    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3812    */
3813 
3814 rtx
expand_divmod(int rem_flag,enum tree_code code,enum machine_mode mode,rtx op0,rtx op1,rtx target,int unsignedp)3815 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3816 	       rtx op0, rtx op1, rtx target, int unsignedp)
3817 {
3818   enum machine_mode compute_mode;
3819   rtx tquotient;
3820   rtx quotient = 0, remainder = 0;
3821   rtx last;
3822   int size;
3823   rtx insn, set;
3824   optab optab1, optab2;
3825   int op1_is_constant, op1_is_pow2 = 0;
3826   int max_cost, extra_cost;
3827   static HOST_WIDE_INT last_div_const = 0;
3828   static HOST_WIDE_INT ext_op1;
3829 
3830   op1_is_constant = GET_CODE (op1) == CONST_INT;
3831   if (op1_is_constant)
3832     {
3833       ext_op1 = INTVAL (op1);
3834       if (unsignedp)
3835 	ext_op1 &= GET_MODE_MASK (mode);
3836       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3837 		     || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3838     }
3839 
3840   /*
3841      This is the structure of expand_divmod:
3842 
3843      First comes code to fix up the operands so we can perform the operations
3844      correctly and efficiently.
3845 
3846      Second comes a switch statement with code specific for each rounding mode.
3847      For some special operands this code emits all RTL for the desired
3848      operation, for other cases, it generates only a quotient and stores it in
3849      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3850      to indicate that it has not done anything.
3851 
3852      Last comes code that finishes the operation.  If QUOTIENT is set and
3853      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3854      QUOTIENT is not set, it is computed using trunc rounding.
3855 
3856      We try to generate special code for division and remainder when OP1 is a
3857      constant.  If |OP1| = 2**n we can use shifts and some other fast
3858      operations.  For other values of OP1, we compute a carefully selected
3859      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3860      by m.
3861 
3862      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3863      half of the product.  Different strategies for generating the product are
3864      implemented in expand_mult_highpart.
3865 
3866      If what we actually want is the remainder, we generate that by another
3867      by-constant multiplication and a subtraction.  */
3868 
3869   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3870      code below will malfunction if we are, so check here and handle
3871      the special case if so.  */
3872   if (op1 == const1_rtx)
3873     return rem_flag ? const0_rtx : op0;
3874 
3875     /* When dividing by -1, we could get an overflow.
3876      negv_optab can handle overflows.  */
3877   if (! unsignedp && op1 == constm1_rtx)
3878     {
3879       if (rem_flag)
3880 	return const0_rtx;
3881       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3882 			  ? negv_optab : neg_optab, op0, target, 0);
3883     }
3884 
3885   if (target
3886       /* Don't use the function value register as a target
3887 	 since we have to read it as well as write it,
3888 	 and function-inlining gets confused by this.  */
3889       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3890 	  /* Don't clobber an operand while doing a multi-step calculation.  */
3891 	  || ((rem_flag || op1_is_constant)
3892 	      && (reg_mentioned_p (target, op0)
3893 		  || (MEM_P (op0) && MEM_P (target))))
3894 	  || reg_mentioned_p (target, op1)
3895 	  || (MEM_P (op1) && MEM_P (target))))
3896     target = 0;
3897 
3898   /* Get the mode in which to perform this computation.  Normally it will
3899      be MODE, but sometimes we can't do the desired operation in MODE.
3900      If so, pick a wider mode in which we can do the operation.  Convert
3901      to that mode at the start to avoid repeated conversions.
3902 
3903      First see what operations we need.  These depend on the expression
3904      we are evaluating.  (We assume that divxx3 insns exist under the
3905      same conditions that modxx3 insns and that these insns don't normally
3906      fail.  If these assumptions are not correct, we may generate less
3907      efficient code in some cases.)
3908 
3909      Then see if we find a mode in which we can open-code that operation
3910      (either a division, modulus, or shift).  Finally, check for the smallest
3911      mode for which we can do the operation with a library call.  */
3912 
3913   /* We might want to refine this now that we have division-by-constant
3914      optimization.  Since expand_mult_highpart tries so many variants, it is
3915      not straightforward to generalize this.  Maybe we should make an array
3916      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3917 
3918   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3919 	    ? (unsignedp ? lshr_optab : ashr_optab)
3920 	    : (unsignedp ? udiv_optab : sdiv_optab));
3921   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3922 	    ? optab1
3923 	    : (unsignedp ? udivmod_optab : sdivmod_optab));
3924 
3925   for (compute_mode = mode; compute_mode != VOIDmode;
3926        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3927     if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3928 	|| optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3929       break;
3930 
3931   if (compute_mode == VOIDmode)
3932     for (compute_mode = mode; compute_mode != VOIDmode;
3933 	 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3934       if (optab1->handlers[compute_mode].libfunc
3935 	  || optab2->handlers[compute_mode].libfunc)
3936 	break;
3937 
3938   /* If we still couldn't find a mode, use MODE, but expand_binop will
3939      probably die.  */
3940   if (compute_mode == VOIDmode)
3941     compute_mode = mode;
3942 
3943   if (target && GET_MODE (target) == compute_mode)
3944     tquotient = target;
3945   else
3946     tquotient = gen_reg_rtx (compute_mode);
3947 
3948   size = GET_MODE_BITSIZE (compute_mode);
3949 #if 0
3950   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3951      (mode), and thereby get better code when OP1 is a constant.  Do that
3952      later.  It will require going over all usages of SIZE below.  */
3953   size = GET_MODE_BITSIZE (mode);
3954 #endif
3955 
3956   /* Only deduct something for a REM if the last divide done was
3957      for a different constant.   Then set the constant of the last
3958      divide.  */
3959   max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode];
3960   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3961 		     && INTVAL (op1) == last_div_const))
3962     max_cost -= mul_cost[compute_mode] + add_cost[compute_mode];
3963 
3964   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3965 
3966   /* Now convert to the best mode to use.  */
3967   if (compute_mode != mode)
3968     {
3969       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3970       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3971 
3972       /* convert_modes may have placed op1 into a register, so we
3973 	 must recompute the following.  */
3974       op1_is_constant = GET_CODE (op1) == CONST_INT;
3975       op1_is_pow2 = (op1_is_constant
3976 		     && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3977 			  || (! unsignedp
3978 			      && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3979     }
3980 
3981   /* If one of the operands is a volatile MEM, copy it into a register.  */
3982 
3983   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3984     op0 = force_reg (compute_mode, op0);
3985   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3986     op1 = force_reg (compute_mode, op1);
3987 
3988   /* If we need the remainder or if OP1 is constant, we need to
3989      put OP0 in a register in case it has any queued subexpressions.  */
3990   if (rem_flag || op1_is_constant)
3991     op0 = force_reg (compute_mode, op0);
3992 
3993   last = get_last_insn ();
3994 
3995   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3996   if (unsignedp)
3997     {
3998       if (code == FLOOR_DIV_EXPR)
3999 	code = TRUNC_DIV_EXPR;
4000       if (code == FLOOR_MOD_EXPR)
4001 	code = TRUNC_MOD_EXPR;
4002       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4003 	code = TRUNC_DIV_EXPR;
4004     }
4005 
4006   if (op1 != const0_rtx)
4007     switch (code)
4008       {
4009       case TRUNC_MOD_EXPR:
4010       case TRUNC_DIV_EXPR:
4011 	if (op1_is_constant)
4012 	  {
4013 	    if (unsignedp)
4014 	      {
4015 		unsigned HOST_WIDE_INT mh;
4016 		int pre_shift, post_shift;
4017 		int dummy;
4018 		rtx ml;
4019 		unsigned HOST_WIDE_INT d = (INTVAL (op1)
4020 					    & GET_MODE_MASK (compute_mode));
4021 
4022 		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4023 		  {
4024 		    pre_shift = floor_log2 (d);
4025 		    if (rem_flag)
4026 		      {
4027 			remainder
4028 			  = expand_binop (compute_mode, and_optab, op0,
4029 					  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4030 					  remainder, 1,
4031 					  OPTAB_LIB_WIDEN);
4032 			if (remainder)
4033 			  return gen_lowpart (mode, remainder);
4034 		      }
4035 		    quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4036 					     build_int_cst (NULL_TREE,
4037 							    pre_shift),
4038 					     tquotient, 1);
4039 		  }
4040 		else if (size <= HOST_BITS_PER_WIDE_INT)
4041 		  {
4042 		    if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4043 		      {
4044 			/* Most significant bit of divisor is set; emit an scc
4045 			   insn.  */
4046 			quotient = emit_store_flag (tquotient, GEU, op0, op1,
4047 						    compute_mode, 1, 1);
4048 			if (quotient == 0)
4049 			  goto fail1;
4050 		      }
4051 		    else
4052 		      {
4053 			/* Find a suitable multiplier and right shift count
4054 			   instead of multiplying with D.  */
4055 
4056 			mh = choose_multiplier (d, size, size,
4057 						&ml, &post_shift, &dummy);
4058 
4059 			/* If the suggested multiplier is more than SIZE bits,
4060 			   we can do better for even divisors, using an
4061 			   initial right shift.  */
4062 			if (mh != 0 && (d & 1) == 0)
4063 			  {
4064 			    pre_shift = floor_log2 (d & -d);
4065 			    mh = choose_multiplier (d >> pre_shift, size,
4066 						    size - pre_shift,
4067 						    &ml, &post_shift, &dummy);
4068 			    gcc_assert (!mh);
4069 			  }
4070 			else
4071 			  pre_shift = 0;
4072 
4073 			if (mh != 0)
4074 			  {
4075 			    rtx t1, t2, t3, t4;
4076 
4077 			    if (post_shift - 1 >= BITS_PER_WORD)
4078 			      goto fail1;
4079 
4080 			    extra_cost
4081 			      = (shift_cost[compute_mode][post_shift - 1]
4082 				 + shift_cost[compute_mode][1]
4083 				 + 2 * add_cost[compute_mode]);
4084 			    t1 = expand_mult_highpart (compute_mode, op0, ml,
4085 						       NULL_RTX, 1,
4086 						       max_cost - extra_cost);
4087 			    if (t1 == 0)
4088 			      goto fail1;
4089 			    t2 = force_operand (gen_rtx_MINUS (compute_mode,
4090 							       op0, t1),
4091 						NULL_RTX);
4092 			    t3 = expand_shift
4093 			      (RSHIFT_EXPR, compute_mode, t2,
4094 			       build_int_cst (NULL_TREE, 1),
4095 			       NULL_RTX,1);
4096 			    t4 = force_operand (gen_rtx_PLUS (compute_mode,
4097 							      t1, t3),
4098 						NULL_RTX);
4099 			    quotient = expand_shift
4100 			      (RSHIFT_EXPR, compute_mode, t4,
4101 			       build_int_cst (NULL_TREE, post_shift - 1),
4102 			       tquotient, 1);
4103 			  }
4104 			else
4105 			  {
4106 			    rtx t1, t2;
4107 
4108 			    if (pre_shift >= BITS_PER_WORD
4109 				|| post_shift >= BITS_PER_WORD)
4110 			      goto fail1;
4111 
4112 			    t1 = expand_shift
4113 			      (RSHIFT_EXPR, compute_mode, op0,
4114 			       build_int_cst (NULL_TREE, pre_shift),
4115 			       NULL_RTX, 1);
4116 			    extra_cost
4117 			      = (shift_cost[compute_mode][pre_shift]
4118 				 + shift_cost[compute_mode][post_shift]);
4119 			    t2 = expand_mult_highpart (compute_mode, t1, ml,
4120 						       NULL_RTX, 1,
4121 						       max_cost - extra_cost);
4122 			    if (t2 == 0)
4123 			      goto fail1;
4124 			    quotient = expand_shift
4125 			      (RSHIFT_EXPR, compute_mode, t2,
4126 			       build_int_cst (NULL_TREE, post_shift),
4127 			       tquotient, 1);
4128 			  }
4129 		      }
4130 		  }
4131 		else		/* Too wide mode to use tricky code */
4132 		  break;
4133 
4134 		insn = get_last_insn ();
4135 		if (insn != last
4136 		    && (set = single_set (insn)) != 0
4137 		    && SET_DEST (set) == quotient)
4138 		  set_unique_reg_note (insn,
4139 				       REG_EQUAL,
4140 				       gen_rtx_UDIV (compute_mode, op0, op1));
4141 	      }
4142 	    else		/* TRUNC_DIV, signed */
4143 	      {
4144 		unsigned HOST_WIDE_INT ml;
4145 		int lgup, post_shift;
4146 		rtx mlr;
4147 		HOST_WIDE_INT d = INTVAL (op1);
4148 		unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
4149 
4150 		/* n rem d = n rem -d */
4151 		if (rem_flag && d < 0)
4152 		  {
4153 		    d = abs_d;
4154 		    op1 = gen_int_mode (abs_d, compute_mode);
4155 		  }
4156 
4157 		if (d == 1)
4158 		  quotient = op0;
4159 		else if (d == -1)
4160 		  quotient = expand_unop (compute_mode, neg_optab, op0,
4161 					  tquotient, 0);
4162 		else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4163 		  {
4164 		    /* This case is not handled correctly below.  */
4165 		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4166 						compute_mode, 1, 1);
4167 		    if (quotient == 0)
4168 		      goto fail1;
4169 		  }
4170 		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4171 			 && (rem_flag ? smod_pow2_cheap[compute_mode]
4172 				      : sdiv_pow2_cheap[compute_mode])
4173 			 /* We assume that cheap metric is true if the
4174 			    optab has an expander for this mode.  */
4175 			 && (((rem_flag ? smod_optab : sdiv_optab)
4176 			      ->handlers[compute_mode].insn_code
4177 			      != CODE_FOR_nothing)
4178 			     || (sdivmod_optab->handlers[compute_mode]
4179 				 .insn_code != CODE_FOR_nothing)))
4180 		  ;
4181 		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4182 		  {
4183 		    if (rem_flag)
4184 		      {
4185 			remainder = expand_smod_pow2 (compute_mode, op0, d);
4186 			if (remainder)
4187 			  return gen_lowpart (mode, remainder);
4188 		      }
4189 
4190 		    if (sdiv_pow2_cheap[compute_mode]
4191 			&& ((sdiv_optab->handlers[compute_mode].insn_code
4192 			     != CODE_FOR_nothing)
4193 			    || (sdivmod_optab->handlers[compute_mode].insn_code
4194 				!= CODE_FOR_nothing)))
4195 		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4196 						compute_mode, op0,
4197 						gen_int_mode (abs_d,
4198 							      compute_mode),
4199 						NULL_RTX, 0);
4200 		    else
4201 		      quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4202 
4203 		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4204 		       negate the quotient.  */
4205 		    if (d < 0)
4206 		      {
4207 			insn = get_last_insn ();
4208 			if (insn != last
4209 			    && (set = single_set (insn)) != 0
4210 			    && SET_DEST (set) == quotient
4211 			    && abs_d < ((unsigned HOST_WIDE_INT) 1
4212 					<< (HOST_BITS_PER_WIDE_INT - 1)))
4213 			  set_unique_reg_note (insn,
4214 					       REG_EQUAL,
4215 					       gen_rtx_DIV (compute_mode,
4216 							    op0,
4217 							    GEN_INT
4218 							    (trunc_int_for_mode
4219 							     (abs_d,
4220 							      compute_mode))));
4221 
4222 			quotient = expand_unop (compute_mode, neg_optab,
4223 						quotient, quotient, 0);
4224 		      }
4225 		  }
4226 		else if (size <= HOST_BITS_PER_WIDE_INT)
4227 		  {
4228 		    choose_multiplier (abs_d, size, size - 1,
4229 				       &mlr, &post_shift, &lgup);
4230 		    ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4231 		    if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4232 		      {
4233 			rtx t1, t2, t3;
4234 
4235 			if (post_shift >= BITS_PER_WORD
4236 			    || size - 1 >= BITS_PER_WORD)
4237 			  goto fail1;
4238 
4239 			extra_cost = (shift_cost[compute_mode][post_shift]
4240 				      + shift_cost[compute_mode][size - 1]
4241 				      + add_cost[compute_mode]);
4242 			t1 = expand_mult_highpart (compute_mode, op0, mlr,
4243 						   NULL_RTX, 0,
4244 						   max_cost - extra_cost);
4245 			if (t1 == 0)
4246 			  goto fail1;
4247 			t2 = expand_shift
4248 			  (RSHIFT_EXPR, compute_mode, t1,
4249 			   build_int_cst (NULL_TREE, post_shift),
4250 			   NULL_RTX, 0);
4251 			t3 = expand_shift
4252 			  (RSHIFT_EXPR, compute_mode, op0,
4253 			   build_int_cst (NULL_TREE, size - 1),
4254 			   NULL_RTX, 0);
4255 			if (d < 0)
4256 			  quotient
4257 			    = force_operand (gen_rtx_MINUS (compute_mode,
4258 							    t3, t2),
4259 					     tquotient);
4260 			else
4261 			  quotient
4262 			    = force_operand (gen_rtx_MINUS (compute_mode,
4263 							    t2, t3),
4264 					     tquotient);
4265 		      }
4266 		    else
4267 		      {
4268 			rtx t1, t2, t3, t4;
4269 
4270 			if (post_shift >= BITS_PER_WORD
4271 			    || size - 1 >= BITS_PER_WORD)
4272 			  goto fail1;
4273 
4274 			ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4275 			mlr = gen_int_mode (ml, compute_mode);
4276 			extra_cost = (shift_cost[compute_mode][post_shift]
4277 				      + shift_cost[compute_mode][size - 1]
4278 				      + 2 * add_cost[compute_mode]);
4279 			t1 = expand_mult_highpart (compute_mode, op0, mlr,
4280 						   NULL_RTX, 0,
4281 						   max_cost - extra_cost);
4282 			if (t1 == 0)
4283 			  goto fail1;
4284 			t2 = force_operand (gen_rtx_PLUS (compute_mode,
4285 							  t1, op0),
4286 					    NULL_RTX);
4287 			t3 = expand_shift
4288 			  (RSHIFT_EXPR, compute_mode, t2,
4289 			   build_int_cst (NULL_TREE, post_shift),
4290 			   NULL_RTX, 0);
4291 			t4 = expand_shift
4292 			  (RSHIFT_EXPR, compute_mode, op0,
4293 			   build_int_cst (NULL_TREE, size - 1),
4294 			   NULL_RTX, 0);
4295 			if (d < 0)
4296 			  quotient
4297 			    = force_operand (gen_rtx_MINUS (compute_mode,
4298 							    t4, t3),
4299 					     tquotient);
4300 			else
4301 			  quotient
4302 			    = force_operand (gen_rtx_MINUS (compute_mode,
4303 							    t3, t4),
4304 					     tquotient);
4305 		      }
4306 		  }
4307 		else		/* Too wide mode to use tricky code */
4308 		  break;
4309 
4310 		insn = get_last_insn ();
4311 		if (insn != last
4312 		    && (set = single_set (insn)) != 0
4313 		    && SET_DEST (set) == quotient)
4314 		  set_unique_reg_note (insn,
4315 				       REG_EQUAL,
4316 				       gen_rtx_DIV (compute_mode, op0, op1));
4317 	      }
4318 	    break;
4319 	  }
4320       fail1:
4321 	delete_insns_since (last);
4322 	break;
4323 
4324       case FLOOR_DIV_EXPR:
4325       case FLOOR_MOD_EXPR:
4326       /* We will come here only for signed operations.  */
4327 	if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4328 	  {
4329 	    unsigned HOST_WIDE_INT mh;
4330 	    int pre_shift, lgup, post_shift;
4331 	    HOST_WIDE_INT d = INTVAL (op1);
4332 	    rtx ml;
4333 
4334 	    if (d > 0)
4335 	      {
4336 		/* We could just as easily deal with negative constants here,
4337 		   but it does not seem worth the trouble for GCC 2.6.  */
4338 		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4339 		  {
4340 		    pre_shift = floor_log2 (d);
4341 		    if (rem_flag)
4342 		      {
4343 			remainder = expand_binop (compute_mode, and_optab, op0,
4344 						  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4345 						  remainder, 0, OPTAB_LIB_WIDEN);
4346 			if (remainder)
4347 			  return gen_lowpart (mode, remainder);
4348 		      }
4349 		    quotient = expand_shift
4350 		      (RSHIFT_EXPR, compute_mode, op0,
4351 		       build_int_cst (NULL_TREE, pre_shift),
4352 		       tquotient, 0);
4353 		  }
4354 		else
4355 		  {
4356 		    rtx t1, t2, t3, t4;
4357 
4358 		    mh = choose_multiplier (d, size, size - 1,
4359 					    &ml, &post_shift, &lgup);
4360 		    gcc_assert (!mh);
4361 
4362 		    if (post_shift < BITS_PER_WORD
4363 			&& size - 1 < BITS_PER_WORD)
4364 		      {
4365 			t1 = expand_shift
4366 			  (RSHIFT_EXPR, compute_mode, op0,
4367 			   build_int_cst (NULL_TREE, size - 1),
4368 			   NULL_RTX, 0);
4369 			t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4370 					   NULL_RTX, 0, OPTAB_WIDEN);
4371 			extra_cost = (shift_cost[compute_mode][post_shift]
4372 				      + shift_cost[compute_mode][size - 1]
4373 				      + 2 * add_cost[compute_mode]);
4374 			t3 = expand_mult_highpart (compute_mode, t2, ml,
4375 						   NULL_RTX, 1,
4376 						   max_cost - extra_cost);
4377 			if (t3 != 0)
4378 			  {
4379 			    t4 = expand_shift
4380 			      (RSHIFT_EXPR, compute_mode, t3,
4381 			       build_int_cst (NULL_TREE, post_shift),
4382 			       NULL_RTX, 1);
4383 			    quotient = expand_binop (compute_mode, xor_optab,
4384 						     t4, t1, tquotient, 0,
4385 						     OPTAB_WIDEN);
4386 			  }
4387 		      }
4388 		  }
4389 	      }
4390 	    else
4391 	      {
4392 		rtx nsign, t1, t2, t3, t4;
4393 		t1 = force_operand (gen_rtx_PLUS (compute_mode,
4394 						  op0, constm1_rtx), NULL_RTX);
4395 		t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4396 				   0, OPTAB_WIDEN);
4397 		nsign = expand_shift
4398 		  (RSHIFT_EXPR, compute_mode, t2,
4399 		   build_int_cst (NULL_TREE, size - 1),
4400 		   NULL_RTX, 0);
4401 		t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4402 				    NULL_RTX);
4403 		t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4404 				    NULL_RTX, 0);
4405 		if (t4)
4406 		  {
4407 		    rtx t5;
4408 		    t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4409 				      NULL_RTX, 0);
4410 		    quotient = force_operand (gen_rtx_PLUS (compute_mode,
4411 							    t4, t5),
4412 					      tquotient);
4413 		  }
4414 	      }
4415 	  }
4416 
4417 	if (quotient != 0)
4418 	  break;
4419 	delete_insns_since (last);
4420 
4421 	/* Try using an instruction that produces both the quotient and
4422 	   remainder, using truncation.  We can easily compensate the quotient
4423 	   or remainder to get floor rounding, once we have the remainder.
4424 	   Notice that we compute also the final remainder value here,
4425 	   and return the result right away.  */
4426 	if (target == 0 || GET_MODE (target) != compute_mode)
4427 	  target = gen_reg_rtx (compute_mode);
4428 
4429 	if (rem_flag)
4430 	  {
4431 	    remainder
4432 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4433 	    quotient = gen_reg_rtx (compute_mode);
4434 	  }
4435 	else
4436 	  {
4437 	    quotient
4438 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4439 	    remainder = gen_reg_rtx (compute_mode);
4440 	  }
4441 
4442 	if (expand_twoval_binop (sdivmod_optab, op0, op1,
4443 				 quotient, remainder, 0))
4444 	  {
4445 	    /* This could be computed with a branch-less sequence.
4446 	       Save that for later.  */
4447 	    rtx tem;
4448 	    rtx label = gen_label_rtx ();
4449 	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4450 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4451 				NULL_RTX, 0, OPTAB_WIDEN);
4452 	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4453 	    expand_dec (quotient, const1_rtx);
4454 	    expand_inc (remainder, op1);
4455 	    emit_label (label);
4456 	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
4457 	  }
4458 
4459 	/* No luck with division elimination or divmod.  Have to do it
4460 	   by conditionally adjusting op0 *and* the result.  */
4461 	{
4462 	  rtx label1, label2, label3, label4, label5;
4463 	  rtx adjusted_op0;
4464 	  rtx tem;
4465 
4466 	  quotient = gen_reg_rtx (compute_mode);
4467 	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4468 	  label1 = gen_label_rtx ();
4469 	  label2 = gen_label_rtx ();
4470 	  label3 = gen_label_rtx ();
4471 	  label4 = gen_label_rtx ();
4472 	  label5 = gen_label_rtx ();
4473 	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4474 	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4475 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4476 			      quotient, 0, OPTAB_LIB_WIDEN);
4477 	  if (tem != quotient)
4478 	    emit_move_insn (quotient, tem);
4479 	  emit_jump_insn (gen_jump (label5));
4480 	  emit_barrier ();
4481 	  emit_label (label1);
4482 	  expand_inc (adjusted_op0, const1_rtx);
4483 	  emit_jump_insn (gen_jump (label4));
4484 	  emit_barrier ();
4485 	  emit_label (label2);
4486 	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4487 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4488 			      quotient, 0, OPTAB_LIB_WIDEN);
4489 	  if (tem != quotient)
4490 	    emit_move_insn (quotient, tem);
4491 	  emit_jump_insn (gen_jump (label5));
4492 	  emit_barrier ();
4493 	  emit_label (label3);
4494 	  expand_dec (adjusted_op0, const1_rtx);
4495 	  emit_label (label4);
4496 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4497 			      quotient, 0, OPTAB_LIB_WIDEN);
4498 	  if (tem != quotient)
4499 	    emit_move_insn (quotient, tem);
4500 	  expand_dec (quotient, const1_rtx);
4501 	  emit_label (label5);
4502 	}
4503 	break;
4504 
4505       case CEIL_DIV_EXPR:
4506       case CEIL_MOD_EXPR:
4507 	if (unsignedp)
4508 	  {
4509 	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4510 	      {
4511 		rtx t1, t2, t3;
4512 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4513 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4514 				   build_int_cst (NULL_TREE, floor_log2 (d)),
4515 				   tquotient, 1);
4516 		t2 = expand_binop (compute_mode, and_optab, op0,
4517 				   GEN_INT (d - 1),
4518 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4519 		t3 = gen_reg_rtx (compute_mode);
4520 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4521 				      compute_mode, 1, 1);
4522 		if (t3 == 0)
4523 		  {
4524 		    rtx lab;
4525 		    lab = gen_label_rtx ();
4526 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4527 		    expand_inc (t1, const1_rtx);
4528 		    emit_label (lab);
4529 		    quotient = t1;
4530 		  }
4531 		else
4532 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4533 							  t1, t3),
4534 					    tquotient);
4535 		break;
4536 	      }
4537 
4538 	    /* Try using an instruction that produces both the quotient and
4539 	       remainder, using truncation.  We can easily compensate the
4540 	       quotient or remainder to get ceiling rounding, once we have the
4541 	       remainder.  Notice that we compute also the final remainder
4542 	       value here, and return the result right away.  */
4543 	    if (target == 0 || GET_MODE (target) != compute_mode)
4544 	      target = gen_reg_rtx (compute_mode);
4545 
4546 	    if (rem_flag)
4547 	      {
4548 		remainder = (REG_P (target)
4549 			     ? target : gen_reg_rtx (compute_mode));
4550 		quotient = gen_reg_rtx (compute_mode);
4551 	      }
4552 	    else
4553 	      {
4554 		quotient = (REG_P (target)
4555 			    ? target : gen_reg_rtx (compute_mode));
4556 		remainder = gen_reg_rtx (compute_mode);
4557 	      }
4558 
4559 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4560 				     remainder, 1))
4561 	      {
4562 		/* This could be computed with a branch-less sequence.
4563 		   Save that for later.  */
4564 		rtx label = gen_label_rtx ();
4565 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4566 				 compute_mode, label);
4567 		expand_inc (quotient, const1_rtx);
4568 		expand_dec (remainder, op1);
4569 		emit_label (label);
4570 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4571 	      }
4572 
4573 	    /* No luck with division elimination or divmod.  Have to do it
4574 	       by conditionally adjusting op0 *and* the result.  */
4575 	    {
4576 	      rtx label1, label2;
4577 	      rtx adjusted_op0, tem;
4578 
4579 	      quotient = gen_reg_rtx (compute_mode);
4580 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4581 	      label1 = gen_label_rtx ();
4582 	      label2 = gen_label_rtx ();
4583 	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4584 			       compute_mode, label1);
4585 	      emit_move_insn  (quotient, const0_rtx);
4586 	      emit_jump_insn (gen_jump (label2));
4587 	      emit_barrier ();
4588 	      emit_label (label1);
4589 	      expand_dec (adjusted_op0, const1_rtx);
4590 	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4591 				  quotient, 1, OPTAB_LIB_WIDEN);
4592 	      if (tem != quotient)
4593 		emit_move_insn (quotient, tem);
4594 	      expand_inc (quotient, const1_rtx);
4595 	      emit_label (label2);
4596 	    }
4597 	  }
4598 	else /* signed */
4599 	  {
4600 	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4601 		&& INTVAL (op1) >= 0)
4602 	      {
4603 		/* This is extremely similar to the code for the unsigned case
4604 		   above.  For 2.7 we should merge these variants, but for
4605 		   2.6.1 I don't want to touch the code for unsigned since that
4606 		   get used in C.  The signed case will only be used by other
4607 		   languages (Ada).  */
4608 
4609 		rtx t1, t2, t3;
4610 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4611 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4612 				   build_int_cst (NULL_TREE, floor_log2 (d)),
4613 				   tquotient, 0);
4614 		t2 = expand_binop (compute_mode, and_optab, op0,
4615 				   GEN_INT (d - 1),
4616 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4617 		t3 = gen_reg_rtx (compute_mode);
4618 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4619 				      compute_mode, 1, 1);
4620 		if (t3 == 0)
4621 		  {
4622 		    rtx lab;
4623 		    lab = gen_label_rtx ();
4624 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4625 		    expand_inc (t1, const1_rtx);
4626 		    emit_label (lab);
4627 		    quotient = t1;
4628 		  }
4629 		else
4630 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4631 							  t1, t3),
4632 					    tquotient);
4633 		break;
4634 	      }
4635 
4636 	    /* Try using an instruction that produces both the quotient and
4637 	       remainder, using truncation.  We can easily compensate the
4638 	       quotient or remainder to get ceiling rounding, once we have the
4639 	       remainder.  Notice that we compute also the final remainder
4640 	       value here, and return the result right away.  */
4641 	    if (target == 0 || GET_MODE (target) != compute_mode)
4642 	      target = gen_reg_rtx (compute_mode);
4643 	    if (rem_flag)
4644 	      {
4645 		remainder= (REG_P (target)
4646 			    ? target : gen_reg_rtx (compute_mode));
4647 		quotient = gen_reg_rtx (compute_mode);
4648 	      }
4649 	    else
4650 	      {
4651 		quotient = (REG_P (target)
4652 			    ? target : gen_reg_rtx (compute_mode));
4653 		remainder = gen_reg_rtx (compute_mode);
4654 	      }
4655 
4656 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4657 				     remainder, 0))
4658 	      {
4659 		/* This could be computed with a branch-less sequence.
4660 		   Save that for later.  */
4661 		rtx tem;
4662 		rtx label = gen_label_rtx ();
4663 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4664 				 compute_mode, label);
4665 		tem = expand_binop (compute_mode, xor_optab, op0, op1,
4666 				    NULL_RTX, 0, OPTAB_WIDEN);
4667 		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4668 		expand_inc (quotient, const1_rtx);
4669 		expand_dec (remainder, op1);
4670 		emit_label (label);
4671 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4672 	      }
4673 
4674 	    /* No luck with division elimination or divmod.  Have to do it
4675 	       by conditionally adjusting op0 *and* the result.  */
4676 	    {
4677 	      rtx label1, label2, label3, label4, label5;
4678 	      rtx adjusted_op0;
4679 	      rtx tem;
4680 
4681 	      quotient = gen_reg_rtx (compute_mode);
4682 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4683 	      label1 = gen_label_rtx ();
4684 	      label2 = gen_label_rtx ();
4685 	      label3 = gen_label_rtx ();
4686 	      label4 = gen_label_rtx ();
4687 	      label5 = gen_label_rtx ();
4688 	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4689 	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4690 			       compute_mode, label1);
4691 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4692 				  quotient, 0, OPTAB_LIB_WIDEN);
4693 	      if (tem != quotient)
4694 		emit_move_insn (quotient, tem);
4695 	      emit_jump_insn (gen_jump (label5));
4696 	      emit_barrier ();
4697 	      emit_label (label1);
4698 	      expand_dec (adjusted_op0, const1_rtx);
4699 	      emit_jump_insn (gen_jump (label4));
4700 	      emit_barrier ();
4701 	      emit_label (label2);
4702 	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4703 			       compute_mode, label3);
4704 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4705 				  quotient, 0, OPTAB_LIB_WIDEN);
4706 	      if (tem != quotient)
4707 		emit_move_insn (quotient, tem);
4708 	      emit_jump_insn (gen_jump (label5));
4709 	      emit_barrier ();
4710 	      emit_label (label3);
4711 	      expand_inc (adjusted_op0, const1_rtx);
4712 	      emit_label (label4);
4713 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4714 				  quotient, 0, OPTAB_LIB_WIDEN);
4715 	      if (tem != quotient)
4716 		emit_move_insn (quotient, tem);
4717 	      expand_inc (quotient, const1_rtx);
4718 	      emit_label (label5);
4719 	    }
4720 	  }
4721 	break;
4722 
4723       case EXACT_DIV_EXPR:
4724 	if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4725 	  {
4726 	    HOST_WIDE_INT d = INTVAL (op1);
4727 	    unsigned HOST_WIDE_INT ml;
4728 	    int pre_shift;
4729 	    rtx t1;
4730 
4731 	    pre_shift = floor_log2 (d & -d);
4732 	    ml = invert_mod2n (d >> pre_shift, size);
4733 	    t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4734 			       build_int_cst (NULL_TREE, pre_shift),
4735 			       NULL_RTX, unsignedp);
4736 	    quotient = expand_mult (compute_mode, t1,
4737 				    gen_int_mode (ml, compute_mode),
4738 				    NULL_RTX, 1);
4739 
4740 	    insn = get_last_insn ();
4741 	    set_unique_reg_note (insn,
4742 				 REG_EQUAL,
4743 				 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4744 						 compute_mode,
4745 						 op0, op1));
4746 	  }
4747 	break;
4748 
4749       case ROUND_DIV_EXPR:
4750       case ROUND_MOD_EXPR:
4751 	if (unsignedp)
4752 	  {
4753 	    rtx tem;
4754 	    rtx label;
4755 	    label = gen_label_rtx ();
4756 	    quotient = gen_reg_rtx (compute_mode);
4757 	    remainder = gen_reg_rtx (compute_mode);
4758 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4759 	      {
4760 		rtx tem;
4761 		quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4762 					 quotient, 1, OPTAB_LIB_WIDEN);
4763 		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4764 		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4765 					  remainder, 1, OPTAB_LIB_WIDEN);
4766 	      }
4767 	    tem = plus_constant (op1, -1);
4768 	    tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4769 				build_int_cst (NULL_TREE, 1),
4770 				NULL_RTX, 1);
4771 	    do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4772 	    expand_inc (quotient, const1_rtx);
4773 	    expand_dec (remainder, op1);
4774 	    emit_label (label);
4775 	  }
4776 	else
4777 	  {
4778 	    rtx abs_rem, abs_op1, tem, mask;
4779 	    rtx label;
4780 	    label = gen_label_rtx ();
4781 	    quotient = gen_reg_rtx (compute_mode);
4782 	    remainder = gen_reg_rtx (compute_mode);
4783 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4784 	      {
4785 		rtx tem;
4786 		quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4787 					 quotient, 0, OPTAB_LIB_WIDEN);
4788 		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4789 		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4790 					  remainder, 0, OPTAB_LIB_WIDEN);
4791 	      }
4792 	    abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4793 	    abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4794 	    tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4795 				build_int_cst (NULL_TREE, 1),
4796 				NULL_RTX, 1);
4797 	    do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4798 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4799 				NULL_RTX, 0, OPTAB_WIDEN);
4800 	    mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4801 				 build_int_cst (NULL_TREE, size - 1),
4802 				 NULL_RTX, 0);
4803 	    tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4804 				NULL_RTX, 0, OPTAB_WIDEN);
4805 	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4806 				NULL_RTX, 0, OPTAB_WIDEN);
4807 	    expand_inc (quotient, tem);
4808 	    tem = expand_binop (compute_mode, xor_optab, mask, op1,
4809 				NULL_RTX, 0, OPTAB_WIDEN);
4810 	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4811 				NULL_RTX, 0, OPTAB_WIDEN);
4812 	    expand_dec (remainder, tem);
4813 	    emit_label (label);
4814 	  }
4815 	return gen_lowpart (mode, rem_flag ? remainder : quotient);
4816 
4817       default:
4818 	gcc_unreachable ();
4819       }
4820 
4821   if (quotient == 0)
4822     {
4823       if (target && GET_MODE (target) != compute_mode)
4824 	target = 0;
4825 
4826       if (rem_flag)
4827 	{
4828 	  /* Try to produce the remainder without producing the quotient.
4829 	     If we seem to have a divmod pattern that does not require widening,
4830 	     don't try widening here.  We should really have a WIDEN argument
4831 	     to expand_twoval_binop, since what we'd really like to do here is
4832 	     1) try a mod insn in compute_mode
4833 	     2) try a divmod insn in compute_mode
4834 	     3) try a div insn in compute_mode and multiply-subtract to get
4835 	        remainder
4836 	     4) try the same things with widening allowed.  */
4837 	  remainder
4838 	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4839 				 op0, op1, target,
4840 				 unsignedp,
4841 				 ((optab2->handlers[compute_mode].insn_code
4842 				   != CODE_FOR_nothing)
4843 				  ? OPTAB_DIRECT : OPTAB_WIDEN));
4844 	  if (remainder == 0)
4845 	    {
4846 	      /* No luck there.  Can we do remainder and divide at once
4847 		 without a library call?  */
4848 	      remainder = gen_reg_rtx (compute_mode);
4849 	      if (! expand_twoval_binop ((unsignedp
4850 					  ? udivmod_optab
4851 					  : sdivmod_optab),
4852 					 op0, op1,
4853 					 NULL_RTX, remainder, unsignedp))
4854 		remainder = 0;
4855 	    }
4856 
4857 	  if (remainder)
4858 	    return gen_lowpart (mode, remainder);
4859 	}
4860 
4861       /* Produce the quotient.  Try a quotient insn, but not a library call.
4862 	 If we have a divmod in this mode, use it in preference to widening
4863 	 the div (for this test we assume it will not fail). Note that optab2
4864 	 is set to the one of the two optabs that the call below will use.  */
4865       quotient
4866 	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4867 			     op0, op1, rem_flag ? NULL_RTX : target,
4868 			     unsignedp,
4869 			     ((optab2->handlers[compute_mode].insn_code
4870 			       != CODE_FOR_nothing)
4871 			      ? OPTAB_DIRECT : OPTAB_WIDEN));
4872 
4873       if (quotient == 0)
4874 	{
4875 	  /* No luck there.  Try a quotient-and-remainder insn,
4876 	     keeping the quotient alone.  */
4877 	  quotient = gen_reg_rtx (compute_mode);
4878 	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4879 				     op0, op1,
4880 				     quotient, NULL_RTX, unsignedp))
4881 	    {
4882 	      quotient = 0;
4883 	      if (! rem_flag)
4884 		/* Still no luck.  If we are not computing the remainder,
4885 		   use a library call for the quotient.  */
4886 		quotient = sign_expand_binop (compute_mode,
4887 					      udiv_optab, sdiv_optab,
4888 					      op0, op1, target,
4889 					      unsignedp, OPTAB_LIB_WIDEN);
4890 	    }
4891 	}
4892     }
4893 
4894   if (rem_flag)
4895     {
4896       if (target && GET_MODE (target) != compute_mode)
4897 	target = 0;
4898 
4899       if (quotient == 0)
4900 	{
4901 	  /* No divide instruction either.  Use library for remainder.  */
4902 	  remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4903 					 op0, op1, target,
4904 					 unsignedp, OPTAB_LIB_WIDEN);
4905 	  /* No remainder function.  Try a quotient-and-remainder
4906 	     function, keeping the remainder.  */
4907 	  if (!remainder)
4908 	    {
4909 	      remainder = gen_reg_rtx (compute_mode);
4910 	      if (!expand_twoval_binop_libfunc
4911 		  (unsignedp ? udivmod_optab : sdivmod_optab,
4912 		   op0, op1,
4913 		   NULL_RTX, remainder,
4914 		   unsignedp ? UMOD : MOD))
4915 		remainder = NULL_RTX;
4916 	    }
4917 	}
4918       else
4919 	{
4920 	  /* We divided.  Now finish doing X - Y * (X / Y).  */
4921 	  remainder = expand_mult (compute_mode, quotient, op1,
4922 				   NULL_RTX, unsignedp);
4923 	  remainder = expand_binop (compute_mode, sub_optab, op0,
4924 				    remainder, target, unsignedp,
4925 				    OPTAB_LIB_WIDEN);
4926 	}
4927     }
4928 
4929   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4930 }
4931 
4932 /* Return a tree node with data type TYPE, describing the value of X.
4933    Usually this is an VAR_DECL, if there is no obvious better choice.
4934    X may be an expression, however we only support those expressions
4935    generated by loop.c.  */
4936 
4937 tree
make_tree(tree type,rtx x)4938 make_tree (tree type, rtx x)
4939 {
4940   tree t;
4941 
4942   switch (GET_CODE (x))
4943     {
4944     case CONST_INT:
4945       {
4946 	HOST_WIDE_INT hi = 0;
4947 
4948 	if (INTVAL (x) < 0
4949 	    && !(TYPE_UNSIGNED (type)
4950 		 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4951 		     < HOST_BITS_PER_WIDE_INT)))
4952 	  hi = -1;
4953 
4954 	t = build_int_cst_wide (type, INTVAL (x), hi);
4955 
4956 	return t;
4957       }
4958 
4959     case CONST_DOUBLE:
4960       if (GET_MODE (x) == VOIDmode)
4961 	t = build_int_cst_wide (type,
4962 				CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4963       else
4964 	{
4965 	  REAL_VALUE_TYPE d;
4966 
4967 	  REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4968 	  t = build_real (type, d);
4969 	}
4970 
4971       return t;
4972 
4973     case CONST_VECTOR:
4974       {
4975 	int units = CONST_VECTOR_NUNITS (x);
4976 	tree itype = TREE_TYPE (type);
4977 	tree t = NULL_TREE;
4978 	int i;
4979 
4980 
4981 	/* Build a tree with vector elements.  */
4982 	for (i = units - 1; i >= 0; --i)
4983 	  {
4984 	    rtx elt = CONST_VECTOR_ELT (x, i);
4985 	    t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4986 	  }
4987 
4988 	return build_vector (type, t);
4989       }
4990 
4991     case PLUS:
4992       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4993 			  make_tree (type, XEXP (x, 1)));
4994 
4995     case MINUS:
4996       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4997 			  make_tree (type, XEXP (x, 1)));
4998 
4999     case NEG:
5000       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5001 
5002     case MULT:
5003       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5004 			  make_tree (type, XEXP (x, 1)));
5005 
5006     case ASHIFT:
5007       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5008 			  make_tree (type, XEXP (x, 1)));
5009 
5010     case LSHIFTRT:
5011       t = lang_hooks.types.unsigned_type (type);
5012       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5013 			    		 make_tree (t, XEXP (x, 0)),
5014 				    	 make_tree (type, XEXP (x, 1))));
5015 
5016     case ASHIFTRT:
5017       t = lang_hooks.types.signed_type (type);
5018       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5019 					 make_tree (t, XEXP (x, 0)),
5020 				    	 make_tree (type, XEXP (x, 1))));
5021 
5022     case DIV:
5023       if (TREE_CODE (type) != REAL_TYPE)
5024 	t = lang_hooks.types.signed_type (type);
5025       else
5026 	t = type;
5027 
5028       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5029 				    	 make_tree (t, XEXP (x, 0)),
5030 				    	 make_tree (t, XEXP (x, 1))));
5031     case UDIV:
5032       t = lang_hooks.types.unsigned_type (type);
5033       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5034 				    	 make_tree (t, XEXP (x, 0)),
5035 				    	 make_tree (t, XEXP (x, 1))));
5036 
5037     case SIGN_EXTEND:
5038     case ZERO_EXTEND:
5039       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5040 					  GET_CODE (x) == ZERO_EXTEND);
5041       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5042 
5043     case CONST:
5044       return make_tree (type, XEXP (x, 0));
5045 
5046     case SYMBOL_REF:
5047       t = SYMBOL_REF_DECL (x);
5048       if (t)
5049 	return fold_convert (type, build_fold_addr_expr (t));
5050       /* else fall through.  */
5051 
5052     default:
5053       t = build_decl (VAR_DECL, NULL_TREE, type);
5054 
5055       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
5056 	 ptr_mode.  So convert.  */
5057       if (POINTER_TYPE_P (type))
5058 	x = convert_memory_address (TYPE_MODE (type), x);
5059 
5060       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5061 	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5062       t->decl_with_rtl.rtl = x;
5063 
5064       return t;
5065     }
5066 }
5067 
5068 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5069    and returning TARGET.
5070 
5071    If TARGET is 0, a pseudo-register or constant is returned.  */
5072 
5073 rtx
expand_and(enum machine_mode mode,rtx op0,rtx op1,rtx target)5074 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5075 {
5076   rtx tem = 0;
5077 
5078   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5079     tem = simplify_binary_operation (AND, mode, op0, op1);
5080   if (tem == 0)
5081     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5082 
5083   if (target == 0)
5084     target = tem;
5085   else if (tem != target)
5086     emit_move_insn (target, tem);
5087   return target;
5088 }
5089 
5090 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5091    and storing in TARGET.  Normally return TARGET.
5092    Return 0 if that cannot be done.
5093 
5094    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5095    it is VOIDmode, they cannot both be CONST_INT.
5096 
5097    UNSIGNEDP is for the case where we have to widen the operands
5098    to perform the operation.  It says to use zero-extension.
5099 
5100    NORMALIZEP is 1 if we should convert the result to be either zero
5101    or one.  Normalize is -1 if we should convert the result to be
5102    either zero or -1.  If NORMALIZEP is zero, the result will be left
5103    "raw" out of the scc insn.  */
5104 
5105 rtx
emit_store_flag(rtx target,enum rtx_code code,rtx op0,rtx op1,enum machine_mode mode,int unsignedp,int normalizep)5106 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5107 		 enum machine_mode mode, int unsignedp, int normalizep)
5108 {
5109   rtx subtarget;
5110   enum insn_code icode;
5111   enum machine_mode compare_mode;
5112   enum machine_mode target_mode = GET_MODE (target);
5113   rtx tem;
5114   rtx last = get_last_insn ();
5115   rtx pattern, comparison;
5116 
5117   if (unsignedp)
5118     code = unsigned_condition (code);
5119 
5120   /* If one operand is constant, make it the second one.  Only do this
5121      if the other operand is not constant as well.  */
5122 
5123   if (swap_commutative_operands_p (op0, op1))
5124     {
5125       tem = op0;
5126       op0 = op1;
5127       op1 = tem;
5128       code = swap_condition (code);
5129     }
5130 
5131   if (mode == VOIDmode)
5132     mode = GET_MODE (op0);
5133 
5134   /* For some comparisons with 1 and -1, we can convert this to
5135      comparisons with zero.  This will often produce more opportunities for
5136      store-flag insns.  */
5137 
5138   switch (code)
5139     {
5140     case LT:
5141       if (op1 == const1_rtx)
5142 	op1 = const0_rtx, code = LE;
5143       break;
5144     case LE:
5145       if (op1 == constm1_rtx)
5146 	op1 = const0_rtx, code = LT;
5147       break;
5148     case GE:
5149       if (op1 == const1_rtx)
5150 	op1 = const0_rtx, code = GT;
5151       break;
5152     case GT:
5153       if (op1 == constm1_rtx)
5154 	op1 = const0_rtx, code = GE;
5155       break;
5156     case GEU:
5157       if (op1 == const1_rtx)
5158 	op1 = const0_rtx, code = NE;
5159       break;
5160     case LTU:
5161       if (op1 == const1_rtx)
5162 	op1 = const0_rtx, code = EQ;
5163       break;
5164     default:
5165       break;
5166     }
5167 
5168   /* If we are comparing a double-word integer with zero or -1, we can
5169      convert the comparison into one involving a single word.  */
5170   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5171       && GET_MODE_CLASS (mode) == MODE_INT
5172       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5173     {
5174       if ((code == EQ || code == NE)
5175 	  && (op1 == const0_rtx || op1 == constm1_rtx))
5176 	{
5177 	  rtx op00, op01, op0both;
5178 
5179 	  /* Do a logical OR or AND of the two words and compare the result.  */
5180 	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5181 	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5182 	  op0both = expand_binop (word_mode,
5183 				  op1 == const0_rtx ? ior_optab : and_optab,
5184 				  op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5185 
5186 	  if (op0both != 0)
5187 	    return emit_store_flag (target, code, op0both, op1, word_mode,
5188 				    unsignedp, normalizep);
5189 	}
5190       else if ((code == LT || code == GE) && op1 == const0_rtx)
5191 	{
5192 	  rtx op0h;
5193 
5194 	  /* If testing the sign bit, can just test on high word.  */
5195 	  op0h = simplify_gen_subreg (word_mode, op0, mode,
5196 				      subreg_highpart_offset (word_mode, mode));
5197 	  return emit_store_flag (target, code, op0h, op1, word_mode,
5198 				  unsignedp, normalizep);
5199 	}
5200     }
5201 
5202   /* From now on, we won't change CODE, so set ICODE now.  */
5203   icode = setcc_gen_code[(int) code];
5204 
5205   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5206      complement of A (for GE) and shifting the sign bit to the low bit.  */
5207   if (op1 == const0_rtx && (code == LT || code == GE)
5208       && GET_MODE_CLASS (mode) == MODE_INT
5209       && (normalizep || STORE_FLAG_VALUE == 1
5210 	  || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5211 	      && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5212 		  == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5213     {
5214       subtarget = target;
5215 
5216       /* If the result is to be wider than OP0, it is best to convert it
5217 	 first.  If it is to be narrower, it is *incorrect* to convert it
5218 	 first.  */
5219       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5220 	{
5221 	  op0 = convert_modes (target_mode, mode, op0, 0);
5222 	  mode = target_mode;
5223 	}
5224 
5225       if (target_mode != mode)
5226 	subtarget = 0;
5227 
5228       if (code == GE)
5229 	op0 = expand_unop (mode, one_cmpl_optab, op0,
5230 			   ((STORE_FLAG_VALUE == 1 || normalizep)
5231 			    ? 0 : subtarget), 0);
5232 
5233       if (STORE_FLAG_VALUE == 1 || normalizep)
5234 	/* If we are supposed to produce a 0/1 value, we want to do
5235 	   a logical shift from the sign bit to the low-order bit; for
5236 	   a -1/0 value, we do an arithmetic shift.  */
5237 	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5238 			    size_int (GET_MODE_BITSIZE (mode) - 1),
5239 			    subtarget, normalizep != -1);
5240 
5241       if (mode != target_mode)
5242 	op0 = convert_modes (target_mode, mode, op0, 0);
5243 
5244       return op0;
5245     }
5246 
5247   if (icode != CODE_FOR_nothing)
5248     {
5249       insn_operand_predicate_fn pred;
5250 
5251       /* We think we may be able to do this with a scc insn.  Emit the
5252 	 comparison and then the scc insn.  */
5253 
5254       do_pending_stack_adjust ();
5255       last = get_last_insn ();
5256 
5257       comparison
5258 	= compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5259       if (CONSTANT_P (comparison))
5260 	{
5261 	  switch (GET_CODE (comparison))
5262 	    {
5263 	    case CONST_INT:
5264 	      if (comparison == const0_rtx)
5265 		return const0_rtx;
5266 	      break;
5267 
5268 #ifdef FLOAT_STORE_FLAG_VALUE
5269 	    case CONST_DOUBLE:
5270 	      if (comparison == CONST0_RTX (GET_MODE (comparison)))
5271 		return const0_rtx;
5272 	      break;
5273 #endif
5274 	    default:
5275 	      gcc_unreachable ();
5276 	    }
5277 
5278 	  if (normalizep == 1)
5279 	    return const1_rtx;
5280 	  if (normalizep == -1)
5281 	    return constm1_rtx;
5282 	  return const_true_rtx;
5283 	}
5284 
5285       /* The code of COMPARISON may not match CODE if compare_from_rtx
5286 	 decided to swap its operands and reverse the original code.
5287 
5288 	 We know that compare_from_rtx returns either a CONST_INT or
5289 	 a new comparison code, so it is safe to just extract the
5290 	 code from COMPARISON.  */
5291       code = GET_CODE (comparison);
5292 
5293       /* Get a reference to the target in the proper mode for this insn.  */
5294       compare_mode = insn_data[(int) icode].operand[0].mode;
5295       subtarget = target;
5296       pred = insn_data[(int) icode].operand[0].predicate;
5297       if (optimize || ! (*pred) (subtarget, compare_mode))
5298 	subtarget = gen_reg_rtx (compare_mode);
5299 
5300       pattern = GEN_FCN (icode) (subtarget);
5301       if (pattern)
5302 	{
5303 	  emit_insn (pattern);
5304 
5305 	  /* If we are converting to a wider mode, first convert to
5306 	     TARGET_MODE, then normalize.  This produces better combining
5307 	     opportunities on machines that have a SIGN_EXTRACT when we are
5308 	     testing a single bit.  This mostly benefits the 68k.
5309 
5310 	     If STORE_FLAG_VALUE does not have the sign bit set when
5311 	     interpreted in COMPARE_MODE, we can do this conversion as
5312 	     unsigned, which is usually more efficient.  */
5313 	  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5314 	    {
5315 	      convert_move (target, subtarget,
5316 			    (GET_MODE_BITSIZE (compare_mode)
5317 			     <= HOST_BITS_PER_WIDE_INT)
5318 			    && 0 == (STORE_FLAG_VALUE
5319 				     & ((HOST_WIDE_INT) 1
5320 					<< (GET_MODE_BITSIZE (compare_mode) -1))));
5321 	      op0 = target;
5322 	      compare_mode = target_mode;
5323 	    }
5324 	  else
5325 	    op0 = subtarget;
5326 
5327 	  /* If we want to keep subexpressions around, don't reuse our
5328 	     last target.  */
5329 
5330 	  if (optimize)
5331 	    subtarget = 0;
5332 
5333 	  /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
5334 	     we don't have to do anything.  */
5335 	  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5336 	    ;
5337 	  /* STORE_FLAG_VALUE might be the most negative number, so write
5338 	     the comparison this way to avoid a compiler-time warning.  */
5339 	  else if (- normalizep == STORE_FLAG_VALUE)
5340 	    op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5341 
5342 	  /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5343 	     makes it hard to use a value of just the sign bit due to
5344 	     ANSI integer constant typing rules.  */
5345 	  else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5346 		   && (STORE_FLAG_VALUE
5347 		       & ((HOST_WIDE_INT) 1
5348 			  << (GET_MODE_BITSIZE (compare_mode) - 1))))
5349 	    op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5350 				size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5351 				subtarget, normalizep == 1);
5352 	  else
5353 	    {
5354 	      gcc_assert (STORE_FLAG_VALUE & 1);
5355 
5356 	      op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5357 	      if (normalizep == -1)
5358 		op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5359 	    }
5360 
5361 	  /* If we were converting to a smaller mode, do the
5362 	     conversion now.  */
5363 	  if (target_mode != compare_mode)
5364 	    {
5365 	      convert_move (target, op0, 0);
5366 	      return target;
5367 	    }
5368 	  else
5369 	    return op0;
5370 	}
5371     }
5372 
5373   delete_insns_since (last);
5374 
5375   /* If optimizing, use different pseudo registers for each insn, instead
5376      of reusing the same pseudo.  This leads to better CSE, but slows
5377      down the compiler, since there are more pseudos */
5378   subtarget = (!optimize
5379 	       && (target_mode == mode)) ? target : NULL_RTX;
5380 
5381   /* If we reached here, we can't do this with a scc insn.  However, there
5382      are some comparisons that can be done directly.  For example, if
5383      this is an equality comparison of integers, we can try to exclusive-or
5384      (or subtract) the two operands and use a recursive call to try the
5385      comparison with zero.  Don't do any of these cases if branches are
5386      very cheap.  */
5387 
5388   if (BRANCH_COST > 0
5389       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5390       && op1 != const0_rtx)
5391     {
5392       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5393 			  OPTAB_WIDEN);
5394 
5395       if (tem == 0)
5396 	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5397 			    OPTAB_WIDEN);
5398       if (tem != 0)
5399 	tem = emit_store_flag (target, code, tem, const0_rtx,
5400 			       mode, unsignedp, normalizep);
5401       if (tem == 0)
5402 	delete_insns_since (last);
5403       return tem;
5404     }
5405 
5406   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5407      the constant zero.  Reject all other comparisons at this point.  Only
5408      do LE and GT if branches are expensive since they are expensive on
5409      2-operand machines.  */
5410 
5411   if (BRANCH_COST == 0
5412       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5413       || (code != EQ && code != NE
5414 	  && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5415     return 0;
5416 
5417   /* See what we need to return.  We can only return a 1, -1, or the
5418      sign bit.  */
5419 
5420   if (normalizep == 0)
5421     {
5422       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5423 	normalizep = STORE_FLAG_VALUE;
5424 
5425       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5426 	       && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5427 		   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5428 	;
5429       else
5430 	return 0;
5431     }
5432 
5433   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5434      do the necessary operation below.  */
5435 
5436   tem = 0;
5437 
5438   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5439      the sign bit set.  */
5440 
5441   if (code == LE)
5442     {
5443       /* This is destructive, so SUBTARGET can't be OP0.  */
5444       if (rtx_equal_p (subtarget, op0))
5445 	subtarget = 0;
5446 
5447       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5448 			  OPTAB_WIDEN);
5449       if (tem)
5450 	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5451 			    OPTAB_WIDEN);
5452     }
5453 
5454   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5455      number of bits in the mode of OP0, minus one.  */
5456 
5457   if (code == GT)
5458     {
5459       if (rtx_equal_p (subtarget, op0))
5460 	subtarget = 0;
5461 
5462       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5463 			  size_int (GET_MODE_BITSIZE (mode) - 1),
5464 			  subtarget, 0);
5465       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5466 			  OPTAB_WIDEN);
5467     }
5468 
5469   if (code == EQ || code == NE)
5470     {
5471       /* For EQ or NE, one way to do the comparison is to apply an operation
5472 	 that converts the operand into a positive number if it is nonzero
5473 	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5474 	 for NE we negate.  This puts the result in the sign bit.  Then we
5475 	 normalize with a shift, if needed.
5476 
5477 	 Two operations that can do the above actions are ABS and FFS, so try
5478 	 them.  If that doesn't work, and MODE is smaller than a full word,
5479 	 we can use zero-extension to the wider mode (an unsigned conversion)
5480 	 as the operation.  */
5481 
5482       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5483 	 that is compensated by the subsequent overflow when subtracting
5484 	 one / negating.  */
5485 
5486       if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5487 	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5488       else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5489 	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5490       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5491 	{
5492 	  tem = convert_modes (word_mode, mode, op0, 1);
5493 	  mode = word_mode;
5494 	}
5495 
5496       if (tem != 0)
5497 	{
5498 	  if (code == EQ)
5499 	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5500 				0, OPTAB_WIDEN);
5501 	  else
5502 	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5503 	}
5504 
5505       /* If we couldn't do it that way, for NE we can "or" the two's complement
5506 	 of the value with itself.  For EQ, we take the one's complement of
5507 	 that "or", which is an extra insn, so we only handle EQ if branches
5508 	 are expensive.  */
5509 
5510       if (tem == 0 && (code == NE || BRANCH_COST > 1))
5511 	{
5512 	  if (rtx_equal_p (subtarget, op0))
5513 	    subtarget = 0;
5514 
5515 	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5516 	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5517 			      OPTAB_WIDEN);
5518 
5519 	  if (tem && code == EQ)
5520 	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5521 	}
5522     }
5523 
5524   if (tem && normalizep)
5525     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5526 			size_int (GET_MODE_BITSIZE (mode) - 1),
5527 			subtarget, normalizep == 1);
5528 
5529   if (tem)
5530     {
5531       if (GET_MODE (tem) != target_mode)
5532 	{
5533 	  convert_move (target, tem, 0);
5534 	  tem = target;
5535 	}
5536       else if (!subtarget)
5537 	{
5538 	  emit_move_insn (target, tem);
5539 	  tem = target;
5540 	}
5541     }
5542   else
5543     delete_insns_since (last);
5544 
5545   return tem;
5546 }
5547 
5548 /* Like emit_store_flag, but always succeeds.  */
5549 
5550 rtx
emit_store_flag_force(rtx target,enum rtx_code code,rtx op0,rtx op1,enum machine_mode mode,int unsignedp,int normalizep)5551 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5552 		       enum machine_mode mode, int unsignedp, int normalizep)
5553 {
5554   rtx tem, label;
5555 
5556   /* First see if emit_store_flag can do the job.  */
5557   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5558   if (tem != 0)
5559     return tem;
5560 
5561   if (normalizep == 0)
5562     normalizep = 1;
5563 
5564   /* If this failed, we have to do this with set/compare/jump/set code.  */
5565 
5566   if (!REG_P (target)
5567       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5568     target = gen_reg_rtx (GET_MODE (target));
5569 
5570   emit_move_insn (target, const1_rtx);
5571   label = gen_label_rtx ();
5572   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5573 			   NULL_RTX, label);
5574 
5575   emit_move_insn (target, const0_rtx);
5576   emit_label (label);
5577 
5578   return target;
5579 }
5580 
5581 /* Perform possibly multi-word comparison and conditional jump to LABEL
5582    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5583    now a thin wrapper around do_compare_rtx_and_jump.  */
5584 
5585 static void
do_cmp_and_jump(rtx arg1,rtx arg2,enum rtx_code op,enum machine_mode mode,rtx label)5586 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5587 		 rtx label)
5588 {
5589   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5590   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5591 			   NULL_RTX, NULL_RTX, label);
5592 }
5593