1 /* Medium-level subroutines: convert bit-field store and extract
2    and shifts, multiplies and divides to rtl instructions.
3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
5    Free Software Foundation, Inc.
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 2, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING.  If not, write to the Free
21 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
22 02110-1301, USA.  */
23 
24 
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "toplev.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "real.h"
38 #include "recog.h"
39 #include "langhooks.h"
40 
41 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
42 				   unsigned HOST_WIDE_INT,
43 				   unsigned HOST_WIDE_INT, rtx);
44 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
45 				   unsigned HOST_WIDE_INT, rtx);
46 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
47 				    unsigned HOST_WIDE_INT,
48 				    unsigned HOST_WIDE_INT,
49 				    unsigned HOST_WIDE_INT, rtx, int);
50 static rtx mask_rtx (enum machine_mode, int, int, int);
51 static rtx lshift_value (enum machine_mode, rtx, int, int);
52 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
53 				    unsigned HOST_WIDE_INT, int);
54 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
55 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
56 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
57 
58 /* Test whether a value is zero of a power of two.  */
59 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
60 
61 /* Nonzero means divides or modulus operations are relatively cheap for
62    powers of two, so don't use branches; emit the operation instead.
63    Usually, this will mean that the MD file will emit non-branch
64    sequences.  */
65 
66 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
67 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
68 
69 #ifndef SLOW_UNALIGNED_ACCESS
70 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
71 #endif
72 
73 /* For compilers that support multiple targets with different word sizes,
74    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
75    is the H8/300(H) compiler.  */
76 
77 #ifndef MAX_BITS_PER_WORD
78 #define MAX_BITS_PER_WORD BITS_PER_WORD
79 #endif
80 
81 /* Reduce conditional compilation elsewhere.  */
82 #ifndef HAVE_insv
83 #define HAVE_insv	0
84 #define CODE_FOR_insv	CODE_FOR_nothing
85 #define gen_insv(a,b,c,d) NULL_RTX
86 #endif
87 #ifndef HAVE_extv
88 #define HAVE_extv	0
89 #define CODE_FOR_extv	CODE_FOR_nothing
90 #define gen_extv(a,b,c,d) NULL_RTX
91 #endif
92 #ifndef HAVE_extzv
93 #define HAVE_extzv	0
94 #define CODE_FOR_extzv	CODE_FOR_nothing
95 #define gen_extzv(a,b,c,d) NULL_RTX
96 #endif
97 
98 /* Cost of various pieces of RTL.  Note that some of these are indexed by
99    shift count and some by mode.  */
100 static int zero_cost;
101 static int add_cost[NUM_MACHINE_MODES];
102 static int neg_cost[NUM_MACHINE_MODES];
103 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
104 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
105 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
106 static int mul_cost[NUM_MACHINE_MODES];
107 static int sdiv_cost[NUM_MACHINE_MODES];
108 static int udiv_cost[NUM_MACHINE_MODES];
109 static int mul_widen_cost[NUM_MACHINE_MODES];
110 static int mul_highpart_cost[NUM_MACHINE_MODES];
111 
112 void
init_expmed(void)113 init_expmed (void)
114 {
115   struct
116   {
117     struct rtx_def reg;		rtunion reg_fld[2];
118     struct rtx_def plus;	rtunion plus_fld1;
119     struct rtx_def neg;
120     struct rtx_def mult;	rtunion mult_fld1;
121     struct rtx_def sdiv;	rtunion sdiv_fld1;
122     struct rtx_def udiv;	rtunion udiv_fld1;
123     struct rtx_def zext;
124     struct rtx_def sdiv_32;	rtunion sdiv_32_fld1;
125     struct rtx_def smod_32;	rtunion smod_32_fld1;
126     struct rtx_def wide_mult;	rtunion wide_mult_fld1;
127     struct rtx_def wide_lshr;	rtunion wide_lshr_fld1;
128     struct rtx_def wide_trunc;
129     struct rtx_def shift;	rtunion shift_fld1;
130     struct rtx_def shift_mult;	rtunion shift_mult_fld1;
131     struct rtx_def shift_add;	rtunion shift_add_fld1;
132     struct rtx_def shift_sub;	rtunion shift_sub_fld1;
133   } all;
134 
135   rtx pow2[MAX_BITS_PER_WORD];
136   rtx cint[MAX_BITS_PER_WORD];
137   int m, n;
138   enum machine_mode mode, wider_mode;
139 
140   zero_cost = rtx_cost (const0_rtx, 0);
141 
142   for (m = 1; m < MAX_BITS_PER_WORD; m++)
143     {
144       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
145       cint[m] = GEN_INT (m);
146     }
147 
148   memset (&all, 0, sizeof all);
149 
150   PUT_CODE (&all.reg, REG);
151   /* Avoid using hard regs in ways which may be unsupported.  */
152   REGNO (&all.reg) = LAST_VIRTUAL_REGISTER + 1;
153 
154   PUT_CODE (&all.plus, PLUS);
155   XEXP (&all.plus, 0) = &all.reg;
156   XEXP (&all.plus, 1) = &all.reg;
157 
158   PUT_CODE (&all.neg, NEG);
159   XEXP (&all.neg, 0) = &all.reg;
160 
161   PUT_CODE (&all.mult, MULT);
162   XEXP (&all.mult, 0) = &all.reg;
163   XEXP (&all.mult, 1) = &all.reg;
164 
165   PUT_CODE (&all.sdiv, DIV);
166   XEXP (&all.sdiv, 0) = &all.reg;
167   XEXP (&all.sdiv, 1) = &all.reg;
168 
169   PUT_CODE (&all.udiv, UDIV);
170   XEXP (&all.udiv, 0) = &all.reg;
171   XEXP (&all.udiv, 1) = &all.reg;
172 
173   PUT_CODE (&all.sdiv_32, DIV);
174   XEXP (&all.sdiv_32, 0) = &all.reg;
175   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
176 
177   PUT_CODE (&all.smod_32, MOD);
178   XEXP (&all.smod_32, 0) = &all.reg;
179   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
180 
181   PUT_CODE (&all.zext, ZERO_EXTEND);
182   XEXP (&all.zext, 0) = &all.reg;
183 
184   PUT_CODE (&all.wide_mult, MULT);
185   XEXP (&all.wide_mult, 0) = &all.zext;
186   XEXP (&all.wide_mult, 1) = &all.zext;
187 
188   PUT_CODE (&all.wide_lshr, LSHIFTRT);
189   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
190 
191   PUT_CODE (&all.wide_trunc, TRUNCATE);
192   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
193 
194   PUT_CODE (&all.shift, ASHIFT);
195   XEXP (&all.shift, 0) = &all.reg;
196 
197   PUT_CODE (&all.shift_mult, MULT);
198   XEXP (&all.shift_mult, 0) = &all.reg;
199 
200   PUT_CODE (&all.shift_add, PLUS);
201   XEXP (&all.shift_add, 0) = &all.shift_mult;
202   XEXP (&all.shift_add, 1) = &all.reg;
203 
204   PUT_CODE (&all.shift_sub, MINUS);
205   XEXP (&all.shift_sub, 0) = &all.shift_mult;
206   XEXP (&all.shift_sub, 1) = &all.reg;
207 
208   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
209        mode != VOIDmode;
210        mode = GET_MODE_WIDER_MODE (mode))
211     {
212       PUT_MODE (&all.reg, mode);
213       PUT_MODE (&all.plus, mode);
214       PUT_MODE (&all.neg, mode);
215       PUT_MODE (&all.mult, mode);
216       PUT_MODE (&all.sdiv, mode);
217       PUT_MODE (&all.udiv, mode);
218       PUT_MODE (&all.sdiv_32, mode);
219       PUT_MODE (&all.smod_32, mode);
220       PUT_MODE (&all.wide_trunc, mode);
221       PUT_MODE (&all.shift, mode);
222       PUT_MODE (&all.shift_mult, mode);
223       PUT_MODE (&all.shift_add, mode);
224       PUT_MODE (&all.shift_sub, mode);
225 
226       add_cost[mode] = rtx_cost (&all.plus, SET);
227       neg_cost[mode] = rtx_cost (&all.neg, SET);
228       mul_cost[mode] = rtx_cost (&all.mult, SET);
229       sdiv_cost[mode] = rtx_cost (&all.sdiv, SET);
230       udiv_cost[mode] = rtx_cost (&all.udiv, SET);
231 
232       sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET)
233 			       <= 2 * add_cost[mode]);
234       smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET)
235 			       <= 4 * add_cost[mode]);
236 
237       wider_mode = GET_MODE_WIDER_MODE (mode);
238       if (wider_mode != VOIDmode)
239 	{
240 	  PUT_MODE (&all.zext, wider_mode);
241 	  PUT_MODE (&all.wide_mult, wider_mode);
242 	  PUT_MODE (&all.wide_lshr, wider_mode);
243 	  XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
244 
245 	  mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
246 	  mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
247 	}
248 
249       shift_cost[mode][0] = 0;
250       shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
251 
252       n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
253       for (m = 1; m < n; m++)
254 	{
255 	  XEXP (&all.shift, 1) = cint[m];
256 	  XEXP (&all.shift_mult, 1) = pow2[m];
257 
258 	  shift_cost[mode][m] = rtx_cost (&all.shift, SET);
259 	  shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
260 	  shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
261 	}
262     }
263 }
264 
265 /* Return an rtx representing minus the value of X.
266    MODE is the intended mode of the result,
267    useful if X is a CONST_INT.  */
268 
269 rtx
negate_rtx(enum machine_mode mode,rtx x)270 negate_rtx (enum machine_mode mode, rtx x)
271 {
272   rtx result = simplify_unary_operation (NEG, mode, x, mode);
273 
274   if (result == 0)
275     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
276 
277   return result;
278 }
279 
280 /* Report on the availability of insv/extv/extzv and the desired mode
281    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
282    is false; else the mode of the specified operand.  If OPNO is -1,
283    all the caller cares about is whether the insn is available.  */
284 enum machine_mode
mode_for_extraction(enum extraction_pattern pattern,int opno)285 mode_for_extraction (enum extraction_pattern pattern, int opno)
286 {
287   const struct insn_data *data;
288 
289   switch (pattern)
290     {
291     case EP_insv:
292       if (HAVE_insv)
293 	{
294 	  data = &insn_data[CODE_FOR_insv];
295 	  break;
296 	}
297       return MAX_MACHINE_MODE;
298 
299     case EP_extv:
300       if (HAVE_extv)
301 	{
302 	  data = &insn_data[CODE_FOR_extv];
303 	  break;
304 	}
305       return MAX_MACHINE_MODE;
306 
307     case EP_extzv:
308       if (HAVE_extzv)
309 	{
310 	  data = &insn_data[CODE_FOR_extzv];
311 	  break;
312 	}
313       return MAX_MACHINE_MODE;
314 
315     default:
316       gcc_unreachable ();
317     }
318 
319   if (opno == -1)
320     return VOIDmode;
321 
322   /* Everyone who uses this function used to follow it with
323      if (result == VOIDmode) result = word_mode; */
324   if (data->operand[opno].mode == VOIDmode)
325     return word_mode;
326   return data->operand[opno].mode;
327 }
328 
329 
330 /* Generate code to store value from rtx VALUE
331    into a bit-field within structure STR_RTX
332    containing BITSIZE bits starting at bit BITNUM.
333    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
334    ALIGN is the alignment that STR_RTX is known to have.
335    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
336 
337 /* ??? Note that there are two different ideas here for how
338    to determine the size to count bits within, for a register.
339    One is BITS_PER_WORD, and the other is the size of operand 3
340    of the insv pattern.
341 
342    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
343    else, we use the mode of operand 3.  */
344 
345 rtx
store_bit_field(rtx str_rtx,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,enum machine_mode fieldmode,rtx value)346 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
347 		 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
348 		 rtx value)
349 {
350   unsigned int unit
351     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
352   unsigned HOST_WIDE_INT offset, bitpos;
353   rtx op0 = str_rtx;
354   int byte_offset;
355   rtx orig_value;
356 
357   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
358 
359   while (GET_CODE (op0) == SUBREG)
360     {
361       /* The following line once was done only if WORDS_BIG_ENDIAN,
362 	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
363 	 meaningful at a much higher level; when structures are copied
364 	 between memory and regs, the higher-numbered regs
365 	 always get higher addresses.  */
366       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
367       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
368 
369       byte_offset = 0;
370 
371       /* Paradoxical subregs need special handling on big endian machines.  */
372       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
373 	{
374 	  int difference = inner_mode_size - outer_mode_size;
375 
376 	  if (WORDS_BIG_ENDIAN)
377 	    byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
378 	  if (BYTES_BIG_ENDIAN)
379 	    byte_offset += difference % UNITS_PER_WORD;
380 	}
381       else
382 	byte_offset = SUBREG_BYTE (op0);
383 
384       bitnum += byte_offset * BITS_PER_UNIT;
385       op0 = SUBREG_REG (op0);
386     }
387 
388   /* No action is needed if the target is a register and if the field
389      lies completely outside that register.  This can occur if the source
390      code contains an out-of-bounds access to a small array.  */
391   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
392     return value;
393 
394   /* Use vec_set patterns for inserting parts of vectors whenever
395      available.  */
396   if (VECTOR_MODE_P (GET_MODE (op0))
397       && !MEM_P (op0)
398       && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
399 	  != CODE_FOR_nothing)
400       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
401       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
402       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
403     {
404       enum machine_mode outermode = GET_MODE (op0);
405       enum machine_mode innermode = GET_MODE_INNER (outermode);
406       int icode = (int) vec_set_optab->handlers[outermode].insn_code;
407       int pos = bitnum / GET_MODE_BITSIZE (innermode);
408       rtx rtxpos = GEN_INT (pos);
409       rtx src = value;
410       rtx dest = op0;
411       rtx pat, seq;
412       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
413       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
414       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
415 
416       start_sequence ();
417 
418       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
419 	src = copy_to_mode_reg (mode1, src);
420 
421       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
422 	rtxpos = copy_to_mode_reg (mode1, rtxpos);
423 
424       /* We could handle this, but we should always be called with a pseudo
425 	 for our targets and all insns should take them as outputs.  */
426       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
427 		  && (*insn_data[icode].operand[1].predicate) (src, mode1)
428 		  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
429       pat = GEN_FCN (icode) (dest, src, rtxpos);
430       seq = get_insns ();
431       end_sequence ();
432       if (pat)
433 	{
434 	  emit_insn (seq);
435 	  emit_insn (pat);
436 	  return dest;
437 	}
438     }
439 
440   /* If the target is a register, overwriting the entire object, or storing
441      a full-word or multi-word field can be done with just a SUBREG.
442 
443      If the target is memory, storing any naturally aligned field can be
444      done with a simple store.  For targets that support fast unaligned
445      memory, any naturally sized, unit aligned field can be done directly.  */
446 
447   offset = bitnum / unit;
448   bitpos = bitnum % unit;
449   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
450                 + (offset * UNITS_PER_WORD);
451 
452   if (bitpos == 0
453       && bitsize == GET_MODE_BITSIZE (fieldmode)
454       && (!MEM_P (op0)
455 	  ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
456 	     || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
457 	     && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
458 	  : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
459 	     || (offset * BITS_PER_UNIT % bitsize == 0
460 		 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
461     {
462       if (MEM_P (op0))
463 	op0 = adjust_address (op0, fieldmode, offset);
464       else if (GET_MODE (op0) != fieldmode)
465 	op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
466 				   byte_offset);
467       emit_move_insn (op0, value);
468       return value;
469     }
470 
471   /* Make sure we are playing with integral modes.  Pun with subregs
472      if we aren't.  This must come after the entire register case above,
473      since that case is valid for any mode.  The following cases are only
474      valid for integral modes.  */
475   {
476     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
477     if (imode != GET_MODE (op0))
478       {
479 	if (MEM_P (op0))
480 	  op0 = adjust_address (op0, imode, 0);
481 	else
482 	  {
483 	    gcc_assert (imode != BLKmode);
484 	    op0 = gen_lowpart (imode, op0);
485 	  }
486       }
487   }
488 
489   /* We may be accessing data outside the field, which means
490      we can alias adjacent data.  */
491   if (MEM_P (op0))
492     {
493       op0 = shallow_copy_rtx (op0);
494       set_mem_alias_set (op0, 0);
495       set_mem_expr (op0, 0);
496     }
497 
498   /* If OP0 is a register, BITPOS must count within a word.
499      But as we have it, it counts within whatever size OP0 now has.
500      On a bigendian machine, these are not the same, so convert.  */
501   if (BYTES_BIG_ENDIAN
502       && !MEM_P (op0)
503       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
504     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
505 
506   /* Storing an lsb-aligned field in a register
507      can be done with a movestrict instruction.  */
508 
509   if (!MEM_P (op0)
510       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
511       && bitsize == GET_MODE_BITSIZE (fieldmode)
512       && (movstrict_optab->handlers[fieldmode].insn_code
513 	  != CODE_FOR_nothing))
514     {
515       int icode = movstrict_optab->handlers[fieldmode].insn_code;
516 
517       /* Get appropriate low part of the value being stored.  */
518       if (GET_CODE (value) == CONST_INT || REG_P (value))
519 	value = gen_lowpart (fieldmode, value);
520       else if (!(GET_CODE (value) == SYMBOL_REF
521 		 || GET_CODE (value) == LABEL_REF
522 		 || GET_CODE (value) == CONST))
523 	value = convert_to_mode (fieldmode, value, 0);
524 
525       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
526 	value = copy_to_mode_reg (fieldmode, value);
527 
528       if (GET_CODE (op0) == SUBREG)
529 	{
530 	  /* Else we've got some float mode source being extracted into
531 	     a different float mode destination -- this combination of
532 	     subregs results in Severe Tire Damage.  */
533 	  gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
534 		      || GET_MODE_CLASS (fieldmode) == MODE_INT
535 		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
536 	  op0 = SUBREG_REG (op0);
537 	}
538 
539       emit_insn (GEN_FCN (icode)
540 		 (gen_rtx_SUBREG (fieldmode, op0,
541 				  (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
542 				  + (offset * UNITS_PER_WORD)),
543 				  value));
544 
545       return value;
546     }
547 
548   /* Handle fields bigger than a word.  */
549 
550   if (bitsize > BITS_PER_WORD)
551     {
552       /* Here we transfer the words of the field
553 	 in the order least significant first.
554 	 This is because the most significant word is the one which may
555 	 be less than full.
556 	 However, only do that if the value is not BLKmode.  */
557 
558       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
559       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
560       unsigned int i;
561 
562       /* This is the mode we must force value to, so that there will be enough
563 	 subwords to extract.  Note that fieldmode will often (always?) be
564 	 VOIDmode, because that is what store_field uses to indicate that this
565 	 is a bit field, but passing VOIDmode to operand_subword_force
566 	 is not allowed.  */
567       fieldmode = GET_MODE (value);
568       if (fieldmode == VOIDmode)
569 	fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
570 
571       for (i = 0; i < nwords; i++)
572 	{
573 	  /* If I is 0, use the low-order word in both field and target;
574 	     if I is 1, use the next to lowest word; and so on.  */
575 	  unsigned int wordnum = (backwards ? nwords - i - 1 : i);
576 	  unsigned int bit_offset = (backwards
577 				     ? MAX ((int) bitsize - ((int) i + 1)
578 					    * BITS_PER_WORD,
579 					    0)
580 				     : (int) i * BITS_PER_WORD);
581 
582 	  store_bit_field (op0, MIN (BITS_PER_WORD,
583 				     bitsize - i * BITS_PER_WORD),
584 			   bitnum + bit_offset, word_mode,
585 			   operand_subword_force (value, wordnum, fieldmode));
586 	}
587       return value;
588     }
589 
590   /* From here on we can assume that the field to be stored in is
591      a full-word (whatever type that is), since it is shorter than a word.  */
592 
593   /* OFFSET is the number of words or bytes (UNIT says which)
594      from STR_RTX to the first word or byte containing part of the field.  */
595 
596   if (!MEM_P (op0))
597     {
598       if (offset != 0
599 	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
600 	{
601 	  if (!REG_P (op0))
602 	    {
603 	      /* Since this is a destination (lvalue), we can't copy
604 		 it to a pseudo.  We can remove a SUBREG that does not
605 		 change the size of the operand.  Such a SUBREG may
606 		 have been added above.  */
607 	      gcc_assert (GET_CODE (op0) == SUBREG
608 			  && (GET_MODE_SIZE (GET_MODE (op0))
609 			      == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
610 	      op0 = SUBREG_REG (op0);
611 	    }
612 	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
613 		                op0, (offset * UNITS_PER_WORD));
614 	}
615       offset = 0;
616     }
617 
618   /* If VALUE has a floating-point or complex mode, access it as an
619      integer of the corresponding size.  This can occur on a machine
620      with 64 bit registers that uses SFmode for float.  It can also
621      occur for unaligned float or complex fields.  */
622   orig_value = value;
623   if (GET_MODE (value) != VOIDmode
624       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
625       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
626     {
627       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
628       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
629     }
630 
631   /* Now OFFSET is nonzero only if OP0 is memory
632      and is therefore always measured in bytes.  */
633 
634   if (HAVE_insv
635       && GET_MODE (value) != BLKmode
636       && bitsize > 0
637       && GET_MODE_BITSIZE (op_mode) >= bitsize
638       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
639 	    && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
640       && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
641 							VOIDmode))
642     {
643       int xbitpos = bitpos;
644       rtx value1;
645       rtx xop0 = op0;
646       rtx last = get_last_insn ();
647       rtx pat;
648       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
649       int save_volatile_ok = volatile_ok;
650 
651       volatile_ok = 1;
652 
653       /* If this machine's insv can only insert into a register, copy OP0
654 	 into a register and save it back later.  */
655       if (MEM_P (op0)
656 	  && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
657 		(op0, VOIDmode)))
658 	{
659 	  rtx tempreg;
660 	  enum machine_mode bestmode;
661 
662 	  /* Get the mode to use for inserting into this field.  If OP0 is
663 	     BLKmode, get the smallest mode consistent with the alignment. If
664 	     OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
665 	     mode. Otherwise, use the smallest mode containing the field.  */
666 
667 	  if (GET_MODE (op0) == BLKmode
668 	      || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
669 	    bestmode
670 	      = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
671 			       MEM_VOLATILE_P (op0));
672 	  else
673 	    bestmode = GET_MODE (op0);
674 
675 	  if (bestmode == VOIDmode
676 	      || GET_MODE_SIZE (bestmode) < GET_MODE_SIZE (fieldmode)
677 	      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
678 		  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
679 	    goto insv_loses;
680 
681 	  /* Adjust address to point to the containing unit of that mode.
682 	     Compute offset as multiple of this unit, counting in bytes.  */
683 	  unit = GET_MODE_BITSIZE (bestmode);
684 	  offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
685 	  bitpos = bitnum % unit;
686 	  op0 = adjust_address (op0, bestmode,  offset);
687 
688 	  /* Fetch that unit, store the bitfield in it, then store
689 	     the unit.  */
690 	  tempreg = copy_to_reg (op0);
691 	  store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
692 	  emit_move_insn (op0, tempreg);
693 	  return value;
694 	}
695       volatile_ok = save_volatile_ok;
696 
697       /* Add OFFSET into OP0's address.  */
698       if (MEM_P (xop0))
699 	xop0 = adjust_address (xop0, byte_mode, offset);
700 
701       /* If xop0 is a register, we need it in MAXMODE
702 	 to make it acceptable to the format of insv.  */
703       if (GET_CODE (xop0) == SUBREG)
704 	/* We can't just change the mode, because this might clobber op0,
705 	   and we will need the original value of op0 if insv fails.  */
706 	xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
707       if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
708 	xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
709 
710       /* On big-endian machines, we count bits from the most significant.
711 	 If the bit field insn does not, we must invert.  */
712 
713       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
714 	xbitpos = unit - bitsize - xbitpos;
715 
716       /* We have been counting XBITPOS within UNIT.
717 	 Count instead within the size of the register.  */
718       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
719 	xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
720 
721       unit = GET_MODE_BITSIZE (maxmode);
722 
723       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
724       value1 = value;
725       if (GET_MODE (value) != maxmode)
726 	{
727 	  if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
728 	    {
729 	      /* Optimization: Don't bother really extending VALUE
730 		 if it has all the bits we will actually use.  However,
731 		 if we must narrow it, be sure we do it correctly.  */
732 
733 	      if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
734 		{
735 		  rtx tmp;
736 
737 		  tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
738 		  if (! tmp)
739 		    tmp = simplify_gen_subreg (maxmode,
740 					       force_reg (GET_MODE (value),
741 							  value1),
742 					       GET_MODE (value), 0);
743 		  value1 = tmp;
744 		}
745 	      else
746 		value1 = gen_lowpart (maxmode, value1);
747 	    }
748 	  else if (GET_CODE (value) == CONST_INT)
749 	    value1 = gen_int_mode (INTVAL (value), maxmode);
750 	  else
751 	    /* Parse phase is supposed to make VALUE's data type
752 	       match that of the component reference, which is a type
753 	       at least as wide as the field; so VALUE should have
754 	       a mode that corresponds to that type.  */
755 	    gcc_assert (CONSTANT_P (value));
756 	}
757 
758       /* If this machine's insv insists on a register,
759 	 get VALUE1 into a register.  */
760       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
761 	     (value1, maxmode)))
762 	value1 = force_reg (maxmode, value1);
763 
764       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
765       if (pat)
766 	emit_insn (pat);
767       else
768 	{
769 	  delete_insns_since (last);
770 	  store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
771 	}
772     }
773   else
774     insv_loses:
775     /* Insv is not available; store using shifts and boolean ops.  */
776     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
777   return value;
778 }
779 
780 /* Use shifts and boolean operations to store VALUE
781    into a bit field of width BITSIZE
782    in a memory location specified by OP0 except offset by OFFSET bytes.
783      (OFFSET must be 0 if OP0 is a register.)
784    The field starts at position BITPOS within the byte.
785     (If OP0 is a register, it may be a full word or a narrower mode,
786      but BITPOS still counts within a full word,
787      which is significant on bigendian machines.)  */
788 
789 static void
store_fixed_bit_field(rtx op0,unsigned HOST_WIDE_INT offset,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitpos,rtx value)790 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
791 		       unsigned HOST_WIDE_INT bitsize,
792 		       unsigned HOST_WIDE_INT bitpos, rtx value)
793 {
794   enum machine_mode mode;
795   unsigned int total_bits = BITS_PER_WORD;
796   rtx temp;
797   int all_zero = 0;
798   int all_one = 0;
799 
800   /* There is a case not handled here:
801      a structure with a known alignment of just a halfword
802      and a field split across two aligned halfwords within the structure.
803      Or likewise a structure with a known alignment of just a byte
804      and a field split across two bytes.
805      Such cases are not supposed to be able to occur.  */
806 
807   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
808     {
809       gcc_assert (!offset);
810       /* Special treatment for a bit field split across two registers.  */
811       if (bitsize + bitpos > BITS_PER_WORD)
812 	{
813 	  store_split_bit_field (op0, bitsize, bitpos, value);
814 	  return;
815 	}
816     }
817   else
818     {
819       /* Get the proper mode to use for this field.  We want a mode that
820 	 includes the entire field.  If such a mode would be larger than
821 	 a word, we won't be doing the extraction the normal way.
822 	 We don't want a mode bigger than the destination.  */
823 
824       mode = GET_MODE (op0);
825       if (GET_MODE_BITSIZE (mode) == 0
826 	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
827 	mode = word_mode;
828       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
829 			    MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
830 
831       if (mode == VOIDmode)
832 	{
833 	  /* The only way this should occur is if the field spans word
834 	     boundaries.  */
835 	  store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
836 				 value);
837 	  return;
838 	}
839 
840       total_bits = GET_MODE_BITSIZE (mode);
841 
842       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
843 	 be in the range 0 to total_bits-1, and put any excess bytes in
844 	 OFFSET.  */
845       if (bitpos >= total_bits)
846 	{
847 	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
848 	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
849 		     * BITS_PER_UNIT);
850 	}
851 
852       /* Get ref to an aligned byte, halfword, or word containing the field.
853 	 Adjust BITPOS to be position within a word,
854 	 and OFFSET to be the offset of that word.
855 	 Then alter OP0 to refer to that word.  */
856       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
857       offset -= (offset % (total_bits / BITS_PER_UNIT));
858       op0 = adjust_address (op0, mode, offset);
859     }
860 
861   mode = GET_MODE (op0);
862 
863   /* Now MODE is either some integral mode for a MEM as OP0,
864      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
865      The bit field is contained entirely within OP0.
866      BITPOS is the starting bit number within OP0.
867      (OP0's mode may actually be narrower than MODE.)  */
868 
869   if (BYTES_BIG_ENDIAN)
870       /* BITPOS is the distance between our msb
871 	 and that of the containing datum.
872 	 Convert it to the distance from the lsb.  */
873       bitpos = total_bits - bitsize - bitpos;
874 
875   /* Now BITPOS is always the distance between our lsb
876      and that of OP0.  */
877 
878   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
879      we must first convert its mode to MODE.  */
880 
881   if (GET_CODE (value) == CONST_INT)
882     {
883       HOST_WIDE_INT v = INTVAL (value);
884 
885       if (bitsize < HOST_BITS_PER_WIDE_INT)
886 	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
887 
888       if (v == 0)
889 	all_zero = 1;
890       else if ((bitsize < HOST_BITS_PER_WIDE_INT
891 		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
892 	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
893 	all_one = 1;
894 
895       value = lshift_value (mode, value, bitpos, bitsize);
896     }
897   else
898     {
899       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
900 		      && bitpos + bitsize != GET_MODE_BITSIZE (mode));
901 
902       if (GET_MODE (value) != mode)
903 	{
904 	  if ((REG_P (value) || GET_CODE (value) == SUBREG)
905 	      && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
906 	    value = gen_lowpart (mode, value);
907 	  else
908 	    value = convert_to_mode (mode, value, 1);
909 	}
910 
911       if (must_and)
912 	value = expand_binop (mode, and_optab, value,
913 			      mask_rtx (mode, 0, bitsize, 0),
914 			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
915       if (bitpos > 0)
916 	value = expand_shift (LSHIFT_EXPR, mode, value,
917 			      build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
918     }
919 
920   /* Now clear the chosen bits in OP0,
921      except that if VALUE is -1 we need not bother.  */
922   /* We keep the intermediates in registers to allow CSE to combine
923      consecutive bitfield assignments.  */
924 
925   temp = force_reg (mode, op0);
926 
927   if (! all_one)
928     {
929       temp = expand_binop (mode, and_optab, temp,
930 			   mask_rtx (mode, bitpos, bitsize, 1),
931 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
932       temp = force_reg (mode, temp);
933     }
934 
935   /* Now logical-or VALUE into OP0, unless it is zero.  */
936 
937   if (! all_zero)
938     {
939       temp = expand_binop (mode, ior_optab, temp, value,
940 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
941       temp = force_reg (mode, temp);
942     }
943 
944   if (op0 != temp)
945     emit_move_insn (op0, temp);
946 }
947 
948 /* Store a bit field that is split across multiple accessible memory objects.
949 
950    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
951    BITSIZE is the field width; BITPOS the position of its first bit
952    (within the word).
953    VALUE is the value to store.
954 
955    This does not yet handle fields wider than BITS_PER_WORD.  */
956 
957 static void
store_split_bit_field(rtx op0,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitpos,rtx value)958 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
959 		       unsigned HOST_WIDE_INT bitpos, rtx value)
960 {
961   unsigned int unit;
962   unsigned int bitsdone = 0;
963 
964   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
965      much at a time.  */
966   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
967     unit = BITS_PER_WORD;
968   else
969     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
970 
971   /* If VALUE is a constant other than a CONST_INT, get it into a register in
972      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
973      that VALUE might be a floating-point constant.  */
974   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
975     {
976       rtx word = gen_lowpart_common (word_mode, value);
977 
978       if (word && (value != word))
979 	value = word;
980       else
981 	value = gen_lowpart_common (word_mode,
982 				    force_reg (GET_MODE (value) != VOIDmode
983 					       ? GET_MODE (value)
984 					       : word_mode, value));
985     }
986 
987   while (bitsdone < bitsize)
988     {
989       unsigned HOST_WIDE_INT thissize;
990       rtx part, word;
991       unsigned HOST_WIDE_INT thispos;
992       unsigned HOST_WIDE_INT offset;
993 
994       offset = (bitpos + bitsdone) / unit;
995       thispos = (bitpos + bitsdone) % unit;
996 
997       /* THISSIZE must not overrun a word boundary.  Otherwise,
998 	 store_fixed_bit_field will call us again, and we will mutually
999 	 recurse forever.  */
1000       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1001       thissize = MIN (thissize, unit - thispos);
1002 
1003       if (BYTES_BIG_ENDIAN)
1004 	{
1005 	  int total_bits;
1006 
1007 	  /* We must do an endian conversion exactly the same way as it is
1008 	     done in extract_bit_field, so that the two calls to
1009 	     extract_fixed_bit_field will have comparable arguments.  */
1010 	  if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1011 	    total_bits = BITS_PER_WORD;
1012 	  else
1013 	    total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1014 
1015 	  /* Fetch successively less significant portions.  */
1016 	  if (GET_CODE (value) == CONST_INT)
1017 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1018 			     >> (bitsize - bitsdone - thissize))
1019 			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
1020 	  else
1021 	    /* The args are chosen so that the last part includes the
1022 	       lsb.  Give extract_bit_field the value it needs (with
1023 	       endianness compensation) to fetch the piece we want.  */
1024 	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1025 					    total_bits - bitsize + bitsdone,
1026 					    NULL_RTX, 1);
1027 	}
1028       else
1029 	{
1030 	  /* Fetch successively more significant portions.  */
1031 	  if (GET_CODE (value) == CONST_INT)
1032 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1033 			     >> bitsdone)
1034 			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
1035 	  else
1036 	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1037 					    bitsdone, NULL_RTX, 1);
1038 	}
1039 
1040       /* If OP0 is a register, then handle OFFSET here.
1041 
1042 	 When handling multiword bitfields, extract_bit_field may pass
1043 	 down a word_mode SUBREG of a larger REG for a bitfield that actually
1044 	 crosses a word boundary.  Thus, for a SUBREG, we must find
1045 	 the current word starting from the base register.  */
1046       if (GET_CODE (op0) == SUBREG)
1047 	{
1048 	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1049 	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
1050 					GET_MODE (SUBREG_REG (op0)));
1051 	  offset = 0;
1052 	}
1053       else if (REG_P (op0))
1054 	{
1055 	  word = operand_subword_force (op0, offset, GET_MODE (op0));
1056 	  offset = 0;
1057 	}
1058       else
1059 	word = op0;
1060 
1061       /* OFFSET is in UNITs, and UNIT is in bits.
1062          store_fixed_bit_field wants offset in bytes.  */
1063       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1064 			     thispos, part);
1065       bitsdone += thissize;
1066     }
1067 }
1068 
1069 /* Generate code to extract a byte-field from STR_RTX
1070    containing BITSIZE bits, starting at BITNUM,
1071    and put it in TARGET if possible (if TARGET is nonzero).
1072    Regardless of TARGET, we return the rtx for where the value is placed.
1073 
1074    STR_RTX is the structure containing the byte (a REG or MEM).
1075    UNSIGNEDP is nonzero if this is an unsigned bit field.
1076    MODE is the natural mode of the field value once extracted.
1077    TMODE is the mode the caller would like the value to have;
1078    but the value may be returned with type MODE instead.
1079 
1080    TOTAL_SIZE is the size in bytes of the containing structure,
1081    or -1 if varying.
1082 
1083    If a TARGET is specified and we can store in it at no extra cost,
1084    we do so, and return TARGET.
1085    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1086    if they are equally easy.  */
1087 
1088 rtx
extract_bit_field(rtx str_rtx,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,int unsignedp,rtx target,enum machine_mode mode,enum machine_mode tmode)1089 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1090 		   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1091 		   enum machine_mode mode, enum machine_mode tmode)
1092 {
1093   unsigned int unit
1094     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1095   unsigned HOST_WIDE_INT offset, bitpos;
1096   rtx op0 = str_rtx;
1097   rtx spec_target = target;
1098   rtx spec_target_subreg = 0;
1099   enum machine_mode int_mode;
1100   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1101   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1102   enum machine_mode mode1;
1103   int byte_offset;
1104 
1105   if (tmode == VOIDmode)
1106     tmode = mode;
1107 
1108   while (GET_CODE (op0) == SUBREG)
1109     {
1110       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1111       op0 = SUBREG_REG (op0);
1112     }
1113 
1114   /* If we have an out-of-bounds access to a register, just return an
1115      uninitialized register of the required mode.  This can occur if the
1116      source code contains an out-of-bounds access to a small array.  */
1117   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1118     return gen_reg_rtx (tmode);
1119 
1120   if (REG_P (op0)
1121       && mode == GET_MODE (op0)
1122       && bitnum == 0
1123       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1124     {
1125       /* We're trying to extract a full register from itself.  */
1126       return op0;
1127     }
1128 
1129   /* Use vec_extract patterns for extracting parts of vectors whenever
1130      available.  */
1131   if (VECTOR_MODE_P (GET_MODE (op0))
1132       && !MEM_P (op0)
1133       && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1134 	  != CODE_FOR_nothing)
1135       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1136 	  == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1137     {
1138       enum machine_mode outermode = GET_MODE (op0);
1139       enum machine_mode innermode = GET_MODE_INNER (outermode);
1140       int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1141       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1142       rtx rtxpos = GEN_INT (pos);
1143       rtx src = op0;
1144       rtx dest = NULL, pat, seq;
1145       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1146       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1147       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1148 
1149       if (innermode == tmode || innermode == mode)
1150 	dest = target;
1151 
1152       if (!dest)
1153 	dest = gen_reg_rtx (innermode);
1154 
1155       start_sequence ();
1156 
1157       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1158 	dest = copy_to_mode_reg (mode0, dest);
1159 
1160       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1161 	src = copy_to_mode_reg (mode1, src);
1162 
1163       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1164 	rtxpos = copy_to_mode_reg (mode1, rtxpos);
1165 
1166       /* We could handle this, but we should always be called with a pseudo
1167 	 for our targets and all insns should take them as outputs.  */
1168       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1169 		  && (*insn_data[icode].operand[1].predicate) (src, mode1)
1170 		  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1171 
1172       pat = GEN_FCN (icode) (dest, src, rtxpos);
1173       seq = get_insns ();
1174       end_sequence ();
1175       if (pat)
1176 	{
1177 	  emit_insn (seq);
1178 	  emit_insn (pat);
1179 	  return dest;
1180 	}
1181     }
1182 
1183   /* Make sure we are playing with integral modes.  Pun with subregs
1184      if we aren't.  */
1185   {
1186     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1187     if (imode != GET_MODE (op0))
1188       {
1189 	if (MEM_P (op0))
1190 	  op0 = adjust_address (op0, imode, 0);
1191 	else
1192 	  {
1193 	    gcc_assert (imode != BLKmode);
1194 	    op0 = gen_lowpart (imode, op0);
1195 
1196 	    /* If we got a SUBREG, force it into a register since we
1197 	       aren't going to be able to do another SUBREG on it.  */
1198 	    if (GET_CODE (op0) == SUBREG)
1199 	      op0 = force_reg (imode, op0);
1200 	  }
1201       }
1202   }
1203 
1204   /* We may be accessing data outside the field, which means
1205      we can alias adjacent data.  */
1206   if (MEM_P (op0))
1207     {
1208       op0 = shallow_copy_rtx (op0);
1209       set_mem_alias_set (op0, 0);
1210       set_mem_expr (op0, 0);
1211     }
1212 
1213   /* Extraction of a full-word or multi-word value from a structure
1214      in a register or aligned memory can be done with just a SUBREG.
1215      A subword value in the least significant part of a register
1216      can also be extracted with a SUBREG.  For this, we need the
1217      byte offset of the value in op0.  */
1218 
1219   bitpos = bitnum % unit;
1220   offset = bitnum / unit;
1221   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1222 
1223   /* If OP0 is a register, BITPOS must count within a word.
1224      But as we have it, it counts within whatever size OP0 now has.
1225      On a bigendian machine, these are not the same, so convert.  */
1226   if (BYTES_BIG_ENDIAN
1227       && !MEM_P (op0)
1228       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1229     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1230 
1231   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1232      If that's wrong, the solution is to test for it and set TARGET to 0
1233      if needed.  */
1234 
1235   /* Only scalar integer modes can be converted via subregs.  There is an
1236      additional problem for FP modes here in that they can have a precision
1237      which is different from the size.  mode_for_size uses precision, but
1238      we want a mode based on the size, so we must avoid calling it for FP
1239      modes.  */
1240   mode1  = (SCALAR_INT_MODE_P (tmode)
1241 	    ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1242 	    : mode);
1243 
1244   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1245 	&& bitpos % BITS_PER_WORD == 0)
1246        || (mode1 != BLKmode
1247 	   /* ??? The big endian test here is wrong.  This is correct
1248 	      if the value is in a register, and if mode_for_size is not
1249 	      the same mode as op0.  This causes us to get unnecessarily
1250 	      inefficient code from the Thumb port when -mbig-endian.  */
1251 	   && (BYTES_BIG_ENDIAN
1252 	       ? bitpos + bitsize == BITS_PER_WORD
1253 	       : bitpos == 0)))
1254       && ((!MEM_P (op0)
1255 	   && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1256 				     GET_MODE_BITSIZE (GET_MODE (op0)))
1257 	   && GET_MODE_SIZE (mode1) != 0
1258 	   && byte_offset % GET_MODE_SIZE (mode1) == 0)
1259 	  || (MEM_P (op0)
1260 	      && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1261 		  || (offset * BITS_PER_UNIT % bitsize == 0
1262 		      && MEM_ALIGN (op0) % bitsize == 0)))))
1263     {
1264       if (mode1 != GET_MODE (op0))
1265 	{
1266 	  if (MEM_P (op0))
1267 	    op0 = adjust_address (op0, mode1, offset);
1268 	  else
1269 	    {
1270 	      rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1271 					     byte_offset);
1272 	      if (sub == NULL)
1273 		goto no_subreg_mode_swap;
1274 	      op0 = sub;
1275 	    }
1276 	}
1277       if (mode1 != mode)
1278 	return convert_to_mode (tmode, op0, unsignedp);
1279       return op0;
1280     }
1281  no_subreg_mode_swap:
1282 
1283   /* Handle fields bigger than a word.  */
1284 
1285   if (bitsize > BITS_PER_WORD)
1286     {
1287       /* Here we transfer the words of the field
1288 	 in the order least significant first.
1289 	 This is because the most significant word is the one which may
1290 	 be less than full.  */
1291 
1292       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1293       unsigned int i;
1294 
1295       if (target == 0 || !REG_P (target))
1296 	target = gen_reg_rtx (mode);
1297 
1298       /* Indicate for flow that the entire target reg is being set.  */
1299       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1300 
1301       for (i = 0; i < nwords; i++)
1302 	{
1303 	  /* If I is 0, use the low-order word in both field and target;
1304 	     if I is 1, use the next to lowest word; and so on.  */
1305 	  /* Word number in TARGET to use.  */
1306 	  unsigned int wordnum
1307 	    = (WORDS_BIG_ENDIAN
1308 	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1309 	       : i);
1310 	  /* Offset from start of field in OP0.  */
1311 	  unsigned int bit_offset = (WORDS_BIG_ENDIAN
1312 				     ? MAX (0, ((int) bitsize - ((int) i + 1)
1313 						* (int) BITS_PER_WORD))
1314 				     : (int) i * BITS_PER_WORD);
1315 	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1316 	  rtx result_part
1317 	    = extract_bit_field (op0, MIN (BITS_PER_WORD,
1318 					   bitsize - i * BITS_PER_WORD),
1319 				 bitnum + bit_offset, 1, target_part, mode,
1320 				 word_mode);
1321 
1322 	  gcc_assert (target_part);
1323 
1324 	  if (result_part != target_part)
1325 	    emit_move_insn (target_part, result_part);
1326 	}
1327 
1328       if (unsignedp)
1329 	{
1330 	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
1331 	     need to be zero'd out.  */
1332 	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1333 	    {
1334 	      unsigned int i, total_words;
1335 
1336 	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1337 	      for (i = nwords; i < total_words; i++)
1338 		emit_move_insn
1339 		  (operand_subword (target,
1340 				    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1341 				    1, VOIDmode),
1342 		   const0_rtx);
1343 	    }
1344 	  return target;
1345 	}
1346 
1347       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1348       target = expand_shift (LSHIFT_EXPR, mode, target,
1349 			     build_int_cst (NULL_TREE,
1350 					    GET_MODE_BITSIZE (mode) - bitsize),
1351 			     NULL_RTX, 0);
1352       return expand_shift (RSHIFT_EXPR, mode, target,
1353 			   build_int_cst (NULL_TREE,
1354 					  GET_MODE_BITSIZE (mode) - bitsize),
1355 			   NULL_RTX, 0);
1356     }
1357 
1358   /* From here on we know the desired field is smaller than a word.  */
1359 
1360   /* Check if there is a correspondingly-sized integer field, so we can
1361      safely extract it as one size of integer, if necessary; then
1362      truncate or extend to the size that is wanted; then use SUBREGs or
1363      convert_to_mode to get one of the modes we really wanted.  */
1364 
1365   int_mode = int_mode_for_mode (tmode);
1366   if (int_mode == BLKmode)
1367     int_mode = int_mode_for_mode (mode);
1368   /* Should probably push op0 out to memory and then do a load.  */
1369   gcc_assert (int_mode != BLKmode);
1370 
1371   /* OFFSET is the number of words or bytes (UNIT says which)
1372      from STR_RTX to the first word or byte containing part of the field.  */
1373   if (!MEM_P (op0))
1374     {
1375       if (offset != 0
1376 	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1377 	{
1378 	  if (!REG_P (op0))
1379 	    op0 = copy_to_reg (op0);
1380 	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1381 		                op0, (offset * UNITS_PER_WORD));
1382 	}
1383       offset = 0;
1384     }
1385 
1386   /* Now OFFSET is nonzero only for memory operands.  */
1387 
1388   if (unsignedp)
1389     {
1390       if (HAVE_extzv
1391 	  && bitsize > 0
1392 	  && GET_MODE_BITSIZE (extzv_mode) >= bitsize
1393 	  && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1394 		&& (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1395 	{
1396 	  unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1397 	  rtx bitsize_rtx, bitpos_rtx;
1398 	  rtx last = get_last_insn ();
1399 	  rtx xop0 = op0;
1400 	  rtx xtarget = target;
1401 	  rtx xspec_target = spec_target;
1402 	  rtx xspec_target_subreg = spec_target_subreg;
1403 	  rtx pat;
1404 	  enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1405 
1406 	  if (MEM_P (xop0))
1407 	    {
1408 	      int save_volatile_ok = volatile_ok;
1409 	      volatile_ok = 1;
1410 
1411 	      /* Is the memory operand acceptable?  */
1412 	      if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1413 		     (xop0, GET_MODE (xop0))))
1414 		{
1415 		  /* No, load into a reg and extract from there.  */
1416 		  enum machine_mode bestmode;
1417 
1418 		  /* Get the mode to use for inserting into this field.  If
1419 		     OP0 is BLKmode, get the smallest mode consistent with the
1420 		     alignment. If OP0 is a non-BLKmode object that is no
1421 		     wider than MAXMODE, use its mode. Otherwise, use the
1422 		     smallest mode containing the field.  */
1423 
1424 		  if (GET_MODE (xop0) == BLKmode
1425 		      || (GET_MODE_SIZE (GET_MODE (op0))
1426 			  > GET_MODE_SIZE (maxmode)))
1427 		    bestmode = get_best_mode (bitsize, bitnum,
1428 					      MEM_ALIGN (xop0), maxmode,
1429 					      MEM_VOLATILE_P (xop0));
1430 		  else
1431 		    bestmode = GET_MODE (xop0);
1432 
1433 		  if (bestmode == VOIDmode
1434 		      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1435 			  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1436 		    goto extzv_loses;
1437 
1438 		  /* Compute offset as multiple of this unit,
1439 		     counting in bytes.  */
1440 		  unit = GET_MODE_BITSIZE (bestmode);
1441 		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1442 		  xbitpos = bitnum % unit;
1443 		  xop0 = adjust_address (xop0, bestmode, xoffset);
1444 
1445 		  /* Make sure register is big enough for the whole field. */
1446 		  if (xoffset * BITS_PER_UNIT + unit
1447 		      < offset * BITS_PER_UNIT + bitsize)
1448 		    goto extzv_loses;
1449 
1450 		  /* Fetch it to a register in that size.  */
1451 		  xop0 = force_reg (bestmode, xop0);
1452 
1453 		  /* XBITPOS counts within UNIT, which is what is expected.  */
1454 		}
1455 	      else
1456 		/* Get ref to first byte containing part of the field.  */
1457 		xop0 = adjust_address (xop0, byte_mode, xoffset);
1458 
1459 	      volatile_ok = save_volatile_ok;
1460 	    }
1461 
1462 	  /* If op0 is a register, we need it in MAXMODE (which is usually
1463 	     SImode). to make it acceptable to the format of extzv.  */
1464 	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1465 	    goto extzv_loses;
1466 	  if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1467 	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1468 
1469 	  /* On big-endian machines, we count bits from the most significant.
1470 	     If the bit field insn does not, we must invert.  */
1471 	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1472 	    xbitpos = unit - bitsize - xbitpos;
1473 
1474 	  /* Now convert from counting within UNIT to counting in MAXMODE.  */
1475 	  if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1476 	    xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1477 
1478 	  unit = GET_MODE_BITSIZE (maxmode);
1479 
1480 	  if (xtarget == 0)
1481 	    xtarget = xspec_target = gen_reg_rtx (tmode);
1482 
1483 	  if (GET_MODE (xtarget) != maxmode)
1484 	    {
1485 	      if (REG_P (xtarget))
1486 		{
1487 		  int wider = (GET_MODE_SIZE (maxmode)
1488 			       > GET_MODE_SIZE (GET_MODE (xtarget)));
1489 		  xtarget = gen_lowpart (maxmode, xtarget);
1490 		  if (wider)
1491 		    xspec_target_subreg = xtarget;
1492 		}
1493 	      else
1494 		xtarget = gen_reg_rtx (maxmode);
1495 	    }
1496 
1497 	  /* If this machine's extzv insists on a register target,
1498 	     make sure we have one.  */
1499 	  if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1500 		 (xtarget, maxmode)))
1501 	    xtarget = gen_reg_rtx (maxmode);
1502 
1503 	  bitsize_rtx = GEN_INT (bitsize);
1504 	  bitpos_rtx = GEN_INT (xbitpos);
1505 
1506 	  pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1507 	  if (pat)
1508 	    {
1509 	      emit_insn (pat);
1510 	      target = xtarget;
1511 	      spec_target = xspec_target;
1512 	      spec_target_subreg = xspec_target_subreg;
1513 	    }
1514 	  else
1515 	    {
1516 	      delete_insns_since (last);
1517 	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1518 						bitpos, target, 1);
1519 	    }
1520 	}
1521       else
1522       extzv_loses:
1523 	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1524 					  bitpos, target, 1);
1525     }
1526   else
1527     {
1528       if (HAVE_extv
1529 	  && bitsize > 0
1530 	  && GET_MODE_BITSIZE (extv_mode) >= bitsize
1531 	  && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1532 		&& (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1533 	{
1534 	  int xbitpos = bitpos, xoffset = offset;
1535 	  rtx bitsize_rtx, bitpos_rtx;
1536 	  rtx last = get_last_insn ();
1537 	  rtx xop0 = op0, xtarget = target;
1538 	  rtx xspec_target = spec_target;
1539 	  rtx xspec_target_subreg = spec_target_subreg;
1540 	  rtx pat;
1541 	  enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1542 
1543 	  if (MEM_P (xop0))
1544 	    {
1545 	      /* Is the memory operand acceptable?  */
1546 	      if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1547 		     (xop0, GET_MODE (xop0))))
1548 		{
1549 		  /* No, load into a reg and extract from there.  */
1550 		  enum machine_mode bestmode;
1551 
1552 		  /* Get the mode to use for inserting into this field.  If
1553 		     OP0 is BLKmode, get the smallest mode consistent with the
1554 		     alignment. If OP0 is a non-BLKmode object that is no
1555 		     wider than MAXMODE, use its mode. Otherwise, use the
1556 		     smallest mode containing the field.  */
1557 
1558 		  if (GET_MODE (xop0) == BLKmode
1559 		      || (GET_MODE_SIZE (GET_MODE (op0))
1560 			  > GET_MODE_SIZE (maxmode)))
1561 		    bestmode = get_best_mode (bitsize, bitnum,
1562 					      MEM_ALIGN (xop0), maxmode,
1563 					      MEM_VOLATILE_P (xop0));
1564 		  else
1565 		    bestmode = GET_MODE (xop0);
1566 
1567 		  if (bestmode == VOIDmode
1568 		      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1569 			  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1570 		    goto extv_loses;
1571 
1572 		  /* Compute offset as multiple of this unit,
1573 		     counting in bytes.  */
1574 		  unit = GET_MODE_BITSIZE (bestmode);
1575 		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1576 		  xbitpos = bitnum % unit;
1577 		  xop0 = adjust_address (xop0, bestmode, xoffset);
1578 
1579 		  /* Make sure register is big enough for the whole field. */
1580 		  if (xoffset * BITS_PER_UNIT + unit
1581 		      < offset * BITS_PER_UNIT + bitsize)
1582 		    goto extv_loses;
1583 
1584 		  /* Fetch it to a register in that size.  */
1585 		  xop0 = force_reg (bestmode, xop0);
1586 
1587 		  /* XBITPOS counts within UNIT, which is what is expected.  */
1588 		}
1589 	      else
1590 		/* Get ref to first byte containing part of the field.  */
1591 		xop0 = adjust_address (xop0, byte_mode, xoffset);
1592 	    }
1593 
1594 	  /* If op0 is a register, we need it in MAXMODE (which is usually
1595 	     SImode) to make it acceptable to the format of extv.  */
1596 	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1597 	    goto extv_loses;
1598 	  if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1599 	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1600 
1601 	  /* On big-endian machines, we count bits from the most significant.
1602 	     If the bit field insn does not, we must invert.  */
1603 	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1604 	    xbitpos = unit - bitsize - xbitpos;
1605 
1606 	  /* XBITPOS counts within a size of UNIT.
1607 	     Adjust to count within a size of MAXMODE.  */
1608 	  if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1609 	    xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1610 
1611 	  unit = GET_MODE_BITSIZE (maxmode);
1612 
1613 	  if (xtarget == 0)
1614 	    xtarget = xspec_target = gen_reg_rtx (tmode);
1615 
1616 	  if (GET_MODE (xtarget) != maxmode)
1617 	    {
1618 	      if (REG_P (xtarget))
1619 		{
1620 		  int wider = (GET_MODE_SIZE (maxmode)
1621 			       > GET_MODE_SIZE (GET_MODE (xtarget)));
1622 		  xtarget = gen_lowpart (maxmode, xtarget);
1623 		  if (wider)
1624 		    xspec_target_subreg = xtarget;
1625 		}
1626 	      else
1627 		xtarget = gen_reg_rtx (maxmode);
1628 	    }
1629 
1630 	  /* If this machine's extv insists on a register target,
1631 	     make sure we have one.  */
1632 	  if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1633 		 (xtarget, maxmode)))
1634 	    xtarget = gen_reg_rtx (maxmode);
1635 
1636 	  bitsize_rtx = GEN_INT (bitsize);
1637 	  bitpos_rtx = GEN_INT (xbitpos);
1638 
1639 	  pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1640 	  if (pat)
1641 	    {
1642 	      emit_insn (pat);
1643 	      target = xtarget;
1644 	      spec_target = xspec_target;
1645 	      spec_target_subreg = xspec_target_subreg;
1646 	    }
1647 	  else
1648 	    {
1649 	      delete_insns_since (last);
1650 	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1651 						bitpos, target, 0);
1652 	    }
1653 	}
1654       else
1655       extv_loses:
1656 	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1657 					  bitpos, target, 0);
1658     }
1659   if (target == spec_target)
1660     return target;
1661   if (target == spec_target_subreg)
1662     return spec_target;
1663   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1664     {
1665       /* If the target mode is not a scalar integral, first convert to the
1666 	 integer mode of that size and then access it as a floating-point
1667 	 value via a SUBREG.  */
1668       if (!SCALAR_INT_MODE_P (tmode))
1669 	{
1670 	  enum machine_mode smode
1671 	    = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1672 	  target = convert_to_mode (smode, target, unsignedp);
1673 	  target = force_reg (smode, target);
1674 	  return gen_lowpart (tmode, target);
1675 	}
1676 
1677       return convert_to_mode (tmode, target, unsignedp);
1678     }
1679   return target;
1680 }
1681 
1682 /* Extract a bit field using shifts and boolean operations
1683    Returns an rtx to represent the value.
1684    OP0 addresses a register (word) or memory (byte).
1685    BITPOS says which bit within the word or byte the bit field starts in.
1686    OFFSET says how many bytes farther the bit field starts;
1687     it is 0 if OP0 is a register.
1688    BITSIZE says how many bits long the bit field is.
1689     (If OP0 is a register, it may be narrower than a full word,
1690      but BITPOS still counts within a full word,
1691      which is significant on bigendian machines.)
1692 
1693    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1694    If TARGET is nonzero, attempts to store the value there
1695    and return TARGET, but this is not guaranteed.
1696    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1697 
1698 static rtx
extract_fixed_bit_field(enum machine_mode tmode,rtx op0,unsigned HOST_WIDE_INT offset,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitpos,rtx target,int unsignedp)1699 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1700 			 unsigned HOST_WIDE_INT offset,
1701 			 unsigned HOST_WIDE_INT bitsize,
1702 			 unsigned HOST_WIDE_INT bitpos, rtx target,
1703 			 int unsignedp)
1704 {
1705   unsigned int total_bits = BITS_PER_WORD;
1706   enum machine_mode mode;
1707 
1708   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1709     {
1710       /* Special treatment for a bit field split across two registers.  */
1711       if (bitsize + bitpos > BITS_PER_WORD)
1712 	return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1713     }
1714   else
1715     {
1716       /* Get the proper mode to use for this field.  We want a mode that
1717 	 includes the entire field.  If such a mode would be larger than
1718 	 a word, we won't be doing the extraction the normal way.  */
1719 
1720       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1721 			    MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1722 
1723       if (mode == VOIDmode)
1724 	/* The only way this should occur is if the field spans word
1725 	   boundaries.  */
1726 	return extract_split_bit_field (op0, bitsize,
1727 					bitpos + offset * BITS_PER_UNIT,
1728 					unsignedp);
1729 
1730       total_bits = GET_MODE_BITSIZE (mode);
1731 
1732       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1733 	 be in the range 0 to total_bits-1, and put any excess bytes in
1734 	 OFFSET.  */
1735       if (bitpos >= total_bits)
1736 	{
1737 	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1738 	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1739 		     * BITS_PER_UNIT);
1740 	}
1741 
1742       /* Get ref to an aligned byte, halfword, or word containing the field.
1743 	 Adjust BITPOS to be position within a word,
1744 	 and OFFSET to be the offset of that word.
1745 	 Then alter OP0 to refer to that word.  */
1746       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1747       offset -= (offset % (total_bits / BITS_PER_UNIT));
1748       op0 = adjust_address (op0, mode, offset);
1749     }
1750 
1751   mode = GET_MODE (op0);
1752 
1753   if (BYTES_BIG_ENDIAN)
1754     /* BITPOS is the distance between our msb and that of OP0.
1755        Convert it to the distance from the lsb.  */
1756     bitpos = total_bits - bitsize - bitpos;
1757 
1758   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1759      We have reduced the big-endian case to the little-endian case.  */
1760 
1761   if (unsignedp)
1762     {
1763       if (bitpos)
1764 	{
1765 	  /* If the field does not already start at the lsb,
1766 	     shift it so it does.  */
1767 	  tree amount = build_int_cst (NULL_TREE, bitpos);
1768 	  /* Maybe propagate the target for the shift.  */
1769 	  /* But not if we will return it--could confuse integrate.c.  */
1770 	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1771 	  if (tmode != mode) subtarget = 0;
1772 	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1773 	}
1774       /* Convert the value to the desired mode.  */
1775       if (mode != tmode)
1776 	op0 = convert_to_mode (tmode, op0, 1);
1777 
1778       /* Unless the msb of the field used to be the msb when we shifted,
1779 	 mask out the upper bits.  */
1780 
1781       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1782 	return expand_binop (GET_MODE (op0), and_optab, op0,
1783 			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1784 			     target, 1, OPTAB_LIB_WIDEN);
1785       return op0;
1786     }
1787 
1788   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1789      then arithmetic-shift its lsb to the lsb of the word.  */
1790   op0 = force_reg (mode, op0);
1791   if (mode != tmode)
1792     target = 0;
1793 
1794   /* Find the narrowest integer mode that contains the field.  */
1795 
1796   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1797        mode = GET_MODE_WIDER_MODE (mode))
1798     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1799       {
1800 	op0 = convert_to_mode (mode, op0, 0);
1801 	break;
1802       }
1803 
1804   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1805     {
1806       tree amount
1807 	= build_int_cst (NULL_TREE,
1808 			 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1809       /* Maybe propagate the target for the shift.  */
1810       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1811       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1812     }
1813 
1814   return expand_shift (RSHIFT_EXPR, mode, op0,
1815 		       build_int_cst (NULL_TREE,
1816 				      GET_MODE_BITSIZE (mode) - bitsize),
1817 		       target, 0);
1818 }
1819 
1820 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1821    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1822    complement of that if COMPLEMENT.  The mask is truncated if
1823    necessary to the width of mode MODE.  The mask is zero-extended if
1824    BITSIZE+BITPOS is too small for MODE.  */
1825 
1826 static rtx
mask_rtx(enum machine_mode mode,int bitpos,int bitsize,int complement)1827 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1828 {
1829   HOST_WIDE_INT masklow, maskhigh;
1830 
1831   if (bitsize == 0)
1832     masklow = 0;
1833   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1834     masklow = (HOST_WIDE_INT) -1 << bitpos;
1835   else
1836     masklow = 0;
1837 
1838   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1839     masklow &= ((unsigned HOST_WIDE_INT) -1
1840 		>> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1841 
1842   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1843     maskhigh = -1;
1844   else
1845     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1846 
1847   if (bitsize == 0)
1848     maskhigh = 0;
1849   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1850     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1851 		 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1852   else
1853     maskhigh = 0;
1854 
1855   if (complement)
1856     {
1857       maskhigh = ~maskhigh;
1858       masklow = ~masklow;
1859     }
1860 
1861   return immed_double_const (masklow, maskhigh, mode);
1862 }
1863 
1864 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1865    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1866 
1867 static rtx
lshift_value(enum machine_mode mode,rtx value,int bitpos,int bitsize)1868 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1869 {
1870   unsigned HOST_WIDE_INT v = INTVAL (value);
1871   HOST_WIDE_INT low, high;
1872 
1873   if (bitsize < HOST_BITS_PER_WIDE_INT)
1874     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1875 
1876   if (bitpos < HOST_BITS_PER_WIDE_INT)
1877     {
1878       low = v << bitpos;
1879       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1880     }
1881   else
1882     {
1883       low = 0;
1884       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1885     }
1886 
1887   return immed_double_const (low, high, mode);
1888 }
1889 
1890 /* Extract a bit field from a memory by forcing the alignment of the
1891    memory.  This efficient only if the field spans at least 4 boundaries.
1892 
1893    OP0 is the MEM.
1894    BITSIZE is the field width; BITPOS is the position of the first bit.
1895    UNSIGNEDP is true if the result should be zero-extended.  */
1896 
1897 static rtx
extract_force_align_mem_bit_field(rtx op0,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitpos,int unsignedp)1898 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1899 				   unsigned HOST_WIDE_INT bitpos,
1900 				   int unsignedp)
1901 {
1902   enum machine_mode mode, dmode;
1903   unsigned int m_bitsize, m_size;
1904   unsigned int sign_shift_up, sign_shift_dn;
1905   rtx base, a1, a2, v1, v2, comb, shift, result, start;
1906 
1907   /* Choose a mode that will fit BITSIZE.  */
1908   mode = smallest_mode_for_size (bitsize, MODE_INT);
1909   m_size = GET_MODE_SIZE (mode);
1910   m_bitsize = GET_MODE_BITSIZE (mode);
1911 
1912   /* Choose a mode twice as wide.  Fail if no such mode exists.  */
1913   dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1914   if (dmode == BLKmode)
1915     return NULL;
1916 
1917   do_pending_stack_adjust ();
1918   start = get_last_insn ();
1919 
1920   /* At the end, we'll need an additional shift to deal with sign/zero
1921      extension.  By default this will be a left+right shift of the
1922      appropriate size.  But we may be able to eliminate one of them.  */
1923   sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1924 
1925   if (STRICT_ALIGNMENT)
1926     {
1927       base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1928       bitpos %= BITS_PER_UNIT;
1929 
1930       /* We load two values to be concatenate.  There's an edge condition
1931 	 that bears notice -- an aligned value at the end of a page can
1932 	 only load one value lest we segfault.  So the two values we load
1933 	 are at "base & -size" and "(base + size - 1) & -size".  If base
1934 	 is unaligned, the addresses will be aligned and sequential; if
1935 	 base is aligned, the addresses will both be equal to base.  */
1936 
1937       a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1938 				GEN_INT (-(HOST_WIDE_INT)m_size),
1939 				NULL, true, OPTAB_LIB_WIDEN);
1940       mark_reg_pointer (a1, m_bitsize);
1941       v1 = gen_rtx_MEM (mode, a1);
1942       set_mem_align (v1, m_bitsize);
1943       v1 = force_reg (mode, validize_mem (v1));
1944 
1945       a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1946       a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1947 				GEN_INT (-(HOST_WIDE_INT)m_size),
1948 				NULL, true, OPTAB_LIB_WIDEN);
1949       v2 = gen_rtx_MEM (mode, a2);
1950       set_mem_align (v2, m_bitsize);
1951       v2 = force_reg (mode, validize_mem (v2));
1952 
1953       /* Combine these two values into a double-word value.  */
1954       if (m_bitsize == BITS_PER_WORD)
1955 	{
1956 	  comb = gen_reg_rtx (dmode);
1957 	  emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1958 	  emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1959 	  emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1960 	}
1961       else
1962 	{
1963 	  if (BYTES_BIG_ENDIAN)
1964 	    comb = v1, v1 = v2, v2 = comb;
1965 	  v1 = convert_modes (dmode, mode, v1, true);
1966 	  if (v1 == NULL)
1967 	    goto fail;
1968 	  v2 = convert_modes (dmode, mode, v2, true);
1969 	  v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1970 				    NULL, true, OPTAB_LIB_WIDEN);
1971 	  if (v2 == NULL)
1972 	    goto fail;
1973 	  comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1974 				      true, OPTAB_LIB_WIDEN);
1975 	  if (comb == NULL)
1976 	    goto fail;
1977 	}
1978 
1979       shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1980 				   NULL, true, OPTAB_LIB_WIDEN);
1981       shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1982 
1983       if (bitpos != 0)
1984 	{
1985 	  if (sign_shift_up <= bitpos)
1986 	    bitpos -= sign_shift_up, sign_shift_up = 0;
1987 	  shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1988 				       NULL, true, OPTAB_LIB_WIDEN);
1989 	}
1990     }
1991   else
1992     {
1993       unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1994       bitpos %= BITS_PER_UNIT;
1995 
1996       /* When strict alignment is not required, we can just load directly
1997 	 from memory without masking.  If the remaining BITPOS offset is
1998 	 small enough, we may be able to do all operations in MODE as
1999 	 opposed to DMODE.  */
2000       if (bitpos + bitsize <= m_bitsize)
2001 	dmode = mode;
2002       comb = adjust_address (op0, dmode, offset);
2003 
2004       if (sign_shift_up <= bitpos)
2005 	bitpos -= sign_shift_up, sign_shift_up = 0;
2006       shift = GEN_INT (bitpos);
2007     }
2008 
2009   /* Shift down the double-word such that the requested value is at bit 0.  */
2010   if (shift != const0_rtx)
2011     comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
2012 				comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
2013   if (comb == NULL)
2014     goto fail;
2015 
2016   /* If the field exactly matches MODE, then all we need to do is return the
2017      lowpart.  Otherwise, shift to get the sign bits set properly.  */
2018   result = force_reg (mode, gen_lowpart (mode, comb));
2019 
2020   if (sign_shift_up)
2021     result = expand_simple_binop (mode, ASHIFT, result,
2022 				  GEN_INT (sign_shift_up),
2023 				  NULL_RTX, 0, OPTAB_LIB_WIDEN);
2024   if (sign_shift_dn)
2025     result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
2026 				  result, GEN_INT (sign_shift_dn),
2027 				  NULL_RTX, 0, OPTAB_LIB_WIDEN);
2028 
2029   return result;
2030 
2031  fail:
2032   delete_insns_since (start);
2033   return NULL;
2034 }
2035 
2036 /* Extract a bit field that is split across two words
2037    and return an RTX for the result.
2038 
2039    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2040    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2041    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
2042 
2043 static rtx
extract_split_bit_field(rtx op0,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitpos,int unsignedp)2044 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2045 			 unsigned HOST_WIDE_INT bitpos, int unsignedp)
2046 {
2047   unsigned int unit;
2048   unsigned int bitsdone = 0;
2049   rtx result = NULL_RTX;
2050   int first = 1;
2051 
2052   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2053      much at a time.  */
2054   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2055     unit = BITS_PER_WORD;
2056   else
2057     {
2058       unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2059       if (0 && bitsize / unit > 2)
2060 	{
2061 	  rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2062 						       unsignedp);
2063 	  if (tmp)
2064 	    return tmp;
2065 	}
2066     }
2067 
2068   while (bitsdone < bitsize)
2069     {
2070       unsigned HOST_WIDE_INT thissize;
2071       rtx part, word;
2072       unsigned HOST_WIDE_INT thispos;
2073       unsigned HOST_WIDE_INT offset;
2074 
2075       offset = (bitpos + bitsdone) / unit;
2076       thispos = (bitpos + bitsdone) % unit;
2077 
2078       /* THISSIZE must not overrun a word boundary.  Otherwise,
2079 	 extract_fixed_bit_field will call us again, and we will mutually
2080 	 recurse forever.  */
2081       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2082       thissize = MIN (thissize, unit - thispos);
2083 
2084       /* If OP0 is a register, then handle OFFSET here.
2085 
2086 	 When handling multiword bitfields, extract_bit_field may pass
2087 	 down a word_mode SUBREG of a larger REG for a bitfield that actually
2088 	 crosses a word boundary.  Thus, for a SUBREG, we must find
2089 	 the current word starting from the base register.  */
2090       if (GET_CODE (op0) == SUBREG)
2091 	{
2092 	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2093 	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
2094 					GET_MODE (SUBREG_REG (op0)));
2095 	  offset = 0;
2096 	}
2097       else if (REG_P (op0))
2098 	{
2099 	  word = operand_subword_force (op0, offset, GET_MODE (op0));
2100 	  offset = 0;
2101 	}
2102       else
2103 	word = op0;
2104 
2105       /* Extract the parts in bit-counting order,
2106 	 whose meaning is determined by BYTES_PER_UNIT.
2107 	 OFFSET is in UNITs, and UNIT is in bits.
2108 	 extract_fixed_bit_field wants offset in bytes.  */
2109       part = extract_fixed_bit_field (word_mode, word,
2110 				      offset * unit / BITS_PER_UNIT,
2111 				      thissize, thispos, 0, 1);
2112       bitsdone += thissize;
2113 
2114       /* Shift this part into place for the result.  */
2115       if (BYTES_BIG_ENDIAN)
2116 	{
2117 	  if (bitsize != bitsdone)
2118 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2119 				 build_int_cst (NULL_TREE, bitsize - bitsdone),
2120 				 0, 1);
2121 	}
2122       else
2123 	{
2124 	  if (bitsdone != thissize)
2125 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2126 				 build_int_cst (NULL_TREE,
2127 						bitsdone - thissize), 0, 1);
2128 	}
2129 
2130       if (first)
2131 	result = part;
2132       else
2133 	/* Combine the parts with bitwise or.  This works
2134 	   because we extracted each part as an unsigned bit field.  */
2135 	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2136 			       OPTAB_LIB_WIDEN);
2137 
2138       first = 0;
2139     }
2140 
2141   /* Unsigned bit field: we are done.  */
2142   if (unsignedp)
2143     return result;
2144   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2145   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2146 			 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2147 			 NULL_RTX, 0);
2148   return expand_shift (RSHIFT_EXPR, word_mode, result,
2149 		       build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2150 		       NULL_RTX, 0);
2151 }
2152 
2153 /* Add INC into TARGET.  */
2154 
2155 void
expand_inc(rtx target,rtx inc)2156 expand_inc (rtx target, rtx inc)
2157 {
2158   rtx value = expand_binop (GET_MODE (target), add_optab,
2159 			    target, inc,
2160 			    target, 0, OPTAB_LIB_WIDEN);
2161   if (value != target)
2162     emit_move_insn (target, value);
2163 }
2164 
2165 /* Subtract DEC from TARGET.  */
2166 
2167 void
expand_dec(rtx target,rtx dec)2168 expand_dec (rtx target, rtx dec)
2169 {
2170   rtx value = expand_binop (GET_MODE (target), sub_optab,
2171 			    target, dec,
2172 			    target, 0, OPTAB_LIB_WIDEN);
2173   if (value != target)
2174     emit_move_insn (target, value);
2175 }
2176 
2177 /* Output a shift instruction for expression code CODE,
2178    with SHIFTED being the rtx for the value to shift,
2179    and AMOUNT the tree for the amount to shift by.
2180    Store the result in the rtx TARGET, if that is convenient.
2181    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2182    Return the rtx for where the value is.  */
2183 
2184 rtx
expand_shift(enum tree_code code,enum machine_mode mode,rtx shifted,tree amount,rtx target,int unsignedp)2185 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2186 	      tree amount, rtx target, int unsignedp)
2187 {
2188   rtx op1, temp = 0;
2189   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2190   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2191   int try;
2192 
2193   /* Previously detected shift-counts computed by NEGATE_EXPR
2194      and shifted in the other direction; but that does not work
2195      on all machines.  */
2196 
2197   op1 = expand_normal (amount);
2198 
2199   if (SHIFT_COUNT_TRUNCATED)
2200     {
2201       if (GET_CODE (op1) == CONST_INT
2202 	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2203 	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2204 	op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2205 		       % GET_MODE_BITSIZE (mode));
2206       else if (GET_CODE (op1) == SUBREG
2207 	       && subreg_lowpart_p (op1))
2208 	op1 = SUBREG_REG (op1);
2209     }
2210 
2211   if (op1 == const0_rtx)
2212     return shifted;
2213 
2214   /* Check whether its cheaper to implement a left shift by a constant
2215      bit count by a sequence of additions.  */
2216   if (code == LSHIFT_EXPR
2217       && GET_CODE (op1) == CONST_INT
2218       && INTVAL (op1) > 0
2219       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2220       && INTVAL (op1) < MAX_BITS_PER_WORD
2221       && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode]
2222       && shift_cost[mode][INTVAL (op1)] != MAX_COST)
2223     {
2224       int i;
2225       for (i = 0; i < INTVAL (op1); i++)
2226 	{
2227 	  temp = force_reg (mode, shifted);
2228 	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2229 				  unsignedp, OPTAB_LIB_WIDEN);
2230 	}
2231       return shifted;
2232     }
2233 
2234   for (try = 0; temp == 0 && try < 3; try++)
2235     {
2236       enum optab_methods methods;
2237 
2238       if (try == 0)
2239 	methods = OPTAB_DIRECT;
2240       else if (try == 1)
2241 	methods = OPTAB_WIDEN;
2242       else
2243 	methods = OPTAB_LIB_WIDEN;
2244 
2245       if (rotate)
2246 	{
2247 	  /* Widening does not work for rotation.  */
2248 	  if (methods == OPTAB_WIDEN)
2249 	    continue;
2250 	  else if (methods == OPTAB_LIB_WIDEN)
2251 	    {
2252 	      /* If we have been unable to open-code this by a rotation,
2253 		 do it as the IOR of two shifts.  I.e., to rotate A
2254 		 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2255 		 where C is the bitsize of A.
2256 
2257 		 It is theoretically possible that the target machine might
2258 		 not be able to perform either shift and hence we would
2259 		 be making two libcalls rather than just the one for the
2260 		 shift (similarly if IOR could not be done).  We will allow
2261 		 this extremely unlikely lossage to avoid complicating the
2262 		 code below.  */
2263 
2264 	      rtx subtarget = target == shifted ? 0 : target;
2265 	      tree new_amount, other_amount;
2266 	      rtx temp1;
2267 	      tree type = TREE_TYPE (amount);
2268 	      if (GET_MODE (op1) != TYPE_MODE (type)
2269 		  && GET_MODE (op1) != VOIDmode)
2270 		op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2271 	      new_amount = make_tree (type, op1);
2272 	      other_amount
2273 		= fold_build2 (MINUS_EXPR, type,
2274 			       build_int_cst (type, GET_MODE_BITSIZE (mode)),
2275 			       new_amount);
2276 
2277 	      shifted = force_reg (mode, shifted);
2278 
2279 	      temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2280 				   mode, shifted, new_amount, 0, 1);
2281 	      temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2282 				    mode, shifted, other_amount, subtarget, 1);
2283 	      return expand_binop (mode, ior_optab, temp, temp1, target,
2284 				   unsignedp, methods);
2285 	    }
2286 
2287 	  temp = expand_binop (mode,
2288 			       left ? rotl_optab : rotr_optab,
2289 			       shifted, op1, target, unsignedp, methods);
2290 	}
2291       else if (unsignedp)
2292 	temp = expand_binop (mode,
2293 			     left ? ashl_optab : lshr_optab,
2294 			     shifted, op1, target, unsignedp, methods);
2295 
2296       /* Do arithmetic shifts.
2297 	 Also, if we are going to widen the operand, we can just as well
2298 	 use an arithmetic right-shift instead of a logical one.  */
2299       if (temp == 0 && ! rotate
2300 	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2301 	{
2302 	  enum optab_methods methods1 = methods;
2303 
2304 	  /* If trying to widen a log shift to an arithmetic shift,
2305 	     don't accept an arithmetic shift of the same size.  */
2306 	  if (unsignedp)
2307 	    methods1 = OPTAB_MUST_WIDEN;
2308 
2309 	  /* Arithmetic shift */
2310 
2311 	  temp = expand_binop (mode,
2312 			       left ? ashl_optab : ashr_optab,
2313 			       shifted, op1, target, unsignedp, methods1);
2314 	}
2315 
2316       /* We used to try extzv here for logical right shifts, but that was
2317 	 only useful for one machine, the VAX, and caused poor code
2318 	 generation there for lshrdi3, so the code was deleted and a
2319 	 define_expand for lshrsi3 was added to vax.md.  */
2320     }
2321 
2322   gcc_assert (temp);
2323   return temp;
2324 }
2325 
2326 enum alg_code {
2327   alg_unknown,
2328   alg_zero,
2329   alg_m, alg_shift,
2330   alg_add_t_m2,
2331   alg_sub_t_m2,
2332   alg_add_factor,
2333   alg_sub_factor,
2334   alg_add_t2_m,
2335   alg_sub_t2_m,
2336   alg_impossible
2337 };
2338 
2339 /* This structure holds the "cost" of a multiply sequence.  The
2340    "cost" field holds the total rtx_cost of every operator in the
2341    synthetic multiplication sequence, hence cost(a op b) is defined
2342    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2343    The "latency" field holds the minimum possible latency of the
2344    synthetic multiply, on a hypothetical infinitely parallel CPU.
2345    This is the critical path, or the maximum height, of the expression
2346    tree which is the sum of rtx_costs on the most expensive path from
2347    any leaf to the root.  Hence latency(a op b) is defined as zero for
2348    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2349 
2350 struct mult_cost {
2351   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2352   short latency;  /* The latency of the multiplication sequence.  */
2353 };
2354 
2355 /* This macro is used to compare a pointer to a mult_cost against an
2356    single integer "rtx_cost" value.  This is equivalent to the macro
2357    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2358 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)	\
2359 			     || ((X)->cost == (Y) && (X)->latency < (Y)))
2360 
2361 /* This macro is used to compare two pointers to mult_costs against
2362    each other.  The macro returns true if X is cheaper than Y.
2363    Currently, the cheaper of two mult_costs is the one with the
2364    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2365 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost		\
2366 				 || ((X)->cost == (Y)->cost	\
2367 				     && (X)->latency < (Y)->latency))
2368 
2369 /* This structure records a sequence of operations.
2370    `ops' is the number of operations recorded.
2371    `cost' is their total cost.
2372    The operations are stored in `op' and the corresponding
2373    logarithms of the integer coefficients in `log'.
2374 
2375    These are the operations:
2376    alg_zero		total := 0;
2377    alg_m		total := multiplicand;
2378    alg_shift		total := total * coeff
2379    alg_add_t_m2		total := total + multiplicand * coeff;
2380    alg_sub_t_m2		total := total - multiplicand * coeff;
2381    alg_add_factor	total := total * coeff + total;
2382    alg_sub_factor	total := total * coeff - total;
2383    alg_add_t2_m		total := total * coeff + multiplicand;
2384    alg_sub_t2_m		total := total * coeff - multiplicand;
2385 
2386    The first operand must be either alg_zero or alg_m.  */
2387 
2388 struct algorithm
2389 {
2390   struct mult_cost cost;
2391   short ops;
2392   /* The size of the OP and LOG fields are not directly related to the
2393      word size, but the worst-case algorithms will be if we have few
2394      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2395      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2396      in total wordsize operations.  */
2397   enum alg_code op[MAX_BITS_PER_WORD];
2398   char log[MAX_BITS_PER_WORD];
2399 };
2400 
2401 /* The entry for our multiplication cache/hash table.  */
2402 struct alg_hash_entry {
2403   /* The number we are multiplying by.  */
2404   unsigned HOST_WIDE_INT t;
2405 
2406   /* The mode in which we are multiplying something by T.  */
2407   enum machine_mode mode;
2408 
2409   /* The best multiplication algorithm for t.  */
2410   enum alg_code alg;
2411 
2412   /* The cost of multiplication if ALG_CODE is not alg_impossible.
2413      Otherwise, the cost within which multiplication by T is
2414      impossible.  */
2415   struct mult_cost cost;
2416 };
2417 
2418 /* The number of cache/hash entries.  */
2419 #if HOST_BITS_PER_WIDE_INT == 64
2420 #define NUM_ALG_HASH_ENTRIES 1031
2421 #else
2422 #define NUM_ALG_HASH_ENTRIES 307
2423 #endif
2424 
2425 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2426    actually a hash table.  If we have a collision, that the older
2427    entry is kicked out.  */
2428 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2429 
2430 /* Indicates the type of fixup needed after a constant multiplication.
2431    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2432    the result should be negated, and ADD_VARIANT means that the
2433    multiplicand should be added to the result.  */
2434 enum mult_variant {basic_variant, negate_variant, add_variant};
2435 
2436 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2437 			const struct mult_cost *, enum machine_mode mode);
2438 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2439 				 struct algorithm *, enum mult_variant *, int);
2440 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2441 			      const struct algorithm *, enum mult_variant);
2442 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2443 						 int, rtx *, int *, int *);
2444 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2445 static rtx extract_high_half (enum machine_mode, rtx);
2446 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2447 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2448 				       int, int);
2449 /* Compute and return the best algorithm for multiplying by T.
2450    The algorithm must cost less than cost_limit
2451    If retval.cost >= COST_LIMIT, no algorithm was found and all
2452    other field of the returned struct are undefined.
2453    MODE is the machine mode of the multiplication.  */
2454 
2455 static void
synth_mult(struct algorithm * alg_out,unsigned HOST_WIDE_INT t,const struct mult_cost * cost_limit,enum machine_mode mode)2456 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2457 	    const struct mult_cost *cost_limit, enum machine_mode mode)
2458 {
2459   int m;
2460   struct algorithm *alg_in, *best_alg;
2461   struct mult_cost best_cost;
2462   struct mult_cost new_limit;
2463   int op_cost, op_latency;
2464   unsigned HOST_WIDE_INT q;
2465   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2466   int hash_index;
2467   bool cache_hit = false;
2468   enum alg_code cache_alg = alg_zero;
2469 
2470   /* Indicate that no algorithm is yet found.  If no algorithm
2471      is found, this value will be returned and indicate failure.  */
2472   alg_out->cost.cost = cost_limit->cost + 1;
2473   alg_out->cost.latency = cost_limit->latency + 1;
2474 
2475   if (cost_limit->cost < 0
2476       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2477     return;
2478 
2479   /* Restrict the bits of "t" to the multiplication's mode.  */
2480   t &= GET_MODE_MASK (mode);
2481 
2482   /* t == 1 can be done in zero cost.  */
2483   if (t == 1)
2484     {
2485       alg_out->ops = 1;
2486       alg_out->cost.cost = 0;
2487       alg_out->cost.latency = 0;
2488       alg_out->op[0] = alg_m;
2489       return;
2490     }
2491 
2492   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2493      fail now.  */
2494   if (t == 0)
2495     {
2496       if (MULT_COST_LESS (cost_limit, zero_cost))
2497 	return;
2498       else
2499 	{
2500 	  alg_out->ops = 1;
2501 	  alg_out->cost.cost = zero_cost;
2502 	  alg_out->cost.latency = zero_cost;
2503 	  alg_out->op[0] = alg_zero;
2504 	  return;
2505 	}
2506     }
2507 
2508   /* We'll be needing a couple extra algorithm structures now.  */
2509 
2510   alg_in = alloca (sizeof (struct algorithm));
2511   best_alg = alloca (sizeof (struct algorithm));
2512   best_cost = *cost_limit;
2513 
2514   /* Compute the hash index.  */
2515   hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2516 
2517   /* See if we already know what to do for T.  */
2518   if (alg_hash[hash_index].t == t
2519       && alg_hash[hash_index].mode == mode
2520       && alg_hash[hash_index].alg != alg_unknown)
2521     {
2522       cache_alg = alg_hash[hash_index].alg;
2523 
2524       if (cache_alg == alg_impossible)
2525 	{
2526 	  /* The cache tells us that it's impossible to synthesize
2527 	     multiplication by T within alg_hash[hash_index].cost.  */
2528 	  if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2529 	    /* COST_LIMIT is at least as restrictive as the one
2530 	       recorded in the hash table, in which case we have no
2531 	       hope of synthesizing a multiplication.  Just
2532 	       return.  */
2533 	    return;
2534 
2535 	  /* If we get here, COST_LIMIT is less restrictive than the
2536 	     one recorded in the hash table, so we may be able to
2537 	     synthesize a multiplication.  Proceed as if we didn't
2538 	     have the cache entry.  */
2539 	}
2540       else
2541 	{
2542 	  if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2543 	    /* The cached algorithm shows that this multiplication
2544 	       requires more cost than COST_LIMIT.  Just return.  This
2545 	       way, we don't clobber this cache entry with
2546 	       alg_impossible but retain useful information.  */
2547 	    return;
2548 
2549 	  cache_hit = true;
2550 
2551 	  switch (cache_alg)
2552 	    {
2553 	    case alg_shift:
2554 	      goto do_alg_shift;
2555 
2556 	    case alg_add_t_m2:
2557 	    case alg_sub_t_m2:
2558 	      goto do_alg_addsub_t_m2;
2559 
2560 	    case alg_add_factor:
2561 	    case alg_sub_factor:
2562 	      goto do_alg_addsub_factor;
2563 
2564 	    case alg_add_t2_m:
2565 	      goto do_alg_add_t2_m;
2566 
2567 	    case alg_sub_t2_m:
2568 	      goto do_alg_sub_t2_m;
2569 
2570 	    default:
2571 	      gcc_unreachable ();
2572 	    }
2573 	}
2574     }
2575 
2576   /* If we have a group of zero bits at the low-order part of T, try
2577      multiplying by the remaining bits and then doing a shift.  */
2578 
2579   if ((t & 1) == 0)
2580     {
2581     do_alg_shift:
2582       m = floor_log2 (t & -t);	/* m = number of low zero bits */
2583       if (m < maxm)
2584 	{
2585 	  q = t >> m;
2586 	  /* The function expand_shift will choose between a shift and
2587 	     a sequence of additions, so the observed cost is given as
2588 	     MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2589 	  op_cost = m * add_cost[mode];
2590 	  if (shift_cost[mode][m] < op_cost)
2591 	    op_cost = shift_cost[mode][m];
2592 	  new_limit.cost = best_cost.cost - op_cost;
2593 	  new_limit.latency = best_cost.latency - op_cost;
2594 	  synth_mult (alg_in, q, &new_limit, mode);
2595 
2596 	  alg_in->cost.cost += op_cost;
2597 	  alg_in->cost.latency += op_cost;
2598 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2599 	    {
2600 	      struct algorithm *x;
2601 	      best_cost = alg_in->cost;
2602 	      x = alg_in, alg_in = best_alg, best_alg = x;
2603 	      best_alg->log[best_alg->ops] = m;
2604 	      best_alg->op[best_alg->ops] = alg_shift;
2605 	    }
2606 	}
2607       if (cache_hit)
2608 	goto done;
2609     }
2610 
2611   /* If we have an odd number, add or subtract one.  */
2612   if ((t & 1) != 0)
2613     {
2614       unsigned HOST_WIDE_INT w;
2615 
2616     do_alg_addsub_t_m2:
2617       for (w = 1; (w & t) != 0; w <<= 1)
2618 	;
2619       /* If T was -1, then W will be zero after the loop.  This is another
2620 	 case where T ends with ...111.  Handling this with (T + 1) and
2621 	 subtract 1 produces slightly better code and results in algorithm
2622 	 selection much faster than treating it like the ...0111 case
2623 	 below.  */
2624       if (w == 0
2625 	  || (w > 2
2626 	      /* Reject the case where t is 3.
2627 		 Thus we prefer addition in that case.  */
2628 	      && t != 3))
2629 	{
2630 	  /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2631 
2632 	  op_cost = add_cost[mode];
2633 	  new_limit.cost = best_cost.cost - op_cost;
2634 	  new_limit.latency = best_cost.latency - op_cost;
2635 	  synth_mult (alg_in, t + 1, &new_limit, mode);
2636 
2637 	  alg_in->cost.cost += op_cost;
2638 	  alg_in->cost.latency += op_cost;
2639 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2640 	    {
2641 	      struct algorithm *x;
2642 	      best_cost = alg_in->cost;
2643 	      x = alg_in, alg_in = best_alg, best_alg = x;
2644 	      best_alg->log[best_alg->ops] = 0;
2645 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2646 	    }
2647 	}
2648       else
2649 	{
2650 	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2651 
2652 	  op_cost = add_cost[mode];
2653 	  new_limit.cost = best_cost.cost - op_cost;
2654 	  new_limit.latency = best_cost.latency - op_cost;
2655 	  synth_mult (alg_in, t - 1, &new_limit, mode);
2656 
2657 	  alg_in->cost.cost += op_cost;
2658 	  alg_in->cost.latency += op_cost;
2659 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2660 	    {
2661 	      struct algorithm *x;
2662 	      best_cost = alg_in->cost;
2663 	      x = alg_in, alg_in = best_alg, best_alg = x;
2664 	      best_alg->log[best_alg->ops] = 0;
2665 	      best_alg->op[best_alg->ops] = alg_add_t_m2;
2666 	    }
2667 	}
2668       if (cache_hit)
2669 	goto done;
2670     }
2671 
2672   /* Look for factors of t of the form
2673      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2674      If we find such a factor, we can multiply by t using an algorithm that
2675      multiplies by q, shift the result by m and add/subtract it to itself.
2676 
2677      We search for large factors first and loop down, even if large factors
2678      are less probable than small; if we find a large factor we will find a
2679      good sequence quickly, and therefore be able to prune (by decreasing
2680      COST_LIMIT) the search.  */
2681 
2682  do_alg_addsub_factor:
2683   for (m = floor_log2 (t - 1); m >= 2; m--)
2684     {
2685       unsigned HOST_WIDE_INT d;
2686 
2687       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2688       if (t % d == 0 && t > d && m < maxm
2689 	  && (!cache_hit || cache_alg == alg_add_factor))
2690 	{
2691 	  /* If the target has a cheap shift-and-add instruction use
2692 	     that in preference to a shift insn followed by an add insn.
2693 	     Assume that the shift-and-add is "atomic" with a latency
2694 	     equal to its cost, otherwise assume that on superscalar
2695 	     hardware the shift may be executed concurrently with the
2696 	     earlier steps in the algorithm.  */
2697 	  op_cost = add_cost[mode] + shift_cost[mode][m];
2698 	  if (shiftadd_cost[mode][m] < op_cost)
2699 	    {
2700 	      op_cost = shiftadd_cost[mode][m];
2701 	      op_latency = op_cost;
2702 	    }
2703 	  else
2704 	    op_latency = add_cost[mode];
2705 
2706 	  new_limit.cost = best_cost.cost - op_cost;
2707 	  new_limit.latency = best_cost.latency - op_latency;
2708 	  synth_mult (alg_in, t / d, &new_limit, mode);
2709 
2710 	  alg_in->cost.cost += op_cost;
2711 	  alg_in->cost.latency += op_latency;
2712 	  if (alg_in->cost.latency < op_cost)
2713 	    alg_in->cost.latency = op_cost;
2714 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2715 	    {
2716 	      struct algorithm *x;
2717 	      best_cost = alg_in->cost;
2718 	      x = alg_in, alg_in = best_alg, best_alg = x;
2719 	      best_alg->log[best_alg->ops] = m;
2720 	      best_alg->op[best_alg->ops] = alg_add_factor;
2721 	    }
2722 	  /* Other factors will have been taken care of in the recursion.  */
2723 	  break;
2724 	}
2725 
2726       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2727       if (t % d == 0 && t > d && m < maxm
2728 	  && (!cache_hit || cache_alg == alg_sub_factor))
2729 	{
2730 	  /* If the target has a cheap shift-and-subtract insn use
2731 	     that in preference to a shift insn followed by a sub insn.
2732 	     Assume that the shift-and-sub is "atomic" with a latency
2733 	     equal to it's cost, otherwise assume that on superscalar
2734 	     hardware the shift may be executed concurrently with the
2735 	     earlier steps in the algorithm.  */
2736 	  op_cost = add_cost[mode] + shift_cost[mode][m];
2737 	  if (shiftsub_cost[mode][m] < op_cost)
2738 	    {
2739 	      op_cost = shiftsub_cost[mode][m];
2740 	      op_latency = op_cost;
2741 	    }
2742 	  else
2743 	    op_latency = add_cost[mode];
2744 
2745 	  new_limit.cost = best_cost.cost - op_cost;
2746 	  new_limit.latency = best_cost.latency - op_latency;
2747 	  synth_mult (alg_in, t / d, &new_limit, mode);
2748 
2749 	  alg_in->cost.cost += op_cost;
2750 	  alg_in->cost.latency += op_latency;
2751 	  if (alg_in->cost.latency < op_cost)
2752 	    alg_in->cost.latency = op_cost;
2753 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2754 	    {
2755 	      struct algorithm *x;
2756 	      best_cost = alg_in->cost;
2757 	      x = alg_in, alg_in = best_alg, best_alg = x;
2758 	      best_alg->log[best_alg->ops] = m;
2759 	      best_alg->op[best_alg->ops] = alg_sub_factor;
2760 	    }
2761 	  break;
2762 	}
2763     }
2764   if (cache_hit)
2765     goto done;
2766 
2767   /* Try shift-and-add (load effective address) instructions,
2768      i.e. do a*3, a*5, a*9.  */
2769   if ((t & 1) != 0)
2770     {
2771     do_alg_add_t2_m:
2772       q = t - 1;
2773       q = q & -q;
2774       m = exact_log2 (q);
2775       if (m >= 0 && m < maxm)
2776 	{
2777 	  op_cost = shiftadd_cost[mode][m];
2778 	  new_limit.cost = best_cost.cost - op_cost;
2779 	  new_limit.latency = best_cost.latency - op_cost;
2780 	  synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2781 
2782 	  alg_in->cost.cost += op_cost;
2783 	  alg_in->cost.latency += op_cost;
2784 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2785 	    {
2786 	      struct algorithm *x;
2787 	      best_cost = alg_in->cost;
2788 	      x = alg_in, alg_in = best_alg, best_alg = x;
2789 	      best_alg->log[best_alg->ops] = m;
2790 	      best_alg->op[best_alg->ops] = alg_add_t2_m;
2791 	    }
2792 	}
2793       if (cache_hit)
2794 	goto done;
2795 
2796     do_alg_sub_t2_m:
2797       q = t + 1;
2798       q = q & -q;
2799       m = exact_log2 (q);
2800       if (m >= 0 && m < maxm)
2801 	{
2802 	  op_cost = shiftsub_cost[mode][m];
2803 	  new_limit.cost = best_cost.cost - op_cost;
2804 	  new_limit.latency = best_cost.latency - op_cost;
2805 	  synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2806 
2807 	  alg_in->cost.cost += op_cost;
2808 	  alg_in->cost.latency += op_cost;
2809 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2810 	    {
2811 	      struct algorithm *x;
2812 	      best_cost = alg_in->cost;
2813 	      x = alg_in, alg_in = best_alg, best_alg = x;
2814 	      best_alg->log[best_alg->ops] = m;
2815 	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
2816 	    }
2817 	}
2818       if (cache_hit)
2819 	goto done;
2820     }
2821 
2822  done:
2823   /* If best_cost has not decreased, we have not found any algorithm.  */
2824   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2825     {
2826       /* We failed to find an algorithm.  Record alg_impossible for
2827 	 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2828 	 we are asked to find an algorithm for T within the same or
2829 	 lower COST_LIMIT, we can immediately return to the
2830 	 caller.  */
2831       alg_hash[hash_index].t = t;
2832       alg_hash[hash_index].mode = mode;
2833       alg_hash[hash_index].alg = alg_impossible;
2834       alg_hash[hash_index].cost = *cost_limit;
2835       return;
2836     }
2837 
2838   /* Cache the result.  */
2839   if (!cache_hit)
2840     {
2841       alg_hash[hash_index].t = t;
2842       alg_hash[hash_index].mode = mode;
2843       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2844       alg_hash[hash_index].cost.cost = best_cost.cost;
2845       alg_hash[hash_index].cost.latency = best_cost.latency;
2846     }
2847 
2848   /* If we are getting a too long sequence for `struct algorithm'
2849      to record, make this search fail.  */
2850   if (best_alg->ops == MAX_BITS_PER_WORD)
2851     return;
2852 
2853   /* Copy the algorithm from temporary space to the space at alg_out.
2854      We avoid using structure assignment because the majority of
2855      best_alg is normally undefined, and this is a critical function.  */
2856   alg_out->ops = best_alg->ops + 1;
2857   alg_out->cost = best_cost;
2858   memcpy (alg_out->op, best_alg->op,
2859 	  alg_out->ops * sizeof *alg_out->op);
2860   memcpy (alg_out->log, best_alg->log,
2861 	  alg_out->ops * sizeof *alg_out->log);
2862 }
2863 
2864 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2865    Try three variations:
2866 
2867        - a shift/add sequence based on VAL itself
2868        - a shift/add sequence based on -VAL, followed by a negation
2869        - a shift/add sequence based on VAL - 1, followed by an addition.
2870 
2871    Return true if the cheapest of these cost less than MULT_COST,
2872    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2873 
2874 static bool
choose_mult_variant(enum machine_mode mode,HOST_WIDE_INT val,struct algorithm * alg,enum mult_variant * variant,int mult_cost)2875 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2876 		     struct algorithm *alg, enum mult_variant *variant,
2877 		     int mult_cost)
2878 {
2879   struct algorithm alg2;
2880   struct mult_cost limit;
2881   int op_cost;
2882 
2883   /* Fail quickly for impossible bounds.  */
2884   if (mult_cost < 0)
2885     return false;
2886 
2887   /* Ensure that mult_cost provides a reasonable upper bound.
2888      Any constant multiplication can be performed with less
2889      than 2 * bits additions.  */
2890   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
2891   if (mult_cost > op_cost)
2892     mult_cost = op_cost;
2893 
2894   *variant = basic_variant;
2895   limit.cost = mult_cost;
2896   limit.latency = mult_cost;
2897   synth_mult (alg, val, &limit, mode);
2898 
2899   /* This works only if the inverted value actually fits in an
2900      `unsigned int' */
2901   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2902     {
2903       op_cost = neg_cost[mode];
2904       if (MULT_COST_LESS (&alg->cost, mult_cost))
2905 	{
2906 	  limit.cost = alg->cost.cost - op_cost;
2907 	  limit.latency = alg->cost.latency - op_cost;
2908 	}
2909       else
2910 	{
2911 	  limit.cost = mult_cost - op_cost;
2912 	  limit.latency = mult_cost - op_cost;
2913 	}
2914 
2915       synth_mult (&alg2, -val, &limit, mode);
2916       alg2.cost.cost += op_cost;
2917       alg2.cost.latency += op_cost;
2918       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2919 	*alg = alg2, *variant = negate_variant;
2920     }
2921 
2922   /* This proves very useful for division-by-constant.  */
2923   op_cost = add_cost[mode];
2924   if (MULT_COST_LESS (&alg->cost, mult_cost))
2925     {
2926       limit.cost = alg->cost.cost - op_cost;
2927       limit.latency = alg->cost.latency - op_cost;
2928     }
2929   else
2930     {
2931       limit.cost = mult_cost - op_cost;
2932       limit.latency = mult_cost - op_cost;
2933     }
2934 
2935   synth_mult (&alg2, val - 1, &limit, mode);
2936   alg2.cost.cost += op_cost;
2937   alg2.cost.latency += op_cost;
2938   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2939     *alg = alg2, *variant = add_variant;
2940 
2941   return MULT_COST_LESS (&alg->cost, mult_cost);
2942 }
2943 
2944 /* A subroutine of expand_mult, used for constant multiplications.
2945    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2946    convenient.  Use the shift/add sequence described by ALG and apply
2947    the final fixup specified by VARIANT.  */
2948 
2949 static rtx
expand_mult_const(enum machine_mode mode,rtx op0,HOST_WIDE_INT val,rtx target,const struct algorithm * alg,enum mult_variant variant)2950 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2951 		   rtx target, const struct algorithm *alg,
2952 		   enum mult_variant variant)
2953 {
2954   HOST_WIDE_INT val_so_far;
2955   rtx insn, accum, tem;
2956   int opno;
2957   enum machine_mode nmode;
2958 
2959   /* Avoid referencing memory over and over.
2960      For speed, but also for correctness when mem is volatile.  */
2961   if (MEM_P (op0))
2962     op0 = force_reg (mode, op0);
2963 
2964   /* ACCUM starts out either as OP0 or as a zero, depending on
2965      the first operation.  */
2966 
2967   if (alg->op[0] == alg_zero)
2968     {
2969       accum = copy_to_mode_reg (mode, const0_rtx);
2970       val_so_far = 0;
2971     }
2972   else if (alg->op[0] == alg_m)
2973     {
2974       accum = copy_to_mode_reg (mode, op0);
2975       val_so_far = 1;
2976     }
2977   else
2978     gcc_unreachable ();
2979 
2980   for (opno = 1; opno < alg->ops; opno++)
2981     {
2982       int log = alg->log[opno];
2983       rtx shift_subtarget = optimize ? 0 : accum;
2984       rtx add_target
2985 	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
2986 	   && !optimize)
2987 	  ? target : 0;
2988       rtx accum_target = optimize ? 0 : accum;
2989 
2990       switch (alg->op[opno])
2991 	{
2992 	case alg_shift:
2993 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
2994 				build_int_cst (NULL_TREE, log),
2995 				NULL_RTX, 0);
2996 	  val_so_far <<= log;
2997 	  break;
2998 
2999 	case alg_add_t_m2:
3000 	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
3001 			      build_int_cst (NULL_TREE, log),
3002 			      NULL_RTX, 0);
3003 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3004 				 add_target ? add_target : accum_target);
3005 	  val_so_far += (HOST_WIDE_INT) 1 << log;
3006 	  break;
3007 
3008 	case alg_sub_t_m2:
3009 	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
3010 			      build_int_cst (NULL_TREE, log),
3011 			      NULL_RTX, 0);
3012 	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3013 				 add_target ? add_target : accum_target);
3014 	  val_so_far -= (HOST_WIDE_INT) 1 << log;
3015 	  break;
3016 
3017 	case alg_add_t2_m:
3018 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3019 				build_int_cst (NULL_TREE, log),
3020 				shift_subtarget,
3021 				0);
3022 	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3023 				 add_target ? add_target : accum_target);
3024 	  val_so_far = (val_so_far << log) + 1;
3025 	  break;
3026 
3027 	case alg_sub_t2_m:
3028 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3029 				build_int_cst (NULL_TREE, log),
3030 				shift_subtarget, 0);
3031 	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3032 				 add_target ? add_target : accum_target);
3033 	  val_so_far = (val_so_far << log) - 1;
3034 	  break;
3035 
3036 	case alg_add_factor:
3037 	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
3038 			      build_int_cst (NULL_TREE, log),
3039 			      NULL_RTX, 0);
3040 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3041 				 add_target ? add_target : accum_target);
3042 	  val_so_far += val_so_far << log;
3043 	  break;
3044 
3045 	case alg_sub_factor:
3046 	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
3047 			      build_int_cst (NULL_TREE, log),
3048 			      NULL_RTX, 0);
3049 	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3050 				 (add_target
3051 				  ? add_target : (optimize ? 0 : tem)));
3052 	  val_so_far = (val_so_far << log) - val_so_far;
3053 	  break;
3054 
3055 	default:
3056 	  gcc_unreachable ();
3057 	}
3058 
3059       /* Write a REG_EQUAL note on the last insn so that we can cse
3060 	 multiplication sequences.  Note that if ACCUM is a SUBREG,
3061 	 we've set the inner register and must properly indicate
3062 	 that.  */
3063 
3064       tem = op0, nmode = mode;
3065       if (GET_CODE (accum) == SUBREG)
3066 	{
3067 	  nmode = GET_MODE (SUBREG_REG (accum));
3068 	  tem = gen_lowpart (nmode, op0);
3069 	}
3070 
3071       insn = get_last_insn ();
3072       set_unique_reg_note (insn, REG_EQUAL,
3073 			   gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
3074     }
3075 
3076   if (variant == negate_variant)
3077     {
3078       val_so_far = -val_so_far;
3079       accum = expand_unop (mode, neg_optab, accum, target, 0);
3080     }
3081   else if (variant == add_variant)
3082     {
3083       val_so_far = val_so_far + 1;
3084       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3085     }
3086 
3087   /* Compare only the bits of val and val_so_far that are significant
3088      in the result mode, to avoid sign-/zero-extension confusion.  */
3089   val &= GET_MODE_MASK (mode);
3090   val_so_far &= GET_MODE_MASK (mode);
3091   gcc_assert (val == val_so_far);
3092 
3093   return accum;
3094 }
3095 
3096 /* Perform a multiplication and return an rtx for the result.
3097    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3098    TARGET is a suggestion for where to store the result (an rtx).
3099 
3100    We check specially for a constant integer as OP1.
3101    If you want this check for OP0 as well, then before calling
3102    you should swap the two operands if OP0 would be constant.  */
3103 
3104 rtx
expand_mult(enum machine_mode mode,rtx op0,rtx op1,rtx target,int unsignedp)3105 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3106 	     int unsignedp)
3107 {
3108   enum mult_variant variant;
3109   struct algorithm algorithm;
3110   int max_cost;
3111 
3112   /* Handling const0_rtx here allows us to use zero as a rogue value for
3113      coeff below.  */
3114   if (op1 == const0_rtx)
3115     return const0_rtx;
3116   if (op1 == const1_rtx)
3117     return op0;
3118   if (op1 == constm1_rtx)
3119     return expand_unop (mode,
3120 			GET_MODE_CLASS (mode) == MODE_INT
3121 			&& !unsignedp && flag_trapv
3122 			? negv_optab : neg_optab,
3123 			op0, target, 0);
3124 
3125   /* These are the operations that are potentially turned into a sequence
3126      of shifts and additions.  */
3127   if (SCALAR_INT_MODE_P (mode)
3128       && (unsignedp || !flag_trapv))
3129     {
3130       HOST_WIDE_INT coeff = 0;
3131       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3132 
3133       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3134 	 less than or equal in size to `unsigned int' this doesn't matter.
3135 	 If the mode is larger than `unsigned int', then synth_mult works
3136 	 only if the constant value exactly fits in an `unsigned int' without
3137 	 any truncation.  This means that multiplying by negative values does
3138 	 not work; results are off by 2^32 on a 32 bit machine.  */
3139 
3140       if (GET_CODE (op1) == CONST_INT)
3141 	{
3142 	  /* Attempt to handle multiplication of DImode values by negative
3143 	     coefficients, by performing the multiplication by a positive
3144 	     multiplier and then inverting the result.  */
3145 	  if (INTVAL (op1) < 0
3146 	      && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3147 	    {
3148 	      /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3149 		 result is interpreted as an unsigned coefficient.
3150 		 Exclude cost of op0 from max_cost to match the cost
3151 		 calculation of the synth_mult.  */
3152 	      max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
3153 			 - neg_cost[mode];
3154 	      if (max_cost > 0
3155 		  && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3156 					  &variant, max_cost))
3157 		{
3158 		  rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3159 						NULL_RTX, &algorithm,
3160 						variant);
3161 		  return expand_unop (mode, neg_optab, temp, target, 0);
3162 		}
3163 	    }
3164 	  else coeff = INTVAL (op1);
3165 	}
3166       else if (GET_CODE (op1) == CONST_DOUBLE)
3167 	{
3168 	  /* If we are multiplying in DImode, it may still be a win
3169 	     to try to work with shifts and adds.  */
3170 	  if (CONST_DOUBLE_HIGH (op1) == 0)
3171 	    coeff = CONST_DOUBLE_LOW (op1);
3172 	  else if (CONST_DOUBLE_LOW (op1) == 0
3173 		   && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3174 	    {
3175 	      int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3176 			  + HOST_BITS_PER_WIDE_INT;
3177 	      return expand_shift (LSHIFT_EXPR, mode, op0,
3178 				   build_int_cst (NULL_TREE, shift),
3179 				   target, unsignedp);
3180 	    }
3181 	}
3182 
3183       /* We used to test optimize here, on the grounds that it's better to
3184 	 produce a smaller program when -O is not used.  But this causes
3185 	 such a terrible slowdown sometimes that it seems better to always
3186 	 use synth_mult.  */
3187       if (coeff != 0)
3188 	{
3189 	  /* Special case powers of two.  */
3190 	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3191 	    return expand_shift (LSHIFT_EXPR, mode, op0,
3192 				 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3193 				 target, unsignedp);
3194 
3195 	  /* Exclude cost of op0 from max_cost to match the cost
3196 	     calculation of the synth_mult.  */
3197 	  max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
3198 	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3199 				   max_cost))
3200 	    return expand_mult_const (mode, op0, coeff, target,
3201 				      &algorithm, variant);
3202 	}
3203     }
3204 
3205   if (GET_CODE (op0) == CONST_DOUBLE)
3206     {
3207       rtx temp = op0;
3208       op0 = op1;
3209       op1 = temp;
3210     }
3211 
3212   /* Expand x*2.0 as x+x.  */
3213   if (GET_CODE (op1) == CONST_DOUBLE
3214       && SCALAR_FLOAT_MODE_P (mode))
3215     {
3216       REAL_VALUE_TYPE d;
3217       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3218 
3219       if (REAL_VALUES_EQUAL (d, dconst2))
3220 	{
3221 	  op0 = force_reg (GET_MODE (op0), op0);
3222 	  return expand_binop (mode, add_optab, op0, op0,
3223 			       target, unsignedp, OPTAB_LIB_WIDEN);
3224 	}
3225     }
3226 
3227   /* This used to use umul_optab if unsigned, but for non-widening multiply
3228      there is no difference between signed and unsigned.  */
3229   op0 = expand_binop (mode,
3230 		      ! unsignedp
3231 		      && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3232 		      ? smulv_optab : smul_optab,
3233 		      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3234   gcc_assert (op0);
3235   return op0;
3236 }
3237 
3238 /* Return the smallest n such that 2**n >= X.  */
3239 
3240 int
ceil_log2(unsigned HOST_WIDE_INT x)3241 ceil_log2 (unsigned HOST_WIDE_INT x)
3242 {
3243   return floor_log2 (x - 1) + 1;
3244 }
3245 
3246 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3247    replace division by D, and put the least significant N bits of the result
3248    in *MULTIPLIER_PTR and return the most significant bit.
3249 
3250    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3251    needed precision is in PRECISION (should be <= N).
3252 
3253    PRECISION should be as small as possible so this function can choose
3254    multiplier more freely.
3255 
3256    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3257    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3258 
3259    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3260    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3261 
3262 static
3263 unsigned HOST_WIDE_INT
choose_multiplier(unsigned HOST_WIDE_INT d,int n,int precision,rtx * multiplier_ptr,int * post_shift_ptr,int * lgup_ptr)3264 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3265 		   rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3266 {
3267   HOST_WIDE_INT mhigh_hi, mlow_hi;
3268   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3269   int lgup, post_shift;
3270   int pow, pow2;
3271   unsigned HOST_WIDE_INT nl, dummy1;
3272   HOST_WIDE_INT nh, dummy2;
3273 
3274   /* lgup = ceil(log2(divisor)); */
3275   lgup = ceil_log2 (d);
3276 
3277   gcc_assert (lgup <= n);
3278 
3279   pow = n + lgup;
3280   pow2 = n + lgup - precision;
3281 
3282   /* We could handle this with some effort, but this case is much
3283      better handled directly with a scc insn, so rely on caller using
3284      that.  */
3285   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3286 
3287   /* mlow = 2^(N + lgup)/d */
3288  if (pow >= HOST_BITS_PER_WIDE_INT)
3289     {
3290       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3291       nl = 0;
3292     }
3293   else
3294     {
3295       nh = 0;
3296       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3297     }
3298   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3299 			&mlow_lo, &mlow_hi, &dummy1, &dummy2);
3300 
3301   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3302   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3303     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3304   else
3305     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3306   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3307 			&mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3308 
3309   gcc_assert (!mhigh_hi || nh - d < d);
3310   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3311   /* Assert that mlow < mhigh.  */
3312   gcc_assert (mlow_hi < mhigh_hi
3313 	      || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3314 
3315   /* If precision == N, then mlow, mhigh exceed 2^N
3316      (but they do not exceed 2^(N+1)).  */
3317 
3318   /* Reduce to lowest terms.  */
3319   for (post_shift = lgup; post_shift > 0; post_shift--)
3320     {
3321       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3322       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3323       if (ml_lo >= mh_lo)
3324 	break;
3325 
3326       mlow_hi = 0;
3327       mlow_lo = ml_lo;
3328       mhigh_hi = 0;
3329       mhigh_lo = mh_lo;
3330     }
3331 
3332   *post_shift_ptr = post_shift;
3333   *lgup_ptr = lgup;
3334   if (n < HOST_BITS_PER_WIDE_INT)
3335     {
3336       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3337       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3338       return mhigh_lo >= mask;
3339     }
3340   else
3341     {
3342       *multiplier_ptr = GEN_INT (mhigh_lo);
3343       return mhigh_hi;
3344     }
3345 }
3346 
3347 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3348    congruent to 1 (mod 2**N).  */
3349 
3350 static unsigned HOST_WIDE_INT
invert_mod2n(unsigned HOST_WIDE_INT x,int n)3351 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3352 {
3353   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3354 
3355   /* The algorithm notes that the choice y = x satisfies
3356      x*y == 1 mod 2^3, since x is assumed odd.
3357      Each iteration doubles the number of bits of significance in y.  */
3358 
3359   unsigned HOST_WIDE_INT mask;
3360   unsigned HOST_WIDE_INT y = x;
3361   int nbit = 3;
3362 
3363   mask = (n == HOST_BITS_PER_WIDE_INT
3364 	  ? ~(unsigned HOST_WIDE_INT) 0
3365 	  : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3366 
3367   while (nbit < n)
3368     {
3369       y = y * (2 - x*y) & mask;		/* Modulo 2^N */
3370       nbit *= 2;
3371     }
3372   return y;
3373 }
3374 
3375 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3376    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3377    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3378    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3379    become signed.
3380 
3381    The result is put in TARGET if that is convenient.
3382 
3383    MODE is the mode of operation.  */
3384 
3385 rtx
expand_mult_highpart_adjust(enum machine_mode mode,rtx adj_operand,rtx op0,rtx op1,rtx target,int unsignedp)3386 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3387 			     rtx op1, rtx target, int unsignedp)
3388 {
3389   rtx tem;
3390   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3391 
3392   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3393 		      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3394 		      NULL_RTX, 0);
3395   tem = expand_and (mode, tem, op1, NULL_RTX);
3396   adj_operand
3397     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3398 		     adj_operand);
3399 
3400   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3401 		      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3402 		      NULL_RTX, 0);
3403   tem = expand_and (mode, tem, op0, NULL_RTX);
3404   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3405 			  target);
3406 
3407   return target;
3408 }
3409 
3410 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3411 
3412 static rtx
extract_high_half(enum machine_mode mode,rtx op)3413 extract_high_half (enum machine_mode mode, rtx op)
3414 {
3415   enum machine_mode wider_mode;
3416 
3417   if (mode == word_mode)
3418     return gen_highpart (mode, op);
3419 
3420   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3421 
3422   wider_mode = GET_MODE_WIDER_MODE (mode);
3423   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3424 		     build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3425   return convert_modes (mode, wider_mode, op, 0);
3426 }
3427 
3428 /* Like expand_mult_highpart, but only consider using a multiplication
3429    optab.  OP1 is an rtx for the constant operand.  */
3430 
3431 static rtx
expand_mult_highpart_optab(enum machine_mode mode,rtx op0,rtx op1,rtx target,int unsignedp,int max_cost)3432 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3433 			    rtx target, int unsignedp, int max_cost)
3434 {
3435   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3436   enum machine_mode wider_mode;
3437   optab moptab;
3438   rtx tem;
3439   int size;
3440 
3441   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3442 
3443   wider_mode = GET_MODE_WIDER_MODE (mode);
3444   size = GET_MODE_BITSIZE (mode);
3445 
3446   /* Firstly, try using a multiplication insn that only generates the needed
3447      high part of the product, and in the sign flavor of unsignedp.  */
3448   if (mul_highpart_cost[mode] < max_cost)
3449     {
3450       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3451       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3452 			  unsignedp, OPTAB_DIRECT);
3453       if (tem)
3454 	return tem;
3455     }
3456 
3457   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3458      Need to adjust the result after the multiplication.  */
3459   if (size - 1 < BITS_PER_WORD
3460       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3461 	  + 4 * add_cost[mode] < max_cost))
3462     {
3463       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3464       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3465 			  unsignedp, OPTAB_DIRECT);
3466       if (tem)
3467 	/* We used the wrong signedness.  Adjust the result.  */
3468 	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3469 					    tem, unsignedp);
3470     }
3471 
3472   /* Try widening multiplication.  */
3473   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3474   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3475       && mul_widen_cost[wider_mode] < max_cost)
3476     {
3477       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3478 			  unsignedp, OPTAB_WIDEN);
3479       if (tem)
3480 	return extract_high_half (mode, tem);
3481     }
3482 
3483   /* Try widening the mode and perform a non-widening multiplication.  */
3484   if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3485       && size - 1 < BITS_PER_WORD
3486       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3487     {
3488       rtx insns, wop0, wop1;
3489 
3490       /* We need to widen the operands, for example to ensure the
3491 	 constant multiplier is correctly sign or zero extended.
3492 	 Use a sequence to clean-up any instructions emitted by
3493 	 the conversions if things don't work out.  */
3494       start_sequence ();
3495       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3496       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3497       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3498 			  unsignedp, OPTAB_WIDEN);
3499       insns = get_insns ();
3500       end_sequence ();
3501 
3502       if (tem)
3503 	{
3504 	  emit_insn (insns);
3505 	  return extract_high_half (mode, tem);
3506 	}
3507     }
3508 
3509   /* Try widening multiplication of opposite signedness, and adjust.  */
3510   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3511   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3512       && size - 1 < BITS_PER_WORD
3513       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3514 	  + 4 * add_cost[mode] < max_cost))
3515     {
3516       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3517 			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3518       if (tem != 0)
3519 	{
3520 	  tem = extract_high_half (mode, tem);
3521 	  /* We used the wrong signedness.  Adjust the result.  */
3522 	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3523 					      target, unsignedp);
3524 	}
3525     }
3526 
3527   return 0;
3528 }
3529 
3530 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3531    putting the high half of the result in TARGET if that is convenient,
3532    and return where the result is.  If the operation can not be performed,
3533    0 is returned.
3534 
3535    MODE is the mode of operation and result.
3536 
3537    UNSIGNEDP nonzero means unsigned multiply.
3538 
3539    MAX_COST is the total allowed cost for the expanded RTL.  */
3540 
3541 static rtx
expand_mult_highpart(enum machine_mode mode,rtx op0,rtx op1,rtx target,int unsignedp,int max_cost)3542 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3543 		      rtx target, int unsignedp, int max_cost)
3544 {
3545   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3546   unsigned HOST_WIDE_INT cnst1;
3547   int extra_cost;
3548   bool sign_adjust = false;
3549   enum mult_variant variant;
3550   struct algorithm alg;
3551   rtx tem;
3552 
3553   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3554   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3555   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3556 
3557   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3558 
3559   /* We can't optimize modes wider than BITS_PER_WORD.
3560      ??? We might be able to perform double-word arithmetic if
3561      mode == word_mode, however all the cost calculations in
3562      synth_mult etc. assume single-word operations.  */
3563   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3564     return expand_mult_highpart_optab (mode, op0, op1, target,
3565 				       unsignedp, max_cost);
3566 
3567   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3568 
3569   /* Check whether we try to multiply by a negative constant.  */
3570   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3571     {
3572       sign_adjust = true;
3573       extra_cost += add_cost[mode];
3574     }
3575 
3576   /* See whether shift/add multiplication is cheap enough.  */
3577   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3578 			   max_cost - extra_cost))
3579     {
3580       /* See whether the specialized multiplication optabs are
3581 	 cheaper than the shift/add version.  */
3582       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3583 					alg.cost.cost + extra_cost);
3584       if (tem)
3585 	return tem;
3586 
3587       tem = convert_to_mode (wider_mode, op0, unsignedp);
3588       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3589       tem = extract_high_half (mode, tem);
3590 
3591       /* Adjust result for signedness.  */
3592       if (sign_adjust)
3593 	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3594 
3595       return tem;
3596     }
3597   return expand_mult_highpart_optab (mode, op0, op1, target,
3598 				     unsignedp, max_cost);
3599 }
3600 
3601 
3602 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3603 
3604 static rtx
expand_smod_pow2(enum machine_mode mode,rtx op0,HOST_WIDE_INT d)3605 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3606 {
3607   unsigned HOST_WIDE_INT masklow, maskhigh;
3608   rtx result, temp, shift, label;
3609   int logd;
3610 
3611   logd = floor_log2 (d);
3612   result = gen_reg_rtx (mode);
3613 
3614   /* Avoid conditional branches when they're expensive.  */
3615   if (BRANCH_COST >= 2
3616       && !optimize_size)
3617     {
3618       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3619 				      mode, 0, -1);
3620       if (signmask)
3621 	{
3622 	  signmask = force_reg (mode, signmask);
3623 	  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3624 	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3625 
3626 	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
3627 	     which instruction sequence to use.  If logical right shifts
3628 	     are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3629 	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3630 
3631 	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
3632 	  if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3633 	      || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3634 	    {
3635 	      temp = expand_binop (mode, xor_optab, op0, signmask,
3636 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3637 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3638 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3639 	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3640 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3641 	      temp = expand_binop (mode, xor_optab, temp, signmask,
3642 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3643 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3644 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3645 	    }
3646 	  else
3647 	    {
3648 	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
3649 				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
3650 	      signmask = force_reg (mode, signmask);
3651 
3652 	      temp = expand_binop (mode, add_optab, op0, signmask,
3653 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3654 	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3655 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3656 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3657 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3658 	    }
3659 	  return temp;
3660 	}
3661     }
3662 
3663   /* Mask contains the mode's signbit and the significant bits of the
3664      modulus.  By including the signbit in the operation, many targets
3665      can avoid an explicit compare operation in the following comparison
3666      against zero.  */
3667 
3668   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3669   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3670     {
3671       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3672       maskhigh = -1;
3673     }
3674   else
3675     maskhigh = (HOST_WIDE_INT) -1
3676 		 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3677 
3678   temp = expand_binop (mode, and_optab, op0,
3679 		       immed_double_const (masklow, maskhigh, mode),
3680 		       result, 1, OPTAB_LIB_WIDEN);
3681   if (temp != result)
3682     emit_move_insn (result, temp);
3683 
3684   label = gen_label_rtx ();
3685   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3686 
3687   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3688 		       0, OPTAB_LIB_WIDEN);
3689   masklow = (HOST_WIDE_INT) -1 << logd;
3690   maskhigh = -1;
3691   temp = expand_binop (mode, ior_optab, temp,
3692 		       immed_double_const (masklow, maskhigh, mode),
3693 		       result, 1, OPTAB_LIB_WIDEN);
3694   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3695 		       0, OPTAB_LIB_WIDEN);
3696   if (temp != result)
3697     emit_move_insn (result, temp);
3698   emit_label (label);
3699   return result;
3700 }
3701 
3702 /* Expand signed division of OP0 by a power of two D in mode MODE.
3703    This routine is only called for positive values of D.  */
3704 
3705 static rtx
expand_sdiv_pow2(enum machine_mode mode,rtx op0,HOST_WIDE_INT d)3706 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3707 {
3708   rtx temp, label;
3709   tree shift;
3710   int logd;
3711 
3712   logd = floor_log2 (d);
3713   shift = build_int_cst (NULL_TREE, logd);
3714 
3715   if (d == 2 && BRANCH_COST >= 1)
3716     {
3717       temp = gen_reg_rtx (mode);
3718       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3719       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3720 			   0, OPTAB_LIB_WIDEN);
3721       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3722     }
3723 
3724 #ifdef HAVE_conditional_move
3725   if (BRANCH_COST >= 2)
3726     {
3727       rtx temp2;
3728 
3729       /* ??? emit_conditional_move forces a stack adjustment via
3730 	 compare_from_rtx so, if the sequence is discarded, it will
3731 	 be lost.  Do it now instead.  */
3732       do_pending_stack_adjust ();
3733 
3734       start_sequence ();
3735       temp2 = copy_to_mode_reg (mode, op0);
3736       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3737 			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
3738       temp = force_reg (mode, temp);
3739 
3740       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3741       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3742 				     mode, temp, temp2, mode, 0);
3743       if (temp2)
3744 	{
3745 	  rtx seq = get_insns ();
3746 	  end_sequence ();
3747 	  emit_insn (seq);
3748 	  return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3749 	}
3750       end_sequence ();
3751     }
3752 #endif
3753 
3754   if (BRANCH_COST >= 2)
3755     {
3756       int ushift = GET_MODE_BITSIZE (mode) - logd;
3757 
3758       temp = gen_reg_rtx (mode);
3759       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3760       if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3761 	temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3762 			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
3763       else
3764 	temp = expand_shift (RSHIFT_EXPR, mode, temp,
3765 			     build_int_cst (NULL_TREE, ushift),
3766 			     NULL_RTX, 1);
3767       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3768 			   0, OPTAB_LIB_WIDEN);
3769       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3770     }
3771 
3772   label = gen_label_rtx ();
3773   temp = copy_to_mode_reg (mode, op0);
3774   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3775   expand_inc (temp, GEN_INT (d - 1));
3776   emit_label (label);
3777   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3778 }
3779 
3780 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3781    if that is convenient, and returning where the result is.
3782    You may request either the quotient or the remainder as the result;
3783    specify REM_FLAG nonzero to get the remainder.
3784 
3785    CODE is the expression code for which kind of division this is;
3786    it controls how rounding is done.  MODE is the machine mode to use.
3787    UNSIGNEDP nonzero means do unsigned division.  */
3788 
3789 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3790    and then correct it by or'ing in missing high bits
3791    if result of ANDI is nonzero.
3792    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3793    This could optimize to a bfexts instruction.
3794    But C doesn't use these operations, so their optimizations are
3795    left for later.  */
3796 /* ??? For modulo, we don't actually need the highpart of the first product,
3797    the low part will do nicely.  And for small divisors, the second multiply
3798    can also be a low-part only multiply or even be completely left out.
3799    E.g. to calculate the remainder of a division by 3 with a 32 bit
3800    multiply, multiply with 0x55555556 and extract the upper two bits;
3801    the result is exact for inputs up to 0x1fffffff.
3802    The input range can be reduced by using cross-sum rules.
3803    For odd divisors >= 3, the following table gives right shift counts
3804    so that if a number is shifted by an integer multiple of the given
3805    amount, the remainder stays the same:
3806    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3807    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3808    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3809    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3810    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3811 
3812    Cross-sum rules for even numbers can be derived by leaving as many bits
3813    to the right alone as the divisor has zeros to the right.
3814    E.g. if x is an unsigned 32 bit number:
3815    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3816    */
3817 
3818 rtx
expand_divmod(int rem_flag,enum tree_code code,enum machine_mode mode,rtx op0,rtx op1,rtx target,int unsignedp)3819 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3820 	       rtx op0, rtx op1, rtx target, int unsignedp)
3821 {
3822   enum machine_mode compute_mode;
3823   rtx tquotient;
3824   rtx quotient = 0, remainder = 0;
3825   rtx last;
3826   int size;
3827   rtx insn, set;
3828   optab optab1, optab2;
3829   int op1_is_constant, op1_is_pow2 = 0;
3830   int max_cost, extra_cost;
3831   static HOST_WIDE_INT last_div_const = 0;
3832   static HOST_WIDE_INT ext_op1;
3833 
3834   op1_is_constant = GET_CODE (op1) == CONST_INT;
3835   if (op1_is_constant)
3836     {
3837       ext_op1 = INTVAL (op1);
3838       if (unsignedp)
3839 	ext_op1 &= GET_MODE_MASK (mode);
3840       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3841 		     || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3842     }
3843 
3844   /*
3845      This is the structure of expand_divmod:
3846 
3847      First comes code to fix up the operands so we can perform the operations
3848      correctly and efficiently.
3849 
3850      Second comes a switch statement with code specific for each rounding mode.
3851      For some special operands this code emits all RTL for the desired
3852      operation, for other cases, it generates only a quotient and stores it in
3853      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3854      to indicate that it has not done anything.
3855 
3856      Last comes code that finishes the operation.  If QUOTIENT is set and
3857      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3858      QUOTIENT is not set, it is computed using trunc rounding.
3859 
3860      We try to generate special code for division and remainder when OP1 is a
3861      constant.  If |OP1| = 2**n we can use shifts and some other fast
3862      operations.  For other values of OP1, we compute a carefully selected
3863      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3864      by m.
3865 
3866      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3867      half of the product.  Different strategies for generating the product are
3868      implemented in expand_mult_highpart.
3869 
3870      If what we actually want is the remainder, we generate that by another
3871      by-constant multiplication and a subtraction.  */
3872 
3873   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3874      code below will malfunction if we are, so check here and handle
3875      the special case if so.  */
3876   if (op1 == const1_rtx)
3877     return rem_flag ? const0_rtx : op0;
3878 
3879     /* When dividing by -1, we could get an overflow.
3880      negv_optab can handle overflows.  */
3881   if (! unsignedp && op1 == constm1_rtx)
3882     {
3883       if (rem_flag)
3884 	return const0_rtx;
3885       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3886 			  ? negv_optab : neg_optab, op0, target, 0);
3887     }
3888 
3889   if (target
3890       /* Don't use the function value register as a target
3891 	 since we have to read it as well as write it,
3892 	 and function-inlining gets confused by this.  */
3893       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3894 	  /* Don't clobber an operand while doing a multi-step calculation.  */
3895 	  || ((rem_flag || op1_is_constant)
3896 	      && (reg_mentioned_p (target, op0)
3897 		  || (MEM_P (op0) && MEM_P (target))))
3898 	  || reg_mentioned_p (target, op1)
3899 	  || (MEM_P (op1) && MEM_P (target))))
3900     target = 0;
3901 
3902   /* Get the mode in which to perform this computation.  Normally it will
3903      be MODE, but sometimes we can't do the desired operation in MODE.
3904      If so, pick a wider mode in which we can do the operation.  Convert
3905      to that mode at the start to avoid repeated conversions.
3906 
3907      First see what operations we need.  These depend on the expression
3908      we are evaluating.  (We assume that divxx3 insns exist under the
3909      same conditions that modxx3 insns and that these insns don't normally
3910      fail.  If these assumptions are not correct, we may generate less
3911      efficient code in some cases.)
3912 
3913      Then see if we find a mode in which we can open-code that operation
3914      (either a division, modulus, or shift).  Finally, check for the smallest
3915      mode for which we can do the operation with a library call.  */
3916 
3917   /* We might want to refine this now that we have division-by-constant
3918      optimization.  Since expand_mult_highpart tries so many variants, it is
3919      not straightforward to generalize this.  Maybe we should make an array
3920      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3921 
3922   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3923 	    ? (unsignedp ? lshr_optab : ashr_optab)
3924 	    : (unsignedp ? udiv_optab : sdiv_optab));
3925   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3926 	    ? optab1
3927 	    : (unsignedp ? udivmod_optab : sdivmod_optab));
3928 
3929   for (compute_mode = mode; compute_mode != VOIDmode;
3930        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3931     if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3932 	|| optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3933       break;
3934 
3935   if (compute_mode == VOIDmode)
3936     for (compute_mode = mode; compute_mode != VOIDmode;
3937 	 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3938       if (optab1->handlers[compute_mode].libfunc
3939 	  || optab2->handlers[compute_mode].libfunc)
3940 	break;
3941 
3942   /* If we still couldn't find a mode, use MODE, but expand_binop will
3943      probably die.  */
3944   if (compute_mode == VOIDmode)
3945     compute_mode = mode;
3946 
3947   if (target && GET_MODE (target) == compute_mode)
3948     tquotient = target;
3949   else
3950     tquotient = gen_reg_rtx (compute_mode);
3951 
3952   size = GET_MODE_BITSIZE (compute_mode);
3953 #if 0
3954   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3955      (mode), and thereby get better code when OP1 is a constant.  Do that
3956      later.  It will require going over all usages of SIZE below.  */
3957   size = GET_MODE_BITSIZE (mode);
3958 #endif
3959 
3960   /* Only deduct something for a REM if the last divide done was
3961      for a different constant.   Then set the constant of the last
3962      divide.  */
3963   max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode];
3964   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3965 		     && INTVAL (op1) == last_div_const))
3966     max_cost -= mul_cost[compute_mode] + add_cost[compute_mode];
3967 
3968   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3969 
3970   /* Now convert to the best mode to use.  */
3971   if (compute_mode != mode)
3972     {
3973       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3974       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3975 
3976       /* convert_modes may have placed op1 into a register, so we
3977 	 must recompute the following.  */
3978       op1_is_constant = GET_CODE (op1) == CONST_INT;
3979       op1_is_pow2 = (op1_is_constant
3980 		     && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3981 			  || (! unsignedp
3982 			      && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3983     }
3984 
3985   /* If one of the operands is a volatile MEM, copy it into a register.  */
3986 
3987   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3988     op0 = force_reg (compute_mode, op0);
3989   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3990     op1 = force_reg (compute_mode, op1);
3991 
3992   /* If we need the remainder or if OP1 is constant, we need to
3993      put OP0 in a register in case it has any queued subexpressions.  */
3994   if (rem_flag || op1_is_constant)
3995     op0 = force_reg (compute_mode, op0);
3996 
3997   last = get_last_insn ();
3998 
3999   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4000   if (unsignedp)
4001     {
4002       if (code == FLOOR_DIV_EXPR)
4003 	code = TRUNC_DIV_EXPR;
4004       if (code == FLOOR_MOD_EXPR)
4005 	code = TRUNC_MOD_EXPR;
4006       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4007 	code = TRUNC_DIV_EXPR;
4008     }
4009 
4010   if (op1 != const0_rtx)
4011     switch (code)
4012       {
4013       case TRUNC_MOD_EXPR:
4014       case TRUNC_DIV_EXPR:
4015 	if (op1_is_constant)
4016 	  {
4017 	    if (unsignedp)
4018 	      {
4019 		unsigned HOST_WIDE_INT mh;
4020 		int pre_shift, post_shift;
4021 		int dummy;
4022 		rtx ml;
4023 		unsigned HOST_WIDE_INT d = (INTVAL (op1)
4024 					    & GET_MODE_MASK (compute_mode));
4025 
4026 		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4027 		  {
4028 		    pre_shift = floor_log2 (d);
4029 		    if (rem_flag)
4030 		      {
4031 			remainder
4032 			  = expand_binop (compute_mode, and_optab, op0,
4033 					  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4034 					  remainder, 1,
4035 					  OPTAB_LIB_WIDEN);
4036 			if (remainder)
4037 			  return gen_lowpart (mode, remainder);
4038 		      }
4039 		    quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4040 					     build_int_cst (NULL_TREE,
4041 							    pre_shift),
4042 					     tquotient, 1);
4043 		  }
4044 		else if (size <= HOST_BITS_PER_WIDE_INT)
4045 		  {
4046 		    if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4047 		      {
4048 			/* Most significant bit of divisor is set; emit an scc
4049 			   insn.  */
4050 			quotient = emit_store_flag (tquotient, GEU, op0, op1,
4051 						    compute_mode, 1, 1);
4052 			if (quotient == 0)
4053 			  goto fail1;
4054 		      }
4055 		    else
4056 		      {
4057 			/* Find a suitable multiplier and right shift count
4058 			   instead of multiplying with D.  */
4059 
4060 			mh = choose_multiplier (d, size, size,
4061 						&ml, &post_shift, &dummy);
4062 
4063 			/* If the suggested multiplier is more than SIZE bits,
4064 			   we can do better for even divisors, using an
4065 			   initial right shift.  */
4066 			if (mh != 0 && (d & 1) == 0)
4067 			  {
4068 			    pre_shift = floor_log2 (d & -d);
4069 			    mh = choose_multiplier (d >> pre_shift, size,
4070 						    size - pre_shift,
4071 						    &ml, &post_shift, &dummy);
4072 			    gcc_assert (!mh);
4073 			  }
4074 			else
4075 			  pre_shift = 0;
4076 
4077 			if (mh != 0)
4078 			  {
4079 			    rtx t1, t2, t3, t4;
4080 
4081 			    if (post_shift - 1 >= BITS_PER_WORD)
4082 			      goto fail1;
4083 
4084 			    extra_cost
4085 			      = (shift_cost[compute_mode][post_shift - 1]
4086 				 + shift_cost[compute_mode][1]
4087 				 + 2 * add_cost[compute_mode]);
4088 			    t1 = expand_mult_highpart (compute_mode, op0, ml,
4089 						       NULL_RTX, 1,
4090 						       max_cost - extra_cost);
4091 			    if (t1 == 0)
4092 			      goto fail1;
4093 			    t2 = force_operand (gen_rtx_MINUS (compute_mode,
4094 							       op0, t1),
4095 						NULL_RTX);
4096 			    t3 = expand_shift
4097 			      (RSHIFT_EXPR, compute_mode, t2,
4098 			       build_int_cst (NULL_TREE, 1),
4099 			       NULL_RTX,1);
4100 			    t4 = force_operand (gen_rtx_PLUS (compute_mode,
4101 							      t1, t3),
4102 						NULL_RTX);
4103 			    quotient = expand_shift
4104 			      (RSHIFT_EXPR, compute_mode, t4,
4105 			       build_int_cst (NULL_TREE, post_shift - 1),
4106 			       tquotient, 1);
4107 			  }
4108 			else
4109 			  {
4110 			    rtx t1, t2;
4111 
4112 			    if (pre_shift >= BITS_PER_WORD
4113 				|| post_shift >= BITS_PER_WORD)
4114 			      goto fail1;
4115 
4116 			    t1 = expand_shift
4117 			      (RSHIFT_EXPR, compute_mode, op0,
4118 			       build_int_cst (NULL_TREE, pre_shift),
4119 			       NULL_RTX, 1);
4120 			    extra_cost
4121 			      = (shift_cost[compute_mode][pre_shift]
4122 				 + shift_cost[compute_mode][post_shift]);
4123 			    t2 = expand_mult_highpart (compute_mode, t1, ml,
4124 						       NULL_RTX, 1,
4125 						       max_cost - extra_cost);
4126 			    if (t2 == 0)
4127 			      goto fail1;
4128 			    quotient = expand_shift
4129 			      (RSHIFT_EXPR, compute_mode, t2,
4130 			       build_int_cst (NULL_TREE, post_shift),
4131 			       tquotient, 1);
4132 			  }
4133 		      }
4134 		  }
4135 		else		/* Too wide mode to use tricky code */
4136 		  break;
4137 
4138 		insn = get_last_insn ();
4139 		if (insn != last
4140 		    && (set = single_set (insn)) != 0
4141 		    && SET_DEST (set) == quotient)
4142 		  set_unique_reg_note (insn,
4143 				       REG_EQUAL,
4144 				       gen_rtx_UDIV (compute_mode, op0, op1));
4145 	      }
4146 	    else		/* TRUNC_DIV, signed */
4147 	      {
4148 		unsigned HOST_WIDE_INT ml;
4149 		int lgup, post_shift;
4150 		rtx mlr;
4151 		HOST_WIDE_INT d = INTVAL (op1);
4152 		unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
4153 
4154 		/* n rem d = n rem -d */
4155 		if (rem_flag && d < 0)
4156 		  {
4157 		    d = abs_d;
4158 		    op1 = gen_int_mode (abs_d, compute_mode);
4159 		  }
4160 
4161 		if (d == 1)
4162 		  quotient = op0;
4163 		else if (d == -1)
4164 		  quotient = expand_unop (compute_mode, neg_optab, op0,
4165 					  tquotient, 0);
4166 		else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4167 		  {
4168 		    /* This case is not handled correctly below.  */
4169 		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4170 						compute_mode, 1, 1);
4171 		    if (quotient == 0)
4172 		      goto fail1;
4173 		  }
4174 		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4175 			 && (rem_flag ? smod_pow2_cheap[compute_mode]
4176 				      : sdiv_pow2_cheap[compute_mode])
4177 			 /* We assume that cheap metric is true if the
4178 			    optab has an expander for this mode.  */
4179 			 && (((rem_flag ? smod_optab : sdiv_optab)
4180 			      ->handlers[compute_mode].insn_code
4181 			      != CODE_FOR_nothing)
4182 			     || (sdivmod_optab->handlers[compute_mode]
4183 				 .insn_code != CODE_FOR_nothing)))
4184 		  ;
4185 		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4186 		  {
4187 		    if (rem_flag)
4188 		      {
4189 			remainder = expand_smod_pow2 (compute_mode, op0, d);
4190 			if (remainder)
4191 			  return gen_lowpart (mode, remainder);
4192 		      }
4193 
4194 		    if (sdiv_pow2_cheap[compute_mode]
4195 			&& ((sdiv_optab->handlers[compute_mode].insn_code
4196 			     != CODE_FOR_nothing)
4197 			    || (sdivmod_optab->handlers[compute_mode].insn_code
4198 				!= CODE_FOR_nothing)))
4199 		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4200 						compute_mode, op0,
4201 						gen_int_mode (abs_d,
4202 							      compute_mode),
4203 						NULL_RTX, 0);
4204 		    else
4205 		      quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4206 
4207 		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4208 		       negate the quotient.  */
4209 		    if (d < 0)
4210 		      {
4211 			insn = get_last_insn ();
4212 			if (insn != last
4213 			    && (set = single_set (insn)) != 0
4214 			    && SET_DEST (set) == quotient
4215 			    && abs_d < ((unsigned HOST_WIDE_INT) 1
4216 					<< (HOST_BITS_PER_WIDE_INT - 1)))
4217 			  set_unique_reg_note (insn,
4218 					       REG_EQUAL,
4219 					       gen_rtx_DIV (compute_mode,
4220 							    op0,
4221 							    GEN_INT
4222 							    (trunc_int_for_mode
4223 							     (abs_d,
4224 							      compute_mode))));
4225 
4226 			quotient = expand_unop (compute_mode, neg_optab,
4227 						quotient, quotient, 0);
4228 		      }
4229 		  }
4230 		else if (size <= HOST_BITS_PER_WIDE_INT)
4231 		  {
4232 		    choose_multiplier (abs_d, size, size - 1,
4233 				       &mlr, &post_shift, &lgup);
4234 		    ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4235 		    if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4236 		      {
4237 			rtx t1, t2, t3;
4238 
4239 			if (post_shift >= BITS_PER_WORD
4240 			    || size - 1 >= BITS_PER_WORD)
4241 			  goto fail1;
4242 
4243 			extra_cost = (shift_cost[compute_mode][post_shift]
4244 				      + shift_cost[compute_mode][size - 1]
4245 				      + add_cost[compute_mode]);
4246 			t1 = expand_mult_highpart (compute_mode, op0, mlr,
4247 						   NULL_RTX, 0,
4248 						   max_cost - extra_cost);
4249 			if (t1 == 0)
4250 			  goto fail1;
4251 			t2 = expand_shift
4252 			  (RSHIFT_EXPR, compute_mode, t1,
4253 			   build_int_cst (NULL_TREE, post_shift),
4254 			   NULL_RTX, 0);
4255 			t3 = expand_shift
4256 			  (RSHIFT_EXPR, compute_mode, op0,
4257 			   build_int_cst (NULL_TREE, size - 1),
4258 			   NULL_RTX, 0);
4259 			if (d < 0)
4260 			  quotient
4261 			    = force_operand (gen_rtx_MINUS (compute_mode,
4262 							    t3, t2),
4263 					     tquotient);
4264 			else
4265 			  quotient
4266 			    = force_operand (gen_rtx_MINUS (compute_mode,
4267 							    t2, t3),
4268 					     tquotient);
4269 		      }
4270 		    else
4271 		      {
4272 			rtx t1, t2, t3, t4;
4273 
4274 			if (post_shift >= BITS_PER_WORD
4275 			    || size - 1 >= BITS_PER_WORD)
4276 			  goto fail1;
4277 
4278 			ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4279 			mlr = gen_int_mode (ml, compute_mode);
4280 			extra_cost = (shift_cost[compute_mode][post_shift]
4281 				      + shift_cost[compute_mode][size - 1]
4282 				      + 2 * add_cost[compute_mode]);
4283 			t1 = expand_mult_highpart (compute_mode, op0, mlr,
4284 						   NULL_RTX, 0,
4285 						   max_cost - extra_cost);
4286 			if (t1 == 0)
4287 			  goto fail1;
4288 			t2 = force_operand (gen_rtx_PLUS (compute_mode,
4289 							  t1, op0),
4290 					    NULL_RTX);
4291 			t3 = expand_shift
4292 			  (RSHIFT_EXPR, compute_mode, t2,
4293 			   build_int_cst (NULL_TREE, post_shift),
4294 			   NULL_RTX, 0);
4295 			t4 = expand_shift
4296 			  (RSHIFT_EXPR, compute_mode, op0,
4297 			   build_int_cst (NULL_TREE, size - 1),
4298 			   NULL_RTX, 0);
4299 			if (d < 0)
4300 			  quotient
4301 			    = force_operand (gen_rtx_MINUS (compute_mode,
4302 							    t4, t3),
4303 					     tquotient);
4304 			else
4305 			  quotient
4306 			    = force_operand (gen_rtx_MINUS (compute_mode,
4307 							    t3, t4),
4308 					     tquotient);
4309 		      }
4310 		  }
4311 		else		/* Too wide mode to use tricky code */
4312 		  break;
4313 
4314 		insn = get_last_insn ();
4315 		if (insn != last
4316 		    && (set = single_set (insn)) != 0
4317 		    && SET_DEST (set) == quotient)
4318 		  set_unique_reg_note (insn,
4319 				       REG_EQUAL,
4320 				       gen_rtx_DIV (compute_mode, op0, op1));
4321 	      }
4322 	    break;
4323 	  }
4324       fail1:
4325 	delete_insns_since (last);
4326 	break;
4327 
4328       case FLOOR_DIV_EXPR:
4329       case FLOOR_MOD_EXPR:
4330       /* We will come here only for signed operations.  */
4331 	if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4332 	  {
4333 	    unsigned HOST_WIDE_INT mh;
4334 	    int pre_shift, lgup, post_shift;
4335 	    HOST_WIDE_INT d = INTVAL (op1);
4336 	    rtx ml;
4337 
4338 	    if (d > 0)
4339 	      {
4340 		/* We could just as easily deal with negative constants here,
4341 		   but it does not seem worth the trouble for GCC 2.6.  */
4342 		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4343 		  {
4344 		    pre_shift = floor_log2 (d);
4345 		    if (rem_flag)
4346 		      {
4347 			remainder = expand_binop (compute_mode, and_optab, op0,
4348 						  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4349 						  remainder, 0, OPTAB_LIB_WIDEN);
4350 			if (remainder)
4351 			  return gen_lowpart (mode, remainder);
4352 		      }
4353 		    quotient = expand_shift
4354 		      (RSHIFT_EXPR, compute_mode, op0,
4355 		       build_int_cst (NULL_TREE, pre_shift),
4356 		       tquotient, 0);
4357 		  }
4358 		else
4359 		  {
4360 		    rtx t1, t2, t3, t4;
4361 
4362 		    mh = choose_multiplier (d, size, size - 1,
4363 					    &ml, &post_shift, &lgup);
4364 		    gcc_assert (!mh);
4365 
4366 		    if (post_shift < BITS_PER_WORD
4367 			&& size - 1 < BITS_PER_WORD)
4368 		      {
4369 			t1 = expand_shift
4370 			  (RSHIFT_EXPR, compute_mode, op0,
4371 			   build_int_cst (NULL_TREE, size - 1),
4372 			   NULL_RTX, 0);
4373 			t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4374 					   NULL_RTX, 0, OPTAB_WIDEN);
4375 			extra_cost = (shift_cost[compute_mode][post_shift]
4376 				      + shift_cost[compute_mode][size - 1]
4377 				      + 2 * add_cost[compute_mode]);
4378 			t3 = expand_mult_highpart (compute_mode, t2, ml,
4379 						   NULL_RTX, 1,
4380 						   max_cost - extra_cost);
4381 			if (t3 != 0)
4382 			  {
4383 			    t4 = expand_shift
4384 			      (RSHIFT_EXPR, compute_mode, t3,
4385 			       build_int_cst (NULL_TREE, post_shift),
4386 			       NULL_RTX, 1);
4387 			    quotient = expand_binop (compute_mode, xor_optab,
4388 						     t4, t1, tquotient, 0,
4389 						     OPTAB_WIDEN);
4390 			  }
4391 		      }
4392 		  }
4393 	      }
4394 	    else
4395 	      {
4396 		rtx nsign, t1, t2, t3, t4;
4397 		t1 = force_operand (gen_rtx_PLUS (compute_mode,
4398 						  op0, constm1_rtx), NULL_RTX);
4399 		t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4400 				   0, OPTAB_WIDEN);
4401 		nsign = expand_shift
4402 		  (RSHIFT_EXPR, compute_mode, t2,
4403 		   build_int_cst (NULL_TREE, size - 1),
4404 		   NULL_RTX, 0);
4405 		t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4406 				    NULL_RTX);
4407 		t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4408 				    NULL_RTX, 0);
4409 		if (t4)
4410 		  {
4411 		    rtx t5;
4412 		    t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4413 				      NULL_RTX, 0);
4414 		    quotient = force_operand (gen_rtx_PLUS (compute_mode,
4415 							    t4, t5),
4416 					      tquotient);
4417 		  }
4418 	      }
4419 	  }
4420 
4421 	if (quotient != 0)
4422 	  break;
4423 	delete_insns_since (last);
4424 
4425 	/* Try using an instruction that produces both the quotient and
4426 	   remainder, using truncation.  We can easily compensate the quotient
4427 	   or remainder to get floor rounding, once we have the remainder.
4428 	   Notice that we compute also the final remainder value here,
4429 	   and return the result right away.  */
4430 	if (target == 0 || GET_MODE (target) != compute_mode)
4431 	  target = gen_reg_rtx (compute_mode);
4432 
4433 	if (rem_flag)
4434 	  {
4435 	    remainder
4436 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4437 	    quotient = gen_reg_rtx (compute_mode);
4438 	  }
4439 	else
4440 	  {
4441 	    quotient
4442 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4443 	    remainder = gen_reg_rtx (compute_mode);
4444 	  }
4445 
4446 	if (expand_twoval_binop (sdivmod_optab, op0, op1,
4447 				 quotient, remainder, 0))
4448 	  {
4449 	    /* This could be computed with a branch-less sequence.
4450 	       Save that for later.  */
4451 	    rtx tem;
4452 	    rtx label = gen_label_rtx ();
4453 	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4454 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4455 				NULL_RTX, 0, OPTAB_WIDEN);
4456 	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4457 	    expand_dec (quotient, const1_rtx);
4458 	    expand_inc (remainder, op1);
4459 	    emit_label (label);
4460 	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
4461 	  }
4462 
4463 	/* No luck with division elimination or divmod.  Have to do it
4464 	   by conditionally adjusting op0 *and* the result.  */
4465 	{
4466 	  rtx label1, label2, label3, label4, label5;
4467 	  rtx adjusted_op0;
4468 	  rtx tem;
4469 
4470 	  quotient = gen_reg_rtx (compute_mode);
4471 	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4472 	  label1 = gen_label_rtx ();
4473 	  label2 = gen_label_rtx ();
4474 	  label3 = gen_label_rtx ();
4475 	  label4 = gen_label_rtx ();
4476 	  label5 = gen_label_rtx ();
4477 	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4478 	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4479 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4480 			      quotient, 0, OPTAB_LIB_WIDEN);
4481 	  if (tem != quotient)
4482 	    emit_move_insn (quotient, tem);
4483 	  emit_jump_insn (gen_jump (label5));
4484 	  emit_barrier ();
4485 	  emit_label (label1);
4486 	  expand_inc (adjusted_op0, const1_rtx);
4487 	  emit_jump_insn (gen_jump (label4));
4488 	  emit_barrier ();
4489 	  emit_label (label2);
4490 	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4491 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4492 			      quotient, 0, OPTAB_LIB_WIDEN);
4493 	  if (tem != quotient)
4494 	    emit_move_insn (quotient, tem);
4495 	  emit_jump_insn (gen_jump (label5));
4496 	  emit_barrier ();
4497 	  emit_label (label3);
4498 	  expand_dec (adjusted_op0, const1_rtx);
4499 	  emit_label (label4);
4500 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4501 			      quotient, 0, OPTAB_LIB_WIDEN);
4502 	  if (tem != quotient)
4503 	    emit_move_insn (quotient, tem);
4504 	  expand_dec (quotient, const1_rtx);
4505 	  emit_label (label5);
4506 	}
4507 	break;
4508 
4509       case CEIL_DIV_EXPR:
4510       case CEIL_MOD_EXPR:
4511 	if (unsignedp)
4512 	  {
4513 	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4514 	      {
4515 		rtx t1, t2, t3;
4516 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4517 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4518 				   build_int_cst (NULL_TREE, floor_log2 (d)),
4519 				   tquotient, 1);
4520 		t2 = expand_binop (compute_mode, and_optab, op0,
4521 				   GEN_INT (d - 1),
4522 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4523 		t3 = gen_reg_rtx (compute_mode);
4524 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4525 				      compute_mode, 1, 1);
4526 		if (t3 == 0)
4527 		  {
4528 		    rtx lab;
4529 		    lab = gen_label_rtx ();
4530 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4531 		    expand_inc (t1, const1_rtx);
4532 		    emit_label (lab);
4533 		    quotient = t1;
4534 		  }
4535 		else
4536 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4537 							  t1, t3),
4538 					    tquotient);
4539 		break;
4540 	      }
4541 
4542 	    /* Try using an instruction that produces both the quotient and
4543 	       remainder, using truncation.  We can easily compensate the
4544 	       quotient or remainder to get ceiling rounding, once we have the
4545 	       remainder.  Notice that we compute also the final remainder
4546 	       value here, and return the result right away.  */
4547 	    if (target == 0 || GET_MODE (target) != compute_mode)
4548 	      target = gen_reg_rtx (compute_mode);
4549 
4550 	    if (rem_flag)
4551 	      {
4552 		remainder = (REG_P (target)
4553 			     ? target : gen_reg_rtx (compute_mode));
4554 		quotient = gen_reg_rtx (compute_mode);
4555 	      }
4556 	    else
4557 	      {
4558 		quotient = (REG_P (target)
4559 			    ? target : gen_reg_rtx (compute_mode));
4560 		remainder = gen_reg_rtx (compute_mode);
4561 	      }
4562 
4563 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4564 				     remainder, 1))
4565 	      {
4566 		/* This could be computed with a branch-less sequence.
4567 		   Save that for later.  */
4568 		rtx label = gen_label_rtx ();
4569 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4570 				 compute_mode, label);
4571 		expand_inc (quotient, const1_rtx);
4572 		expand_dec (remainder, op1);
4573 		emit_label (label);
4574 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4575 	      }
4576 
4577 	    /* No luck with division elimination or divmod.  Have to do it
4578 	       by conditionally adjusting op0 *and* the result.  */
4579 	    {
4580 	      rtx label1, label2;
4581 	      rtx adjusted_op0, tem;
4582 
4583 	      quotient = gen_reg_rtx (compute_mode);
4584 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4585 	      label1 = gen_label_rtx ();
4586 	      label2 = gen_label_rtx ();
4587 	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4588 			       compute_mode, label1);
4589 	      emit_move_insn  (quotient, const0_rtx);
4590 	      emit_jump_insn (gen_jump (label2));
4591 	      emit_barrier ();
4592 	      emit_label (label1);
4593 	      expand_dec (adjusted_op0, const1_rtx);
4594 	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4595 				  quotient, 1, OPTAB_LIB_WIDEN);
4596 	      if (tem != quotient)
4597 		emit_move_insn (quotient, tem);
4598 	      expand_inc (quotient, const1_rtx);
4599 	      emit_label (label2);
4600 	    }
4601 	  }
4602 	else /* signed */
4603 	  {
4604 	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4605 		&& INTVAL (op1) >= 0)
4606 	      {
4607 		/* This is extremely similar to the code for the unsigned case
4608 		   above.  For 2.7 we should merge these variants, but for
4609 		   2.6.1 I don't want to touch the code for unsigned since that
4610 		   get used in C.  The signed case will only be used by other
4611 		   languages (Ada).  */
4612 
4613 		rtx t1, t2, t3;
4614 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4615 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4616 				   build_int_cst (NULL_TREE, floor_log2 (d)),
4617 				   tquotient, 0);
4618 		t2 = expand_binop (compute_mode, and_optab, op0,
4619 				   GEN_INT (d - 1),
4620 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4621 		t3 = gen_reg_rtx (compute_mode);
4622 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4623 				      compute_mode, 1, 1);
4624 		if (t3 == 0)
4625 		  {
4626 		    rtx lab;
4627 		    lab = gen_label_rtx ();
4628 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4629 		    expand_inc (t1, const1_rtx);
4630 		    emit_label (lab);
4631 		    quotient = t1;
4632 		  }
4633 		else
4634 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4635 							  t1, t3),
4636 					    tquotient);
4637 		break;
4638 	      }
4639 
4640 	    /* Try using an instruction that produces both the quotient and
4641 	       remainder, using truncation.  We can easily compensate the
4642 	       quotient or remainder to get ceiling rounding, once we have the
4643 	       remainder.  Notice that we compute also the final remainder
4644 	       value here, and return the result right away.  */
4645 	    if (target == 0 || GET_MODE (target) != compute_mode)
4646 	      target = gen_reg_rtx (compute_mode);
4647 	    if (rem_flag)
4648 	      {
4649 		remainder= (REG_P (target)
4650 			    ? target : gen_reg_rtx (compute_mode));
4651 		quotient = gen_reg_rtx (compute_mode);
4652 	      }
4653 	    else
4654 	      {
4655 		quotient = (REG_P (target)
4656 			    ? target : gen_reg_rtx (compute_mode));
4657 		remainder = gen_reg_rtx (compute_mode);
4658 	      }
4659 
4660 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4661 				     remainder, 0))
4662 	      {
4663 		/* This could be computed with a branch-less sequence.
4664 		   Save that for later.  */
4665 		rtx tem;
4666 		rtx label = gen_label_rtx ();
4667 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4668 				 compute_mode, label);
4669 		tem = expand_binop (compute_mode, xor_optab, op0, op1,
4670 				    NULL_RTX, 0, OPTAB_WIDEN);
4671 		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4672 		expand_inc (quotient, const1_rtx);
4673 		expand_dec (remainder, op1);
4674 		emit_label (label);
4675 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4676 	      }
4677 
4678 	    /* No luck with division elimination or divmod.  Have to do it
4679 	       by conditionally adjusting op0 *and* the result.  */
4680 	    {
4681 	      rtx label1, label2, label3, label4, label5;
4682 	      rtx adjusted_op0;
4683 	      rtx tem;
4684 
4685 	      quotient = gen_reg_rtx (compute_mode);
4686 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4687 	      label1 = gen_label_rtx ();
4688 	      label2 = gen_label_rtx ();
4689 	      label3 = gen_label_rtx ();
4690 	      label4 = gen_label_rtx ();
4691 	      label5 = gen_label_rtx ();
4692 	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4693 	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4694 			       compute_mode, label1);
4695 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4696 				  quotient, 0, OPTAB_LIB_WIDEN);
4697 	      if (tem != quotient)
4698 		emit_move_insn (quotient, tem);
4699 	      emit_jump_insn (gen_jump (label5));
4700 	      emit_barrier ();
4701 	      emit_label (label1);
4702 	      expand_dec (adjusted_op0, const1_rtx);
4703 	      emit_jump_insn (gen_jump (label4));
4704 	      emit_barrier ();
4705 	      emit_label (label2);
4706 	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4707 			       compute_mode, label3);
4708 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4709 				  quotient, 0, OPTAB_LIB_WIDEN);
4710 	      if (tem != quotient)
4711 		emit_move_insn (quotient, tem);
4712 	      emit_jump_insn (gen_jump (label5));
4713 	      emit_barrier ();
4714 	      emit_label (label3);
4715 	      expand_inc (adjusted_op0, const1_rtx);
4716 	      emit_label (label4);
4717 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4718 				  quotient, 0, OPTAB_LIB_WIDEN);
4719 	      if (tem != quotient)
4720 		emit_move_insn (quotient, tem);
4721 	      expand_inc (quotient, const1_rtx);
4722 	      emit_label (label5);
4723 	    }
4724 	  }
4725 	break;
4726 
4727       case EXACT_DIV_EXPR:
4728 	if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4729 	  {
4730 	    HOST_WIDE_INT d = INTVAL (op1);
4731 	    unsigned HOST_WIDE_INT ml;
4732 	    int pre_shift;
4733 	    rtx t1;
4734 
4735 	    pre_shift = floor_log2 (d & -d);
4736 	    ml = invert_mod2n (d >> pre_shift, size);
4737 	    t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4738 			       build_int_cst (NULL_TREE, pre_shift),
4739 			       NULL_RTX, unsignedp);
4740 	    quotient = expand_mult (compute_mode, t1,
4741 				    gen_int_mode (ml, compute_mode),
4742 				    NULL_RTX, 1);
4743 
4744 	    insn = get_last_insn ();
4745 	    set_unique_reg_note (insn,
4746 				 REG_EQUAL,
4747 				 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4748 						 compute_mode,
4749 						 op0, op1));
4750 	  }
4751 	break;
4752 
4753       case ROUND_DIV_EXPR:
4754       case ROUND_MOD_EXPR:
4755 	if (unsignedp)
4756 	  {
4757 	    rtx tem;
4758 	    rtx label;
4759 	    label = gen_label_rtx ();
4760 	    quotient = gen_reg_rtx (compute_mode);
4761 	    remainder = gen_reg_rtx (compute_mode);
4762 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4763 	      {
4764 		rtx tem;
4765 		quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4766 					 quotient, 1, OPTAB_LIB_WIDEN);
4767 		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4768 		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4769 					  remainder, 1, OPTAB_LIB_WIDEN);
4770 	      }
4771 	    tem = plus_constant (op1, -1);
4772 	    tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4773 				build_int_cst (NULL_TREE, 1),
4774 				NULL_RTX, 1);
4775 	    do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4776 	    expand_inc (quotient, const1_rtx);
4777 	    expand_dec (remainder, op1);
4778 	    emit_label (label);
4779 	  }
4780 	else
4781 	  {
4782 	    rtx abs_rem, abs_op1, tem, mask;
4783 	    rtx label;
4784 	    label = gen_label_rtx ();
4785 	    quotient = gen_reg_rtx (compute_mode);
4786 	    remainder = gen_reg_rtx (compute_mode);
4787 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4788 	      {
4789 		rtx tem;
4790 		quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4791 					 quotient, 0, OPTAB_LIB_WIDEN);
4792 		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4793 		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4794 					  remainder, 0, OPTAB_LIB_WIDEN);
4795 	      }
4796 	    abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4797 	    abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4798 	    tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4799 				build_int_cst (NULL_TREE, 1),
4800 				NULL_RTX, 1);
4801 	    do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4802 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4803 				NULL_RTX, 0, OPTAB_WIDEN);
4804 	    mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4805 				 build_int_cst (NULL_TREE, size - 1),
4806 				 NULL_RTX, 0);
4807 	    tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4808 				NULL_RTX, 0, OPTAB_WIDEN);
4809 	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4810 				NULL_RTX, 0, OPTAB_WIDEN);
4811 	    expand_inc (quotient, tem);
4812 	    tem = expand_binop (compute_mode, xor_optab, mask, op1,
4813 				NULL_RTX, 0, OPTAB_WIDEN);
4814 	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4815 				NULL_RTX, 0, OPTAB_WIDEN);
4816 	    expand_dec (remainder, tem);
4817 	    emit_label (label);
4818 	  }
4819 	return gen_lowpart (mode, rem_flag ? remainder : quotient);
4820 
4821       default:
4822 	gcc_unreachable ();
4823       }
4824 
4825   if (quotient == 0)
4826     {
4827       if (target && GET_MODE (target) != compute_mode)
4828 	target = 0;
4829 
4830       if (rem_flag)
4831 	{
4832 	  /* Try to produce the remainder without producing the quotient.
4833 	     If we seem to have a divmod pattern that does not require widening,
4834 	     don't try widening here.  We should really have a WIDEN argument
4835 	     to expand_twoval_binop, since what we'd really like to do here is
4836 	     1) try a mod insn in compute_mode
4837 	     2) try a divmod insn in compute_mode
4838 	     3) try a div insn in compute_mode and multiply-subtract to get
4839 	        remainder
4840 	     4) try the same things with widening allowed.  */
4841 	  remainder
4842 	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4843 				 op0, op1, target,
4844 				 unsignedp,
4845 				 ((optab2->handlers[compute_mode].insn_code
4846 				   != CODE_FOR_nothing)
4847 				  ? OPTAB_DIRECT : OPTAB_WIDEN));
4848 	  if (remainder == 0)
4849 	    {
4850 	      /* No luck there.  Can we do remainder and divide at once
4851 		 without a library call?  */
4852 	      remainder = gen_reg_rtx (compute_mode);
4853 	      if (! expand_twoval_binop ((unsignedp
4854 					  ? udivmod_optab
4855 					  : sdivmod_optab),
4856 					 op0, op1,
4857 					 NULL_RTX, remainder, unsignedp))
4858 		remainder = 0;
4859 	    }
4860 
4861 	  if (remainder)
4862 	    return gen_lowpart (mode, remainder);
4863 	}
4864 
4865       /* Produce the quotient.  Try a quotient insn, but not a library call.
4866 	 If we have a divmod in this mode, use it in preference to widening
4867 	 the div (for this test we assume it will not fail). Note that optab2
4868 	 is set to the one of the two optabs that the call below will use.  */
4869       quotient
4870 	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4871 			     op0, op1, rem_flag ? NULL_RTX : target,
4872 			     unsignedp,
4873 			     ((optab2->handlers[compute_mode].insn_code
4874 			       != CODE_FOR_nothing)
4875 			      ? OPTAB_DIRECT : OPTAB_WIDEN));
4876 
4877       if (quotient == 0)
4878 	{
4879 	  /* No luck there.  Try a quotient-and-remainder insn,
4880 	     keeping the quotient alone.  */
4881 	  quotient = gen_reg_rtx (compute_mode);
4882 	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4883 				     op0, op1,
4884 				     quotient, NULL_RTX, unsignedp))
4885 	    {
4886 	      quotient = 0;
4887 	      if (! rem_flag)
4888 		/* Still no luck.  If we are not computing the remainder,
4889 		   use a library call for the quotient.  */
4890 		quotient = sign_expand_binop (compute_mode,
4891 					      udiv_optab, sdiv_optab,
4892 					      op0, op1, target,
4893 					      unsignedp, OPTAB_LIB_WIDEN);
4894 	    }
4895 	}
4896     }
4897 
4898   if (rem_flag)
4899     {
4900       if (target && GET_MODE (target) != compute_mode)
4901 	target = 0;
4902 
4903       if (quotient == 0)
4904 	{
4905 	  /* No divide instruction either.  Use library for remainder.  */
4906 	  remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4907 					 op0, op1, target,
4908 					 unsignedp, OPTAB_LIB_WIDEN);
4909 	  /* No remainder function.  Try a quotient-and-remainder
4910 	     function, keeping the remainder.  */
4911 	  if (!remainder)
4912 	    {
4913 	      remainder = gen_reg_rtx (compute_mode);
4914 	      if (!expand_twoval_binop_libfunc
4915 		  (unsignedp ? udivmod_optab : sdivmod_optab,
4916 		   op0, op1,
4917 		   NULL_RTX, remainder,
4918 		   unsignedp ? UMOD : MOD))
4919 		remainder = NULL_RTX;
4920 	    }
4921 	}
4922       else
4923 	{
4924 	  /* We divided.  Now finish doing X - Y * (X / Y).  */
4925 	  remainder = expand_mult (compute_mode, quotient, op1,
4926 				   NULL_RTX, unsignedp);
4927 	  remainder = expand_binop (compute_mode, sub_optab, op0,
4928 				    remainder, target, unsignedp,
4929 				    OPTAB_LIB_WIDEN);
4930 	}
4931     }
4932 
4933   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4934 }
4935 
4936 /* Return a tree node with data type TYPE, describing the value of X.
4937    Usually this is an VAR_DECL, if there is no obvious better choice.
4938    X may be an expression, however we only support those expressions
4939    generated by loop.c.  */
4940 
4941 tree
make_tree(tree type,rtx x)4942 make_tree (tree type, rtx x)
4943 {
4944   tree t;
4945 
4946   switch (GET_CODE (x))
4947     {
4948     case CONST_INT:
4949       {
4950 	HOST_WIDE_INT hi = 0;
4951 
4952 	if (INTVAL (x) < 0
4953 	    && !(TYPE_UNSIGNED (type)
4954 		 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4955 		     < HOST_BITS_PER_WIDE_INT)))
4956 	  hi = -1;
4957 
4958 	t = build_int_cst_wide (type, INTVAL (x), hi);
4959 
4960 	return t;
4961       }
4962 
4963     case CONST_DOUBLE:
4964       if (GET_MODE (x) == VOIDmode)
4965 	t = build_int_cst_wide (type,
4966 				CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4967       else
4968 	{
4969 	  REAL_VALUE_TYPE d;
4970 
4971 	  REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4972 	  t = build_real (type, d);
4973 	}
4974 
4975       return t;
4976 
4977     case CONST_VECTOR:
4978       {
4979 	int units = CONST_VECTOR_NUNITS (x);
4980 	tree itype = TREE_TYPE (type);
4981 	tree t = NULL_TREE;
4982 	int i;
4983 
4984 
4985 	/* Build a tree with vector elements.  */
4986 	for (i = units - 1; i >= 0; --i)
4987 	  {
4988 	    rtx elt = CONST_VECTOR_ELT (x, i);
4989 	    t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4990 	  }
4991 
4992 	return build_vector (type, t);
4993       }
4994 
4995     case PLUS:
4996       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4997 			  make_tree (type, XEXP (x, 1)));
4998 
4999     case MINUS:
5000       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5001 			  make_tree (type, XEXP (x, 1)));
5002 
5003     case NEG:
5004       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5005 
5006     case MULT:
5007       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5008 			  make_tree (type, XEXP (x, 1)));
5009 
5010     case ASHIFT:
5011       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5012 			  make_tree (type, XEXP (x, 1)));
5013 
5014     case LSHIFTRT:
5015       t = lang_hooks.types.unsigned_type (type);
5016       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5017 			    		 make_tree (t, XEXP (x, 0)),
5018 				    	 make_tree (type, XEXP (x, 1))));
5019 
5020     case ASHIFTRT:
5021       t = lang_hooks.types.signed_type (type);
5022       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5023 					 make_tree (t, XEXP (x, 0)),
5024 				    	 make_tree (type, XEXP (x, 1))));
5025 
5026     case DIV:
5027       if (TREE_CODE (type) != REAL_TYPE)
5028 	t = lang_hooks.types.signed_type (type);
5029       else
5030 	t = type;
5031 
5032       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5033 				    	 make_tree (t, XEXP (x, 0)),
5034 				    	 make_tree (t, XEXP (x, 1))));
5035     case UDIV:
5036       t = lang_hooks.types.unsigned_type (type);
5037       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5038 				    	 make_tree (t, XEXP (x, 0)),
5039 				    	 make_tree (t, XEXP (x, 1))));
5040 
5041     case SIGN_EXTEND:
5042     case ZERO_EXTEND:
5043       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5044 					  GET_CODE (x) == ZERO_EXTEND);
5045       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5046 
5047     case CONST:
5048       return make_tree (type, XEXP (x, 0));
5049 
5050     case SYMBOL_REF:
5051       t = SYMBOL_REF_DECL (x);
5052       if (t)
5053 	return fold_convert (type, build_fold_addr_expr (t));
5054       /* else fall through.  */
5055 
5056     default:
5057       t = build_decl (VAR_DECL, NULL_TREE, type);
5058 
5059       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
5060 	 ptr_mode.  So convert.  */
5061       if (POINTER_TYPE_P (type))
5062 	x = convert_memory_address (TYPE_MODE (type), x);
5063 
5064       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5065 	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5066       t->decl_with_rtl.rtl = x;
5067 
5068       return t;
5069     }
5070 }
5071 
5072 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5073    and returning TARGET.
5074 
5075    If TARGET is 0, a pseudo-register or constant is returned.  */
5076 
5077 rtx
expand_and(enum machine_mode mode,rtx op0,rtx op1,rtx target)5078 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5079 {
5080   rtx tem = 0;
5081 
5082   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5083     tem = simplify_binary_operation (AND, mode, op0, op1);
5084   if (tem == 0)
5085     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5086 
5087   if (target == 0)
5088     target = tem;
5089   else if (tem != target)
5090     emit_move_insn (target, tem);
5091   return target;
5092 }
5093 
5094 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5095    and storing in TARGET.  Normally return TARGET.
5096    Return 0 if that cannot be done.
5097 
5098    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5099    it is VOIDmode, they cannot both be CONST_INT.
5100 
5101    UNSIGNEDP is for the case where we have to widen the operands
5102    to perform the operation.  It says to use zero-extension.
5103 
5104    NORMALIZEP is 1 if we should convert the result to be either zero
5105    or one.  Normalize is -1 if we should convert the result to be
5106    either zero or -1.  If NORMALIZEP is zero, the result will be left
5107    "raw" out of the scc insn.  */
5108 
5109 rtx
emit_store_flag(rtx target,enum rtx_code code,rtx op0,rtx op1,enum machine_mode mode,int unsignedp,int normalizep)5110 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5111 		 enum machine_mode mode, int unsignedp, int normalizep)
5112 {
5113   rtx subtarget;
5114   enum insn_code icode;
5115   enum machine_mode compare_mode;
5116   enum machine_mode target_mode = GET_MODE (target);
5117   rtx tem;
5118   rtx last = get_last_insn ();
5119   rtx pattern, comparison;
5120 
5121   if (unsignedp)
5122     code = unsigned_condition (code);
5123 
5124   /* If one operand is constant, make it the second one.  Only do this
5125      if the other operand is not constant as well.  */
5126 
5127   if (swap_commutative_operands_p (op0, op1))
5128     {
5129       tem = op0;
5130       op0 = op1;
5131       op1 = tem;
5132       code = swap_condition (code);
5133     }
5134 
5135   if (mode == VOIDmode)
5136     mode = GET_MODE (op0);
5137 
5138   /* For some comparisons with 1 and -1, we can convert this to
5139      comparisons with zero.  This will often produce more opportunities for
5140      store-flag insns.  */
5141 
5142   switch (code)
5143     {
5144     case LT:
5145       if (op1 == const1_rtx)
5146 	op1 = const0_rtx, code = LE;
5147       break;
5148     case LE:
5149       if (op1 == constm1_rtx)
5150 	op1 = const0_rtx, code = LT;
5151       break;
5152     case GE:
5153       if (op1 == const1_rtx)
5154 	op1 = const0_rtx, code = GT;
5155       break;
5156     case GT:
5157       if (op1 == constm1_rtx)
5158 	op1 = const0_rtx, code = GE;
5159       break;
5160     case GEU:
5161       if (op1 == const1_rtx)
5162 	op1 = const0_rtx, code = NE;
5163       break;
5164     case LTU:
5165       if (op1 == const1_rtx)
5166 	op1 = const0_rtx, code = EQ;
5167       break;
5168     default:
5169       break;
5170     }
5171 
5172   /* If we are comparing a double-word integer with zero or -1, we can
5173      convert the comparison into one involving a single word.  */
5174   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5175       && GET_MODE_CLASS (mode) == MODE_INT
5176       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5177     {
5178       if ((code == EQ || code == NE)
5179 	  && (op1 == const0_rtx || op1 == constm1_rtx))
5180 	{
5181 	  rtx op00, op01, op0both;
5182 
5183 	  /* Do a logical OR or AND of the two words and compare the result.  */
5184 	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5185 	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5186 	  op0both = expand_binop (word_mode,
5187 				  op1 == const0_rtx ? ior_optab : and_optab,
5188 				  op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5189 
5190 	  if (op0both != 0)
5191 	    return emit_store_flag (target, code, op0both, op1, word_mode,
5192 				    unsignedp, normalizep);
5193 	}
5194       else if ((code == LT || code == GE) && op1 == const0_rtx)
5195 	{
5196 	  rtx op0h;
5197 
5198 	  /* If testing the sign bit, can just test on high word.  */
5199 	  op0h = simplify_gen_subreg (word_mode, op0, mode,
5200 				      subreg_highpart_offset (word_mode, mode));
5201 	  return emit_store_flag (target, code, op0h, op1, word_mode,
5202 				  unsignedp, normalizep);
5203 	}
5204     }
5205 
5206   /* From now on, we won't change CODE, so set ICODE now.  */
5207   icode = setcc_gen_code[(int) code];
5208 
5209   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5210      complement of A (for GE) and shifting the sign bit to the low bit.  */
5211   if (op1 == const0_rtx && (code == LT || code == GE)
5212       && GET_MODE_CLASS (mode) == MODE_INT
5213       && (normalizep || STORE_FLAG_VALUE == 1
5214 	  || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5215 	      && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5216 		  == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5217     {
5218       subtarget = target;
5219 
5220       /* If the result is to be wider than OP0, it is best to convert it
5221 	 first.  If it is to be narrower, it is *incorrect* to convert it
5222 	 first.  */
5223       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5224 	{
5225 	  op0 = convert_modes (target_mode, mode, op0, 0);
5226 	  mode = target_mode;
5227 	}
5228 
5229       if (target_mode != mode)
5230 	subtarget = 0;
5231 
5232       if (code == GE)
5233 	op0 = expand_unop (mode, one_cmpl_optab, op0,
5234 			   ((STORE_FLAG_VALUE == 1 || normalizep)
5235 			    ? 0 : subtarget), 0);
5236 
5237       if (STORE_FLAG_VALUE == 1 || normalizep)
5238 	/* If we are supposed to produce a 0/1 value, we want to do
5239 	   a logical shift from the sign bit to the low-order bit; for
5240 	   a -1/0 value, we do an arithmetic shift.  */
5241 	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5242 			    size_int (GET_MODE_BITSIZE (mode) - 1),
5243 			    subtarget, normalizep != -1);
5244 
5245       if (mode != target_mode)
5246 	op0 = convert_modes (target_mode, mode, op0, 0);
5247 
5248       return op0;
5249     }
5250 
5251   if (icode != CODE_FOR_nothing)
5252     {
5253       insn_operand_predicate_fn pred;
5254 
5255       /* We think we may be able to do this with a scc insn.  Emit the
5256 	 comparison and then the scc insn.  */
5257 
5258       do_pending_stack_adjust ();
5259       last = get_last_insn ();
5260 
5261       comparison
5262 	= compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5263       if (CONSTANT_P (comparison))
5264 	{
5265 	  switch (GET_CODE (comparison))
5266 	    {
5267 	    case CONST_INT:
5268 	      if (comparison == const0_rtx)
5269 		return const0_rtx;
5270 	      break;
5271 
5272 #ifdef FLOAT_STORE_FLAG_VALUE
5273 	    case CONST_DOUBLE:
5274 	      if (comparison == CONST0_RTX (GET_MODE (comparison)))
5275 		return const0_rtx;
5276 	      break;
5277 #endif
5278 	    default:
5279 	      gcc_unreachable ();
5280 	    }
5281 
5282 	  if (normalizep == 1)
5283 	    return const1_rtx;
5284 	  if (normalizep == -1)
5285 	    return constm1_rtx;
5286 	  return const_true_rtx;
5287 	}
5288 
5289       /* The code of COMPARISON may not match CODE if compare_from_rtx
5290 	 decided to swap its operands and reverse the original code.
5291 
5292 	 We know that compare_from_rtx returns either a CONST_INT or
5293 	 a new comparison code, so it is safe to just extract the
5294 	 code from COMPARISON.  */
5295       code = GET_CODE (comparison);
5296 
5297       /* Get a reference to the target in the proper mode for this insn.  */
5298       compare_mode = insn_data[(int) icode].operand[0].mode;
5299       subtarget = target;
5300       pred = insn_data[(int) icode].operand[0].predicate;
5301       if (optimize || ! (*pred) (subtarget, compare_mode))
5302 	subtarget = gen_reg_rtx (compare_mode);
5303 
5304       pattern = GEN_FCN (icode) (subtarget);
5305       if (pattern)
5306 	{
5307 	  emit_insn (pattern);
5308 
5309 	  /* If we are converting to a wider mode, first convert to
5310 	     TARGET_MODE, then normalize.  This produces better combining
5311 	     opportunities on machines that have a SIGN_EXTRACT when we are
5312 	     testing a single bit.  This mostly benefits the 68k.
5313 
5314 	     If STORE_FLAG_VALUE does not have the sign bit set when
5315 	     interpreted in COMPARE_MODE, we can do this conversion as
5316 	     unsigned, which is usually more efficient.  */
5317 	  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5318 	    {
5319 	      convert_move (target, subtarget,
5320 			    (GET_MODE_BITSIZE (compare_mode)
5321 			     <= HOST_BITS_PER_WIDE_INT)
5322 			    && 0 == (STORE_FLAG_VALUE
5323 				     & ((HOST_WIDE_INT) 1
5324 					<< (GET_MODE_BITSIZE (compare_mode) -1))));
5325 	      op0 = target;
5326 	      compare_mode = target_mode;
5327 	    }
5328 	  else
5329 	    op0 = subtarget;
5330 
5331 	  /* If we want to keep subexpressions around, don't reuse our
5332 	     last target.  */
5333 
5334 	  if (optimize)
5335 	    subtarget = 0;
5336 
5337 	  /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
5338 	     we don't have to do anything.  */
5339 	  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5340 	    ;
5341 	  /* STORE_FLAG_VALUE might be the most negative number, so write
5342 	     the comparison this way to avoid a compiler-time warning.  */
5343 	  else if (- normalizep == STORE_FLAG_VALUE)
5344 	    op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5345 
5346 	  /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5347 	     makes it hard to use a value of just the sign bit due to
5348 	     ANSI integer constant typing rules.  */
5349 	  else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5350 		   && (STORE_FLAG_VALUE
5351 		       & ((HOST_WIDE_INT) 1
5352 			  << (GET_MODE_BITSIZE (compare_mode) - 1))))
5353 	    op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5354 				size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5355 				subtarget, normalizep == 1);
5356 	  else
5357 	    {
5358 	      gcc_assert (STORE_FLAG_VALUE & 1);
5359 
5360 	      op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5361 	      if (normalizep == -1)
5362 		op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5363 	    }
5364 
5365 	  /* If we were converting to a smaller mode, do the
5366 	     conversion now.  */
5367 	  if (target_mode != compare_mode)
5368 	    {
5369 	      convert_move (target, op0, 0);
5370 	      return target;
5371 	    }
5372 	  else
5373 	    return op0;
5374 	}
5375     }
5376 
5377   delete_insns_since (last);
5378 
5379   /* If optimizing, use different pseudo registers for each insn, instead
5380      of reusing the same pseudo.  This leads to better CSE, but slows
5381      down the compiler, since there are more pseudos */
5382   subtarget = (!optimize
5383 	       && (target_mode == mode)) ? target : NULL_RTX;
5384 
5385   /* If we reached here, we can't do this with a scc insn.  However, there
5386      are some comparisons that can be done directly.  For example, if
5387      this is an equality comparison of integers, we can try to exclusive-or
5388      (or subtract) the two operands and use a recursive call to try the
5389      comparison with zero.  Don't do any of these cases if branches are
5390      very cheap.  */
5391 
5392   if (BRANCH_COST > 0
5393       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5394       && op1 != const0_rtx)
5395     {
5396       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5397 			  OPTAB_WIDEN);
5398 
5399       if (tem == 0)
5400 	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5401 			    OPTAB_WIDEN);
5402       if (tem != 0)
5403 	tem = emit_store_flag (target, code, tem, const0_rtx,
5404 			       mode, unsignedp, normalizep);
5405       if (tem == 0)
5406 	delete_insns_since (last);
5407       return tem;
5408     }
5409 
5410   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5411      the constant zero.  Reject all other comparisons at this point.  Only
5412      do LE and GT if branches are expensive since they are expensive on
5413      2-operand machines.  */
5414 
5415   if (BRANCH_COST == 0
5416       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5417       || (code != EQ && code != NE
5418 	  && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5419     return 0;
5420 
5421   /* See what we need to return.  We can only return a 1, -1, or the
5422      sign bit.  */
5423 
5424   if (normalizep == 0)
5425     {
5426       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5427 	normalizep = STORE_FLAG_VALUE;
5428 
5429       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5430 	       && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5431 		   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5432 	;
5433       else
5434 	return 0;
5435     }
5436 
5437   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5438      do the necessary operation below.  */
5439 
5440   tem = 0;
5441 
5442   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5443      the sign bit set.  */
5444 
5445   if (code == LE)
5446     {
5447       /* This is destructive, so SUBTARGET can't be OP0.  */
5448       if (rtx_equal_p (subtarget, op0))
5449 	subtarget = 0;
5450 
5451       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5452 			  OPTAB_WIDEN);
5453       if (tem)
5454 	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5455 			    OPTAB_WIDEN);
5456     }
5457 
5458   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5459      number of bits in the mode of OP0, minus one.  */
5460 
5461   if (code == GT)
5462     {
5463       if (rtx_equal_p (subtarget, op0))
5464 	subtarget = 0;
5465 
5466       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5467 			  size_int (GET_MODE_BITSIZE (mode) - 1),
5468 			  subtarget, 0);
5469       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5470 			  OPTAB_WIDEN);
5471     }
5472 
5473   if (code == EQ || code == NE)
5474     {
5475       /* For EQ or NE, one way to do the comparison is to apply an operation
5476 	 that converts the operand into a positive number if it is nonzero
5477 	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5478 	 for NE we negate.  This puts the result in the sign bit.  Then we
5479 	 normalize with a shift, if needed.
5480 
5481 	 Two operations that can do the above actions are ABS and FFS, so try
5482 	 them.  If that doesn't work, and MODE is smaller than a full word,
5483 	 we can use zero-extension to the wider mode (an unsigned conversion)
5484 	 as the operation.  */
5485 
5486       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5487 	 that is compensated by the subsequent overflow when subtracting
5488 	 one / negating.  */
5489 
5490       if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5491 	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5492       else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5493 	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5494       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5495 	{
5496 	  tem = convert_modes (word_mode, mode, op0, 1);
5497 	  mode = word_mode;
5498 	}
5499 
5500       if (tem != 0)
5501 	{
5502 	  if (code == EQ)
5503 	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5504 				0, OPTAB_WIDEN);
5505 	  else
5506 	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5507 	}
5508 
5509       /* If we couldn't do it that way, for NE we can "or" the two's complement
5510 	 of the value with itself.  For EQ, we take the one's complement of
5511 	 that "or", which is an extra insn, so we only handle EQ if branches
5512 	 are expensive.  */
5513 
5514       if (tem == 0 && (code == NE || BRANCH_COST > 1))
5515 	{
5516 	  if (rtx_equal_p (subtarget, op0))
5517 	    subtarget = 0;
5518 
5519 	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5520 	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5521 			      OPTAB_WIDEN);
5522 
5523 	  if (tem && code == EQ)
5524 	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5525 	}
5526     }
5527 
5528   if (tem && normalizep)
5529     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5530 			size_int (GET_MODE_BITSIZE (mode) - 1),
5531 			subtarget, normalizep == 1);
5532 
5533   if (tem)
5534     {
5535       if (GET_MODE (tem) != target_mode)
5536 	{
5537 	  convert_move (target, tem, 0);
5538 	  tem = target;
5539 	}
5540       else if (!subtarget)
5541 	{
5542 	  emit_move_insn (target, tem);
5543 	  tem = target;
5544 	}
5545     }
5546   else
5547     delete_insns_since (last);
5548 
5549   return tem;
5550 }
5551 
5552 /* Like emit_store_flag, but always succeeds.  */
5553 
5554 rtx
emit_store_flag_force(rtx target,enum rtx_code code,rtx op0,rtx op1,enum machine_mode mode,int unsignedp,int normalizep)5555 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5556 		       enum machine_mode mode, int unsignedp, int normalizep)
5557 {
5558   rtx tem, label;
5559 
5560   /* First see if emit_store_flag can do the job.  */
5561   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5562   if (tem != 0)
5563     return tem;
5564 
5565   if (normalizep == 0)
5566     normalizep = 1;
5567 
5568   /* If this failed, we have to do this with set/compare/jump/set code.  */
5569 
5570   if (!REG_P (target)
5571       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5572     target = gen_reg_rtx (GET_MODE (target));
5573 
5574   emit_move_insn (target, const1_rtx);
5575   label = gen_label_rtx ();
5576   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5577 			   NULL_RTX, label);
5578 
5579   emit_move_insn (target, const0_rtx);
5580   emit_label (label);
5581 
5582   return target;
5583 }
5584 
5585 /* Perform possibly multi-word comparison and conditional jump to LABEL
5586    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5587    now a thin wrapper around do_compare_rtx_and_jump.  */
5588 
5589 static void
do_cmp_and_jump(rtx arg1,rtx arg2,enum rtx_code op,enum machine_mode mode,rtx label)5590 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5591 		 rtx label)
5592 {
5593   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5594   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5595 			   NULL_RTX, NULL_RTX, label);
5596 }
5597