1/* Assembly functions for the Xtensa version of libgcc1.
2   Copyright (C) 2001, 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
3   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 2, or (at your option) any later
10version.
11
12In addition to the permissions in the GNU General Public License, the
13Free Software Foundation gives you unlimited permission to link the
14compiled version of this file into combinations with other programs,
15and to distribute those combinations without any restriction coming
16from the use of this file.  (The General Public License restrictions
17do apply in other respects; for example, they cover modification of
18the file, and distribution when not linked into a combine
19executable.)
20
21GCC is distributed in the hope that it will be useful, but WITHOUT ANY
22WARRANTY; without even the implied warranty of MERCHANTABILITY or
23FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
24for more details.
25
26You should have received a copy of the GNU General Public License
27along with GCC; see the file COPYING.  If not, write to the Free
28Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
2902110-1301, USA.  */
30
31#include "xtensa-config.h"
32
33# Define macros for the ABS and ADDX* instructions to handle cases
34# where they are not included in the Xtensa processor configuration.
35
36	.macro	do_abs dst, src, tmp
37#if XCHAL_HAVE_ABS
38	abs	\dst, \src
39#else
40	neg	\tmp, \src
41	movgez	\tmp, \src, \src
42	mov	\dst, \tmp
43#endif
44	.endm
45
46	.macro	do_addx2 dst, as, at, tmp
47#if XCHAL_HAVE_ADDX
48	addx2	\dst, \as, \at
49#else
50	slli	\tmp, \as, 1
51	add	\dst, \tmp, \at
52#endif
53	.endm
54
55	.macro	do_addx4 dst, as, at, tmp
56#if XCHAL_HAVE_ADDX
57	addx4	\dst, \as, \at
58#else
59	slli	\tmp, \as, 2
60	add	\dst, \tmp, \at
61#endif
62	.endm
63
64	.macro	do_addx8 dst, as, at, tmp
65#if XCHAL_HAVE_ADDX
66	addx8	\dst, \as, \at
67#else
68	slli	\tmp, \as, 3
69	add	\dst, \tmp, \at
70#endif
71	.endm
72
73# Define macros for leaf function entry and return, supporting either the
74# standard register windowed ABI or the non-windowed call0 ABI.  These
75# macros do not allocate any extra stack space, so they only work for
76# leaf functions that do not need to spill anything to the stack.
77
78	.macro leaf_entry reg, size
79#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
80	entry \reg, \size
81#else
82	/* do nothing */
83#endif
84	.endm
85
86	.macro leaf_return
87#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
88	retw
89#else
90	ret
91#endif
92	.endm
93
94
#ifdef L_mulsi3
	/* __mulsi3: 32-bit integer multiply.
	   In:      a2, a3 = the two factors.
	   Out:     a2 = low 32 bits of the product.
	   Scratch: a4-a7, plus ACCLO when the MAC16 variant is built.
	   Exactly one of three bodies is selected by the preprocessor:
	   MUL16 instructions, the MAC16 unit, or a shift-and-add loop.  */
	.align	4
	.global	__mulsi3
	.type	__mulsi3,@function
__mulsi3:
	leaf_entry sp, 16

#if XCHAL_HAVE_MUL16
	/* Short path: if no bit above bit 15 is set in either factor, a
	   single mul16u produces the whole result.  */
	or	a4, a2, a3
	srai	a4, a4, 16
	bnez	a4, .Lmul_wide
	mul16u	a2, a2, a3
	leaf_return

.Lmul_wide:
	/* General case: sum the two cross products, shift the sum up by
	   16 and add the low-by-low product.  */
	srai	a5, a3, 16		/* a5 = a3 >> 16 */
	srai	a4, a2, 16		/* a4 = a2 >> 16 */
	mul16u	a6, a5, a2		/* a6 = (a3 >> 16) x a2 */
	mul16u	a7, a4, a3		/* a7 = (a2 >> 16) x a3 */
	mul16u	a4, a2, a3		/* a4 = a2 x a3 */
	add	a7, a6, a7
	slli	a7, a7, 16
	add	a2, a4, a7

#elif XCHAL_HAVE_MAC16
	/* Accumulate both cross products and read the sum back, then form
	   the unsigned low-by-low product and combine the two.  */
	mul.aa.hl a2, a3
	mula.aa.lh a2, a3
	rsr	a5, ACCLO		/* a5 = sum of the cross products */
	umul.aa.ll a2, a3
	rsr	a4, ACCLO		/* a4 = low-by-low product */
	slli	a5, a5, 16
	add	a2, a5, a4

#else /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

	/* Shift-and-add, one multiplier bit per step.  Four steps are
	   unrolled per pass so the addx forms can be used, and the first
	   pass is peeled so the product can start out as zero.
	   Register roles from here on:
	     a2 = running product      a3 = multiplier bits still to do
	     a4 = multiplicand         a5 = sign (bit 31)
	     a6 = bit being tested     a7 = candidate sum  */

	/* Work on magnitudes.  Bit 31 of a5 is set iff exactly one of the
	   inputs is negative.  */
	xor	a5, a2, a3
	do_abs	a2, a2, a6
	do_abs	a3, a3, a6

	/* Arrange for the smaller magnitude to be the multiplier.  */
	mov	a4, a3
	sub	a7, a2, a3
	movgez	a4, a2, a7		/* a4 = max(a2, a3) */
	movltz	a3, a2, a7		/* a3 = min(a2, a3) */

	/* Peeled first pass: multiplier bits 0-3.  */
	extui	a6, a3, 0, 1
	movi	a2, 0
	movnez	a2, a4, a6

	extui	a6, a3, 1, 1
	do_addx2 a7, a4, a2, a7
	movnez	a2, a7, a6

	extui	a6, a3, 2, 1
	do_addx4 a7, a4, a2, a7
	movnez	a2, a7, a6

	extui	a6, a3, 3, 1
	do_addx8 a7, a4, a2, a7
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmul_next_nibble

	/* Multiplier exhausted: negate the product if the sign bit in a5
	   is set, then return.  */
	neg	a3, a2
	movltz	a2, a3, a5
	leaf_return

	.align	4
.Lmul_next_nibble:
	/* Advance both operands to the next four multiplier bits.  */
	slli	a4, a4, 4
	srli	a3, a3, 4

	extui	a6, a3, 0, 1
	add	a7, a4, a2
	movnez	a2, a7, a6

	extui	a6, a3, 1, 1
	do_addx2 a7, a4, a2, a7
	movnez	a2, a7, a6

	extui	a6, a3, 2, 1
	do_addx4 a7, a4, a2, a7
	movnez	a2, a7, a6

	extui	a6, a3, 3, 1
	do_addx8 a7, a4, a2, a7
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmul_next_nibble

	/* Apply the sign recorded in a5.  */
	neg	a3, a2
	movltz	a2, a3, a5

#endif /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

	leaf_return
	.size	__mulsi3,.-__mulsi3

#endif /* L_mulsi3 */
197
198
199# Define a macro for the NSAU (unsigned normalize shift amount)
200# instruction, which computes the number of leading zero bits,
201# to handle cases where it is not included in the Xtensa processor
202# configuration.
203
204	.macro	do_nsau cnt, val, tmp, a
205#if XCHAL_HAVE_NSA
206	nsau	\cnt, \val
207#else
208	mov	\a, \val
209	movi	\cnt, 0
210	extui	\tmp, \a, 16, 16
211	bnez	\tmp, 0f
212	movi	\cnt, 16
213	slli	\a, \a, 16
2140:
215	extui	\tmp, \a, 24, 8
216	bnez	\tmp, 1f
217	addi	\cnt, \cnt, 8
218	slli	\a, \a, 8
2191:
220	movi	\tmp, __nsau_data
221	extui	\a, \a, 24, 8
222	add	\tmp, \tmp, \a
223	l8ui	\tmp, \tmp, 0
224	add	\cnt, \cnt, \tmp
225#endif /* !XCHAL_HAVE_NSA */
226	.endm
227
#ifdef L_nsau
	/* __nsau_data: 256-entry byte table; entry i holds the number of
	   leading zero bits in the 8-bit value i (entry 0 holds 8).  It is
	   read by the do_nsau fallback.  The table is empty when the NSA
	   option is configured, but the symbol is still defined.  */
	.section .rodata
	.align	4
	.global	__nsau_data
	.type	__nsau_data,@object
__nsau_data:
#if !XCHAL_HAVE_NSA
	.byte	8, 7, 6, 6, 5, 5, 5, 5	/* entries   0-7   */
	.fill	8, 1, 4			/* entries   8-15  */
	.fill	16, 1, 3		/* entries  16-31  */
	.fill	32, 1, 2		/* entries  32-63  */
	.fill	64, 1, 1		/* entries  64-127 */
	.fill	128, 1, 0		/* entries 128-255 */
#endif /* !XCHAL_HAVE_NSA */
	.size	__nsau_data,.-__nsau_data
	.hidden	__nsau_data
#endif /* L_nsau */
255
256
#ifdef L_udivsi3
	/* __udivsi3: unsigned 32-bit division.
	   In:      a2 = dividend, a3 = divisor.
	   Out:     a2 = dividend / divisor; 0 when the divisor is 0.
	   Scratch: a3-a7.  */
	.align	4
	.global	__udivsi3
	.type	__udivsi3,@function
__udivsi3:
	leaf_entry sp, 16
	bltui	a3, 2, .Ludiv_small_divisor	/* divisor is 0 or 1 */

	mov	a6, a2			/* a6 = what is left of the dividend */
	do_nsau	a5, a6, a2, a7		/* a5 = leading zeros of the dividend */
	do_nsau	a4, a3, a2, a7		/* a4 = leading zeros of the divisor */
	bgeu	a5, a4, .Ludiv_one_bit

	movi	a2, 0			/* quotient starts at 0 */
	sub	a4, a4, a5		/* a4 = iteration count */
	ssl	a4
	sll	a3, a3			/* divisor <<= count */

	/* Test-subtract-and-shift: each pass yields one quotient bit.  */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Ludiv_loop_end
#endif /* XCHAL_HAVE_LOOPS */
.Ludiv_loop:
	bltu	a6, a3, .Ludiv_shift
	sub	a6, a6, a3
	addi	a2, a2, 1
.Ludiv_shift:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Ludiv_loop
#endif /* !XCHAL_HAVE_LOOPS */
.Ludiv_loop_end:

	/* Final quotient bit, with the divisor back at its original value.  */
	bltu	a6, a3, .Ludiv_done
	addi	a2, a2, 1
.Ludiv_done:
	leaf_return

.Ludiv_small_divisor:
	beqz	a3, .Ludiv_ret0		/* division by zero: return 0 */
	leaf_return			/* divisor is 1: a2 is still the dividend */

.Ludiv_one_bit:
	/* The dividend has no more significant bits than the divisor, so
	   the quotient is 1 if dividend >= divisor and 0 otherwise.  */
	bltu	a6, a3, .Ludiv_ret0
	movi	a2, 1
	leaf_return

.Ludiv_ret0:
	/* Also reached for a zero divisor; this could raise an exception
	   instead.  */
	movi	a2, 0
	leaf_return
	.size	__udivsi3,.-__udivsi3

#endif /* L_udivsi3 */
316
317
#ifdef L_divsi3
	/* __divsi3: signed 32-bit division.
	   In:      a2 = dividend, a3 = divisor.
	   Out:     a2 = dividend / divisor; 0 when the divisor is 0.
	   Scratch: a3-a8.
	   The division is done on magnitudes; the sign of the result is
	   taken from bit 31 of (dividend ^ divisor), kept in a7.  */
	.align	4
	.global	__divsi3
	.type	__divsi3,@function
__divsi3:
	leaf_entry sp, 16
	xor	a7, a2, a3		/* a7 = sign of the result */
	do_abs	a6, a2, a4		/* a6 = |dividend| */
	do_abs	a3, a3, a4		/* a3 = |divisor| */
	bltui	a3, 2, .Lsdiv_small_divisor	/* |divisor| is 0 or 1 */
	do_nsau	a5, a6, a2, a8		/* a5 = leading zeros of |dividend| */
	do_nsau	a4, a3, a2, a8		/* a4 = leading zeros of |divisor| */
	bgeu	a5, a4, .Lsdiv_one_bit

	movi	a2, 0			/* quotient starts at 0 */
	sub	a4, a4, a5		/* a4 = iteration count */
	ssl	a4
	sll	a3, a3			/* |divisor| <<= count */

	/* Test-subtract-and-shift: each pass yields one quotient bit.  */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lsdiv_loop_end
#endif /* XCHAL_HAVE_LOOPS */
.Lsdiv_loop:
	bltu	a6, a3, .Lsdiv_shift
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lsdiv_shift:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lsdiv_loop
#endif /* !XCHAL_HAVE_LOOPS */
.Lsdiv_loop_end:

	/* Final quotient bit, with |divisor| back at its original value.  */
	bltu	a6, a3, .Lsdiv_sign
	addi	a2, a2, 1
.Lsdiv_sign:
	neg	a5, a2
	movltz	a2, a5, a7		/* negate the quotient when sign < 0 */
	leaf_return

.Lsdiv_small_divisor:
	beqz	a3, .Lsdiv_ret0		/* division by zero: return 0 */
	/* |divisor| is 1: the result is |dividend| carrying the sign.  */
	neg	a2, a6
	movgez	a2, a6, a7		/* sign >= 0 ? |dividend| : -|dividend| */
	leaf_return

.Lsdiv_one_bit:
	/* |dividend| has no more significant bits than |divisor|, so the
	   magnitude of the quotient is 0 or 1.  */
	bltu	a6, a3, .Lsdiv_ret0
	movi	a4, -1
	movi	a2, 1
	movltz	a2, a4, a7		/* sign < 0 ? -1 : 1 */
	leaf_return

.Lsdiv_ret0:
	/* Also reached for a zero divisor; this could raise an exception
	   instead.  */
	movi	a2, 0
	leaf_return
	.size	__divsi3,.-__divsi3

#endif /* L_divsi3 */
383
384
#ifdef L_umodsi3
	/* __umodsi3: unsigned 32-bit remainder.
	   In:      a2 = dividend, a3 = divisor.
	   Out:     a2 = dividend % divisor; 0 when the divisor is 0 or 1.
	   Scratch: a3-a7.  */
	.align	4
	.global	__umodsi3
	.type	__umodsi3,@function
__umodsi3:
	leaf_entry sp, 16
	bltui	a3, 2, .Lumod_small_divisor	/* divisor is 0 or 1 */

	do_nsau	a5, a2, a6, a7		/* a5 = leading zeros of the dividend */
	do_nsau	a4, a3, a6, a7		/* a4 = leading zeros of the divisor */
	bgeu	a5, a4, .Lumod_last_step

	sub	a4, a4, a5		/* a4 = iteration count */
	ssl	a4
	sll	a3, a3			/* divisor <<= count */

	/* Test-subtract-and-shift; the remainder is reduced in place in a2.  */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lumod_loop_end
#endif /* XCHAL_HAVE_LOOPS */
.Lumod_loop:
	bltu	a2, a3, .Lumod_shift
	sub	a2, a2, a3
.Lumod_shift:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lumod_loop
#endif /* !XCHAL_HAVE_LOOPS */
.Lumod_loop_end:

.Lumod_last_step:
	/* One last conditional subtraction against the unshifted divisor.
	   This is also the whole job when the dividend has no more
	   significant bits than the divisor.  */
	bltu	a2, a3, .Lumod_done
	sub	a2, a2, a3
.Lumod_done:
	leaf_return

.Lumod_small_divisor:
	/* The divisor is 0 or 1, so the result is 0.  A zero divisor could
	   raise an exception here instead.  */
	movi	a2, 0
	leaf_return
	.size	__umodsi3,.-__umodsi3

#endif /* L_umodsi3 */
430
431
#ifdef L_modsi3
	/* __modsi3: signed 32-bit remainder.
	   In:      a2 = dividend, a3 = divisor.
	   Out:     a2 = dividend % divisor, carrying the sign of the
	            dividend; 0 when the divisor is 0, 1 or -1.
	   Scratch: a3-a8.  */
	.align	4
	.global	__modsi3
	.type	__modsi3,@function
__modsi3:
	leaf_entry sp, 16
	mov	a7, a2			/* a7 = original signed dividend */
	do_abs	a2, a2, a4		/* a2 = |dividend| */
	do_abs	a3, a3, a4		/* a3 = |divisor| */
	bltui	a3, 2, .Lsmod_small_divisor	/* |divisor| is 0 or 1 */
	do_nsau	a5, a2, a6, a8		/* a5 = leading zeros of |dividend| */
	do_nsau	a4, a3, a6, a8		/* a4 = leading zeros of |divisor| */
	bgeu	a5, a4, .Lsmod_last_step

	sub	a4, a4, a5		/* a4 = iteration count */
	ssl	a4
	sll	a3, a3			/* |divisor| <<= count */

	/* Test-subtract-and-shift; the remainder is reduced in place in a2.  */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lsmod_loop_end
#endif /* XCHAL_HAVE_LOOPS */
.Lsmod_loop:
	bltu	a2, a3, .Lsmod_shift
	sub	a2, a2, a3
.Lsmod_shift:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lsmod_loop
#endif /* !XCHAL_HAVE_LOOPS */
.Lsmod_loop_end:

.Lsmod_last_step:
	/* One last conditional subtraction against the unshifted |divisor|.  */
	bltu	a2, a3, .Lsmod_sign
	sub	a2, a2, a3
.Lsmod_sign:
	/* Negate the remainder when the original dividend was negative.
	   a4 is no longer needed as a counter and serves as scratch.  */
	neg	a4, a2
	movltz	a2, a4, a7
	leaf_return

.Lsmod_small_divisor:
	/* |divisor| is 0 or 1, so the result is 0.  A zero divisor could
	   raise an exception here instead.  */
	movi	a2, 0
	leaf_return
	.size	__modsi3,.-__modsi3

#endif /* L_modsi3 */
482
483#include "ieee754-df.S"
484#include "ieee754-sf.S"
485