/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001, 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

#include "xtensa-config.h"

# Define macros for the ABS and ADDX* instructions to handle cases
# where they are not included in the Xtensa processor configuration.
/* do_abs dst, src, tmp
   Set dst = abs(src).  Uses the ABS instruction when XCHAL_HAVE_ABS is
   set; otherwise negates src into tmp and then puts the original value
   back when src >= 0.
   Clobbers: tmp (fallback path only).
   Constraint: tmp must not be the same register as src, because the
   fallback writes tmp before its last read of src.  dst may be the
   same register as src.  */

	.macro	do_abs dst, src, tmp
#if XCHAL_HAVE_ABS
	abs	\dst, \src
#else
	neg	\tmp, \src		/* tmp = -src */
	movgez	\tmp, \src, \src	/* if (src >= 0) tmp = src */
	mov	\dst, \tmp
#endif
	.endm

/* do_addx2 / do_addx4 / do_addx8 dst, as, at, tmp
   Set dst = (as << N) + at for N = 1, 2, 3.  Uses ADDX2/ADDX4/ADDX8 when
   XCHAL_HAVE_ADDX is set; otherwise shifts into tmp and adds.
   Clobbers: tmp (fallback path only).
   Constraint: tmp must not be the same register as at, because the
   fallback writes tmp before reading at.  tmp may be the same register
   as dst (this file always calls these macros that way).  */

	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm

/* Define macros for leaf function entry and return, supporting either the
   standard register windowed ABI or the non-windowed call0 ABI.  These
   macros do not allocate any extra stack space, so they only work for
   leaf functions that do not need to spill anything to the stack.

   leaf_entry reg, size  emits "entry reg, size" for the windowed ABI and
                         nothing for call0.
   leaf_return           emits "retw" for the windowed ABI and "ret" for
                         call0.  */

	.macro	leaf_entry reg, size
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	entry	\reg, \size
#else
	/* do nothing */
#endif
	.endm

	.macro	leaf_return
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	retw
#else
	ret
#endif
	.endm


#ifdef L_mulsi3
/* __mulsi3: 32-bit integer multiply.
   In:      a2, a3 = operands
   Out:     a2 = low 32 bits of a2 * a3
   Scratch: a4-a7 (a3 is also overwritten on the bit-serial path; the
            MAC16 path additionally writes the MAC16 accumulator).
   One of three implementations is selected at build time:
   XCHAL_HAVE_MUL16, else XCHAL_HAVE_MAC16, else a shift-and-add loop.  */
	.align	4
	.global	__mulsi3
	.type	__mulsi3,@function
__mulsi3:
	leaf_entry sp, 16

#if XCHAL_HAVE_MUL16
	/* Fast path: if no bit above bit 15 is set in either operand, one
	   16x16 multiply of the low halves gives the whole result.  */
	or	a4, a2, a3
	srai	a4, a4, 16
	bnez	a4, .LMUL16
	mul16u	a2, a2, a3
	leaf_return
.LMUL16:
	/* General case, built from 16x16 partial products (mul16u uses the
	   low 16 bits of each source register):
	   a2 = ((hi(a2) * lo(a3) + hi(a3) * lo(a2)) << 16) + lo(a2) * lo(a3)  */
	srai	a4, a2, 16		/* a4 = upper half of a2 */
	srai	a5, a3, 16		/* a5 = upper half of a3 */
	mul16u	a7, a4, a3
	mul16u	a6, a5, a2
	mul16u	a4, a2, a3		/* low x low partial product */
	add	a7, a7, a6		/* sum of the two cross products */
	slli	a7, a7, 16
	add	a2, a7, a4

#elif XCHAL_HAVE_MAC16
	/* Same decomposition using the MAC16 accumulator: accumulate the
	   two cross products, read them back through ACCLO, then form the
	   low x low product and combine.  */
	mul.aa.hl a2, a3
	mula.aa.lh a2, a3
	rsr	a5, ACCLO		/* a5 = sum of cross products */
	umul.aa.ll a2, a3
	rsr	a4, ACCLO		/* a4 = low x low product */
	slli	a5, a5, 16
	add	a2, a4, a5

#else /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

	/* Multiply one bit at a time, but unroll the loop 4x to better
	   exploit the addx instructions and avoid overhead.
	   Peel the first iteration to save a cycle on init.

	   Register roles from here on:
	     a5 = sign word (negative iff the result must be negated)
	     a4 = larger magnitude, shifted left 4 bits per iteration
	     a3 = smaller magnitude, consumed 4 bits per iteration
	     a2 = running product
	     a6, a7 = temporaries  */

	/* Avoid negative numbers.  */
	xor	a5, a2, a3	/* top bit is 1 iff one of the inputs is negative */
	do_abs	a3, a3, a6
	do_abs	a2, a2, a6

	/* Swap so the second argument is smaller.  */
	sub	a7, a2, a3
	mov	a4, a3
	movgez	a4, a2, a7	/* a4 = max(a2, a3) */
	movltz	a3, a2, a7	/* a3 = min(a2, a3) */

	/* Peeled first group: multiplier bits 0..3.  For each bit k, form
	   a7 = (a4 << k) + a2 and commit it to a2 only if bit k is set.  */
	movi	a2, 0
	extui	a6, a3, 0, 1
	movnez	a2, a4, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	/* If the multiplier has bits above bit 3, keep going; otherwise
	   apply the sign and return.  */
	bgeui	a3, 16, .Lmult_main_loop
	neg	a3, a2
	movltz	a2, a3, a5	/* if (sign < 0) a2 = -a2 */
	leaf_return

	.align	4
.Lmult_main_loop:
	srli	a3, a3, 4	/* expose the next 4 multiplier bits */
	slli	a4, a4, 4	/* scale the multiplicand to match */

	add	a7, a4, a2
	extui	a6, a3, 0, 1
	movnez	a2, a7, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop

	neg	a3, a2
	movltz	a2, a3, a5	/* if (sign < 0) a2 = -a2 */

#endif /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

	leaf_return
	.size	__mulsi3,.-__mulsi3

#endif /* L_mulsi3 */


/* Define a macro for the NSAU (unsigned normalize shift amount)
   instruction, which computes the number of leading zero bits,
   to handle cases where it is not included in the Xtensa processor
   configuration.

   do_nsau cnt, val, tmp, a
   Set cnt = number of leading zero bits in val.
   Clobbers: tmp and a (fallback path only); val is only read.
   The fallback narrows the search to the most significant non-zero
   byte (16 bits, then 8 bits at a time) and finishes with a lookup in
   the 256-entry __nsau_data table.  It uses the numeric local labels
   0 and 1.

   NOTE(review): the four division/modulo routines below all define the
   same local labels (.Lloop, .Lzerobit, .Lloopend, .Lreturn, .Lle_one,
   .Lspecial, ...), so this file can only assemble when at most one of
   their L_ macros is defined per build -- presumably what the libgcc
   build does; confirm against the makefile.  */

	.macro	do_nsau cnt, val, tmp, a
#if XCHAL_HAVE_NSA
	nsau	\cnt, \val
#else
	mov	\a, \val
	movi	\cnt, 0
	extui	\tmp, \a, 16, 16	/* tmp = upper 16 bits */
	bnez	\tmp, 0f
	movi	\cnt, 16		/* upper half is zero: 16 so far */
	slli	\a, \a, 16
0:
	extui	\tmp, \a, 24, 8		/* tmp = top byte */
	bnez	\tmp, 1f
	addi	\cnt, \cnt, 8		/* top byte is zero: 8 more */
	slli	\a, \a, 8
1:
	movi	\tmp, __nsau_data
	extui	\a, \a, 24, 8		/* a = table index (top byte) */
	add	\tmp, \tmp, \a
	l8ui	\tmp, \tmp, 0		/* leading zeros within that byte */
	add	\cnt, \cnt, \tmp
#endif /* !XCHAL_HAVE_NSA */
	.endm

#ifdef L_nsau
/* __nsau_data: lookup table used by the do_nsau fallback.  Entry i is the
   number of leading zero bits in the 8-bit value i (8 for i == 0).
   The symbol is always defined, but the 256 bytes of data are emitted
   only when the NSA option is absent.  */
	.section .rodata
	.align	4
	.global	__nsau_data
	.type	__nsau_data,@object
__nsau_data:
#if !XCHAL_HAVE_NSA
	.byte	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
	.byte	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* !XCHAL_HAVE_NSA */
	.size	__nsau_data,.-__nsau_data
	.hidden	__nsau_data
#endif /* L_nsau */


#ifdef L_udivsi3
/* __udivsi3: unsigned 32-bit division.
   In:      a2 = dividend, a3 = divisor
   Out:     a2 = dividend / divisor; 0 when the divisor is 0
   Scratch: a3-a7
   Register roles: a6 = running remainder, a3 = shifted divisor,
   a2 = quotient being built, a4 = iteration count.  */
	.align	4
	.global	__udivsi3
	.type	__udivsi3,@function
__udivsi3:
	leaf_entry sp, 16
	bltui	a3, 2, .Lle_one		/* check if the divisor <= 1 */

	mov	a6, a2			/* keep dividend in a6 */
	do_nsau	a5, a6, a2, a7		/* dividend_shift = nsau(dividend) */
	do_nsau	a4, a3, a2, a7		/* divisor_shift = nsau(divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5		/* count = divisor_shift - dividend_shift */
	ssl	a4
	sll	a3, a3			/* divisor <<= count */
	movi	a2, 0			/* quotient = 0 */

	/* test-subtract-and-shift loop; one quotient bit on each iteration.
	   With the loop option, LOOPNEZ repeats the body count times;
	   without it, count is decremented explicitly at the bottom.  */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1		/* increment quotient if dividend >= divisor */
.Lreturn:
	leaf_return

.Lle_one:
	/* a2 has not been modified yet, so it still holds the dividend.  */
	beqz	a3, .Lerror		/* if divisor == 1, return the dividend */
	leaf_return

.Lspecial:
	/* The dividend has at least as many leading zeros as the divisor,
	   so the quotient is 0 or 1: return dividend >= divisor.  */
	bltu	a6, a3, .Lreturn0
	movi	a2, 1
	leaf_return

.Lerror:
	/* just return 0; could throw an exception */

.Lreturn0:
	movi	a2, 0
	leaf_return
	.size	__udivsi3,.-__udivsi3

#endif /* L_udivsi3 */


#ifdef L_divsi3
/* __divsi3: signed 32-bit division.
   In:      a2 = dividend, a3 = divisor
   Out:     a2 = dividend / divisor, computed on magnitudes and negated
            when the operand signs differ; 0 when the divisor is 0
   Scratch: a3-a7, plus a8 when the do_nsau fallback is used
   Register roles: a7 = sign word (dividend ^ divisor), a6 = running
   remainder, a3 = shifted udivisor, a2 = quotient, a4 = count.  */
	.align	4
	.global	__divsi3
	.type	__divsi3,@function
__divsi3:
	leaf_entry sp, 16
	xor	a7, a2, a3		/* sign = dividend ^ divisor */
	do_abs	a6, a2, a4		/* udividend = abs(dividend) */
	do_abs	a3, a3, a4		/* udivisor = abs(divisor) */
	bltui	a3, 2, .Lle_one		/* check if udivisor <= 1 */
	do_nsau	a5, a6, a2, a8		/* udividend_shift = nsau(udividend) */
	do_nsau	a4, a3, a2, a8		/* udivisor_shift = nsau(udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5		/* count = udivisor_shift - udividend_shift */
	ssl	a4
	sll	a3, a3			/* udivisor <<= count */
	movi	a2, 0			/* quotient = 0 */

	/* test-subtract-and-shift loop; one quotient bit on each iteration */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1		/* increment quotient if udividend >= udivisor */
.Lreturn:
	neg	a5, a2
	movltz	a2, a5, a7		/* return (sign < 0) ? -quotient : quotient */
	leaf_return

.Lle_one:
	beqz	a3, .Lerror
	neg	a2, a6			/* if udivisor == 1, then return... */
	movgez	a2, a6, a7		/* (sign < 0) ? -udividend : udividend */
	leaf_return

.Lspecial:
	bltu	a6, a3, .Lreturn0	/* if dividend < divisor, return 0 */
	movi	a2, 1
	movi	a4, -1
	movltz	a2, a4, a7		/* else return (sign < 0) ? -1 : 1 */
	leaf_return

.Lerror:
	/* just return 0; could throw an exception */

.Lreturn0:
	movi	a2, 0
	leaf_return
	.size	__divsi3,.-__divsi3

#endif /* L_divsi3 */


#ifdef L_umodsi3
/* __umodsi3: unsigned 32-bit remainder.
   In:      a2 = dividend, a3 = divisor
   Out:     a2 = dividend % divisor; 0 when the divisor is 0 or 1
   Scratch: a3-a7
   Register roles: a2 = running remainder, a3 = shifted divisor,
   a4 = iteration count.  */
	.align	4
	.global	__umodsi3
	.type	__umodsi3,@function
__umodsi3:
	leaf_entry sp, 16
	bltui	a3, 2, .Lle_one		/* check if the divisor is <= 1 */

	do_nsau	a5, a2, a6, a7		/* dividend_shift = nsau(dividend) */
	do_nsau	a4, a3, a6, a7		/* divisor_shift = nsau(divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5		/* count = divisor_shift - dividend_shift */
	ssl	a4
	sll	a3, a3			/* divisor <<= count */

	/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	/* Reached both by falling out of the loop and directly when the
	   dividend has at least as many leading zeros as the divisor; in
	   either case at most one more subtraction is needed.  */
.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3		/* subtract once more if dividend >= divisor */
.Lreturn:
	leaf_return

.Lle_one:
	/* the divisor is either 0 or 1, so just return 0.
	   someday we may want to throw an exception if the divisor is 0.  */
	movi	a2, 0
	leaf_return
	.size	__umodsi3,.-__umodsi3

#endif /* L_umodsi3 */


#ifdef L_modsi3
/* __modsi3: signed 32-bit remainder.
   In:      a2 = dividend, a3 = divisor
   Out:     a2 = remainder of abs(dividend) / abs(divisor), negated when
            the original dividend is negative; 0 when abs(divisor) is
            0 or 1
   Scratch: a3-a7, plus a8 when the do_nsau fallback is used
   Register roles: a7 = original signed dividend, a2 = running
   remainder, a3 = shifted udivisor, a4 = iteration count.  */
	.align	4
	.global	__modsi3
	.type	__modsi3,@function
__modsi3:
	leaf_entry sp, 16
	mov	a7, a2			/* save original (signed) dividend */
	do_abs	a2, a2, a4		/* udividend = abs(dividend) */
	do_abs	a3, a3, a4		/* udivisor = abs(divisor) */
	bltui	a3, 2, .Lle_one		/* check if udivisor <= 1 */
	do_nsau	a5, a2, a6, a8		/* udividend_shift = nsau(udividend) */
	do_nsau	a4, a3, a6, a8		/* udivisor_shift = nsau(udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5		/* count = udivisor_shift - udividend_shift */
	ssl	a4
	sll	a3, a3			/* udivisor <<= count */

	/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	/* Reached both by falling out of the loop and directly when the
	   udividend has at least as many leading zeros as the udivisor.  */
.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3		/* subtract once more if udividend >= udivisor */
.Lreturn:
	bgez	a7, .Lpositive
	neg	a2, a2			/* if (dividend < 0), return -udividend */
.Lpositive:
	leaf_return

.Lle_one:
	/* udivisor is either 0 or 1, so just return 0.
	   someday we may want to throw an exception if udivisor is 0.  */
	movi	a2, 0
	leaf_return
	.size	__modsi3,.-__modsi3

#endif /* L_modsi3 */

#include "ieee754-df.S"
#include "ieee754-sf.S"