1#ifndef __ASSEMBLER__
2# define __ASSEMBLER__ 1
3#endif
4#include "crypto/sparc_arch.h"
5
6.text
7
! cmll_t4_encrypt: run one 16-byte block through the key schedule in
! forward order, using the T4 Camellia opcodes (emitted below as .word).
!
! In:   %o0 = input block, any byte alignment
!       %o1 = output block, any byte alignment
!       %o2 = key schedule; the 32-bit word at offset 272 holds
!             grandRounds (3 or 4)
! Out:  16 bytes written at the address passed in %o1.  No return value
!       is set up; %o0 is left holding the rounded-down input address or
!       a loaded data word.
! Uses: %g1, %g4, %g5, %o0-%o5, %f0-%f8, %f12-%f26, %icc.
!       NOTE(review): alignaddrl on the unaligned output path presumably
!       also rewrites the GSR alignment field -- confirm in the VIS manual.
! Leaf routine: no register window is allocated, return is via retl.
8.globl    cmll_t4_encrypt
9.align    32
10cmll_t4_encrypt:
          ! %g1 = input address modulo 8, %o0 = input rounded down to 8 bytes
11          andcc               %o0, 7, %g1                   ! is input aligned?
12          andn                %o0, 7, %o0
13
          ! first 16 bytes of the schedule, xored into the data further down
14          ldx                 [%o2 + 0], %g4
15          ldx                 [%o2 + 8], %g5
16
17          ldx                 [%o0 + 0], %o4
18          bz,pt               %icc, 1f
19          ldx                 [%o0 + 8], %o5
          ! Unaligned input only: fetch a third doubleword and funnel-shift
          ! the three words into %o4:%o5.  %g1 becomes the left shift in
          ! bits; %o3 holds its negation, which works as the complementary
          ! right shift because the 64-bit shifts use only the low six bits
          ! of the count.
20          ldx                 [%o0 + 16], %o0
21          sll                 %g1, 3, %g1
22          sub                 %g0, %g1, %o3
23          sllx                %o4, %g1, %o4
24          sllx                %o5, %g1, %g1
25          srlx                %o5, %o3, %o5
26          srlx                %o0, %o3, %o3
27          or                  %o5, %o4, %o4
28          or                  %o3, %g1, %o5
291:
          ! Preload eight doublewords of key material into %f12-%f26, xor
          ! the block with %g4:%g5 and move it into %f0:%f2.  %o3 becomes
          ! grandRounds - 1, the number of passes through .Lenc.
30          ld                  [%o2 + 272], %o3    ! grandRounds, 3 or 4
31          ldd                 [%o2 + 16], %f12
32          ldd                 [%o2 + 24], %f14
33          xor                 %g4, %o4, %o4
34          xor                 %g5, %o5, %o5
35          ldd                 [%o2 + 32], %f16
36          ldd                 [%o2 + 40], %f18
37          .word     0x81b0230c !movxtod %o4,%f0
38          .word     0x85b0230d !movxtod %o5,%f2
39          ldd                 [%o2 + 48], %f20
40          ldd                 [%o2 + 56], %f22
41          sub                 %o3, 1, %o3
42          ldd                 [%o2 + 64], %f24
43          ldd                 [%o2 + 72], %f26
44          add                 %o2, 80, %o2
45
          ! Each pass: six camellia_f steps, then camellia_fl/camellia_fli.
          ! The key register just consumed is reloaded with the next 64
          ! bytes of the schedule while the following step executes; %o2
          ! advances by 64 in the branch delay slot.
46.Lenc:
47          .word     0x84cb0182 !camellia_f        %f12,%f2,%f0,%f2
48          ldd                 [%o2 + 0], %f12
49          sub                 %o3,1,%o3
50          .word     0x80cb8580 !camellia_f        %f14,%f0,%f2,%f0
51          ldd                 [%o2 + 8], %f14
52          .word     0x84cc0182 !camellia_f        %f16,%f2,%f0,%f2
53          ldd                 [%o2 + 16], %f16
54          .word     0x80cc8580 !camellia_f        %f18,%f0,%f2,%f0
55          ldd                 [%o2 + 24], %f18
56          .word     0x84cd0182 !camellia_f        %f20,%f2,%f0,%f2
57          ldd                 [%o2 + 32], %f20
58          .word     0x80cd8580 !camellia_f        %f22,%f0,%f2,%f0
59          ldd                 [%o2 + 40], %f22
60          .word     0x81b62780 !camellia_fl       %f24,%f0,%f0
61          ldd                 [%o2 + 48], %f24
62          .word     0x85b6a7a2 !camellia_fli      %f26,%f2,%f2
63          ldd                 [%o2 + 56], %f26
64          brnz,pt             %o3, .Lenc
65          add                 %o2, 64, %o2
66
          ! Final six camellia_f steps, then xor with the last two key
          ! doublewords.  The last two steps go through %f4 so the halves
          ! end up in output order.  The andcc result is consumed by the
          ! bnz below; nothing in between changes %icc.
67          andcc               %o1, 7, %o4                   ! is output aligned?
68          .word     0x84cb0182 !camellia_f        %f12,%f2,%f0,%f2
69          .word     0x80cb8580 !camellia_f        %f14,%f0,%f2,%f0
70          .word     0x84cc0182 !camellia_f        %f16,%f2,%f0,%f2
71          .word     0x80cc8580 !camellia_f        %f18,%f0,%f2,%f0
72          .word     0x88cd0182 !camellia_f        %f20,%f2,%f0,%f4
73          .word     0x84cd8980 !camellia_f        %f22,%f0,%f4,%f2
74          .word     0x81b60d84 !fxor    %f24,%f4,%f0
75          .word     0x85b68d82 !fxor    %f26,%f2,%f2
76
77          bnz,pn              %icc, 2f
78          nop
79
          ! aligned output: two plain 8-byte stores, the second in the
          ! retl delay slot
80          std                 %f0, [%o1 + 0]
81          retl
82          std                 %f2, [%o1 + 8]
83
          ! Unaligned output: alignaddrl rewrites %o1, faligndata builds
          ! three shifted doublewords in %f4, %f6 and %f8.  The middle one
          ! is stored whole; the outer two use partial stores under the
          ! mask 0xff >> (out & 7) and its complement (orn).
842:        .word     0x93b24340 !alignaddrl        %o1,%g0,%o1
85          mov                 0xff, %o5
86          srl                 %o5, %o4, %o5
87
88          .word     0x89b00900 !faligndata        %f0,%f0,%f4
89          .word     0x8db00902 !faligndata        %f0,%f2,%f6
90          .word     0x91b08902 !faligndata        %f2,%f2,%f8
91
92          stda                %f4, [%o1 + %o5]0xc0          ! partial store
93          std                 %f6, [%o1 + 8]
94          add                 %o1, 16, %o1
95          orn                 %g0, %o5, %o5
96          retl
97          stda                %f8, [%o1 + %o5]0xc0          ! partial store
98.type     cmll_t4_encrypt,#function
99.size     cmll_t4_encrypt,.-cmll_t4_encrypt
100
! cmll_t4_decrypt: run one 16-byte block through the key schedule in
! reverse order.  Same shape as cmll_t4_encrypt, except that the key
! pointer starts at %o2 + 64*grandRounds and walks downwards, and the two
! final fxor operands (%f26, %f24) are swapped.
!
! In:   %o0 = input block, any byte alignment
!       %o1 = output block, any byte alignment
!       %o2 = key schedule; the 32-bit word at offset 272 holds
!             grandRounds (3 or 4)
! Out:  16 bytes written at the address passed in %o1.  No return value
!       is set up.
! Uses: %g1, %g4, %g5, %o0-%o5, %f0-%f8, %f12-%f26, %icc.
!       NOTE(review): alignaddrl on the unaligned output path presumably
!       also rewrites the GSR alignment field -- confirm in the VIS manual.
! Leaf routine: no register window is allocated, return is via retl.
101.globl    cmll_t4_decrypt
102.align    32
103cmll_t4_decrypt:
104          ld                  [%o2 + 272], %o3    ! grandRounds, 3 or 4
          ! %g1 = input address modulo 8, %o0 = input rounded down to 8 bytes
105          andcc               %o0, 7, %g1                   ! is input aligned?
106          andn                %o0, 7, %o0
107
          ! %o3 = 64 * grandRounds; %o2 now points at the far end of the
          ! schedule.  Neither instruction touches %icc, so the bz below
          ! still tests the andcc result.
108          sll                 %o3, 6, %o3
109          add                 %o3, %o2, %o2
110
111          ldx                 [%o0 + 0], %o4
112          bz,pt               %icc, 1f
113          ldx                 [%o0 + 8], %o5
          ! Unaligned input only: fetch a third doubleword and funnel-shift
          ! the three words into %o4:%o5.  %g4 holds the negated bit shift,
          ! which works as the complementary right shift because the 64-bit
          ! shifts use only the low six bits of the count.
114          ldx                 [%o0 + 16], %o0
115          sll                 %g1, 3, %g1
116          sub                 %g0, %g1, %g4
117          sllx                %o4, %g1, %o4
118          sllx                %o5, %g1, %g1
119          srlx                %o5, %g4, %o5
120          srlx                %o0, %g4, %g4
121          or                  %o5, %o4, %o4
122          or                  %g4, %g1, %o5
1231:
          ! The two doublewords at the schedule end are xored into the
          ! block; the eight doublewords below them are preloaded in
          ! descending address order into %f12-%f26.  %o3 is reduced by 64
          ! here and on every pass, giving grandRounds - 1 passes of .Ldec.
124          ldx                 [%o2 + 0], %g4
125          ldx                 [%o2 + 8], %g5
126          ldd                 [%o2 - 8], %f12
127          ldd                 [%o2 - 16], %f14
128          xor                 %g4, %o4, %o4
129          xor                 %g5, %o5, %o5
130          ldd                 [%o2 - 24], %f16
131          ldd                 [%o2 - 32], %f18
132          .word     0x81b0230c !movxtod %o4,%f0
133          .word     0x85b0230d !movxtod %o5,%f2
134          ldd                 [%o2 - 40], %f20
135          ldd                 [%o2 - 48], %f22
136          sub                 %o3, 64, %o3
137          ldd                 [%o2 - 56], %f24
138          ldd                 [%o2 - 64], %f26
139          sub                 %o2, 64, %o2
140
          ! Each pass: six camellia_f steps, then camellia_fl/camellia_fli,
          ! reloading every key register from the next lower 64 bytes; %o2
          ! steps down by 64 in the branch delay slot.
141.Ldec:
142          .word     0x84cb0182 !camellia_f        %f12,%f2,%f0,%f2
143          ldd                 [%o2 - 8], %f12
144          sub                 %o3, 64, %o3
145          .word     0x80cb8580 !camellia_f        %f14,%f0,%f2,%f0
146          ldd                 [%o2 - 16], %f14
147          .word     0x84cc0182 !camellia_f        %f16,%f2,%f0,%f2
148          ldd                 [%o2 - 24], %f16
149          .word     0x80cc8580 !camellia_f        %f18,%f0,%f2,%f0
150          ldd                 [%o2 - 32], %f18
151          .word     0x84cd0182 !camellia_f        %f20,%f2,%f0,%f2
152          ldd                 [%o2 - 40], %f20
153          .word     0x80cd8580 !camellia_f        %f22,%f0,%f2,%f0
154          ldd                 [%o2 - 48], %f22
155          .word     0x81b62780 !camellia_fl       %f24,%f0,%f0
156          ldd                 [%o2 - 56], %f24
157          .word     0x85b6a7a2 !camellia_fli      %f26,%f2,%f2
158          ldd                 [%o2 - 64], %f26
159          brnz,pt             %o3, .Ldec
160          sub                 %o2, 64, %o2
161
          ! Final six camellia_f steps, then xor with the last two key
          ! doublewords.  Here %f26 goes with the first half and %f24 with
          ! the second, because they were loaded in descending address
          ! order.  The andcc result is consumed by the bnz below.
162          andcc               %o1, 7, %o4                   ! is output aligned?
163          .word     0x84cb0182 !camellia_f        %f12,%f2,%f0,%f2
164          .word     0x80cb8580 !camellia_f        %f14,%f0,%f2,%f0
165          .word     0x84cc0182 !camellia_f        %f16,%f2,%f0,%f2
166          .word     0x80cc8580 !camellia_f        %f18,%f0,%f2,%f0
167          .word     0x88cd0182 !camellia_f        %f20,%f2,%f0,%f4
168          .word     0x84cd8980 !camellia_f        %f22,%f0,%f4,%f2
169          .word     0x81b68d84 !fxor    %f26,%f4,%f0
170          .word     0x85b60d82 !fxor    %f24,%f2,%f2
171
172          bnz,pn              %icc, 2f
173          nop
174
          ! aligned output: two plain 8-byte stores, the second in the
          ! retl delay slot
175          std                 %f0, [%o1 + 0]
176          retl
177          std                 %f2, [%o1 + 8]
178
          ! Unaligned output: alignaddrl rewrites %o1, faligndata builds
          ! three shifted doublewords in %f4, %f6 and %f8.  The middle one
          ! is stored whole; the outer two use partial stores under the
          ! mask 0xff >> (out & 7) and its complement (orn).
1792:        .word     0x93b24340 !alignaddrl        %o1,%g0,%o1
180          mov                 0xff, %o5
181          srl                 %o5, %o4, %o5
182
183          .word     0x89b00900 !faligndata        %f0,%f0,%f4
184          .word     0x8db00902 !faligndata        %f0,%f2,%f6
185          .word     0x91b08902 !faligndata        %f2,%f2,%f8
186
187          stda                %f4, [%o1 + %o5]0xc0          ! partial store
188          std                 %f6, [%o1 + 8]
189          add                 %o1, 16, %o1
190          orn                 %g0, %o5, %o5
191          retl
192          stda                %f8, [%o1 + %o5]0xc0          ! partial store
193.type     cmll_t4_decrypt,#function
194.size     cmll_t4_decrypt,.-cmll_t4_decrypt
! cmll_t4_set_key: expand a user key into the key schedule that
! cmll_t4_encrypt and cmll_t4_decrypt walk.
!
! In:   %o0 = user key bytes, any byte alignment
!       %o1 = key length in bits.  It is only compared against 192:
!             smaller values take the 128-bit path, exactly 192 the
!             192-bit path, anything larger the 256-bit path.
!       %o2 = key schedule to fill.  It is written with std/stx, so it
!             presumably has to be 8-byte aligned -- confirm with callers.
! Out:  %o0 = 0
!       k[0..51] and the word at 0x110 set to 3 on the 128-bit path;
!       k[0..67] and the word at 0x110 set to 4 on the 192/256-bit path
!       (k[n] in the comments below is the n-th 32-bit word of the schedule).
! Uses: %o3-%o5, %g4, %g5, %f0-%f8, %f16-%f30, %icc.  %o7 is borrowed to
!       find SIGMA and restored before use.  alignaddr presumably also
!       rewrites the GSR alignment field -- confirm in the VIS manual.
! Leaf routine: no register window is allocated, return is via retl.
195.globl    cmll_t4_set_key
196.align    32
197cmll_t4_set_key:
          ! %o3 = key address modulo 8; alignaddr rounds %o0 down to 8 bytes.
          ! The cmp result is still live at the bge further down: nothing
          ! between here and there writes %icc.
198          and                 %o0, 7, %o3
199          .word     0x91b20300 !alignaddr         %o0,%g0,%o0
200          cmp                 %o1, 192
201          ldd                 [%o0 + 0], %f0
202          bl,pt               %icc,.L128
203          ldd                 [%o0 + 8], %f2
204
205          be,pt               %icc,.L192
206          ldd                 [%o0 + 16], %f4
207
          ! 256-bit key: fourth doubleword; when the key is unaligned a
          ! fifth one is fetched and faligndata realigns all four.
208          brz,pt              %o3, .L256aligned
209          ldd                 [%o0 + 24], %f6
210
211          ldd                 [%o0 + 32], %f8
212          .word     0x81b00902 !faligndata        %f0,%f2,%f0
213          .word     0x85b08904 !faligndata        %f2,%f4,%f2
214          .word     0x89b10906 !faligndata        %f4,%f6,%f4
215          b                   .L256aligned
216          .word     0x8db18908 !faligndata        %f6,%f8,%f6
217
218.align    16
          ! 192-bit key: %f6 is synthesised as the bitwise complement of
          ! %f4.  In the aligned case that happens in the annulled delay
          ! slot, which only executes when the branch is taken.
219.L192:
220          brz,a,pt  %o3, .L256aligned
221          .word     0x8db00cc4 !fnot2   %f0,%f4,%f6
222
223          ldd                 [%o0 + 24], %f6
224          nop
225          .word     0x81b00902 !faligndata        %f0,%f2,%f0
226          .word     0x85b08904 !faligndata        %f2,%f4,%f2
227          .word     0x89b10906 !faligndata        %f4,%f6,%f4
228          .word     0x8db00cc4 !fnot2   %f0,%f4,%f6
229
          ! Common 192/256-bit entry: store the first 16 key bytes as
          ! k[0..3], keep a copy in %f28:%f30, and xor the second 16 bytes
          ! (%f4:%f6) into %f0:%f2 before the shared derivation.
230.L256aligned:
231          std                 %f0, [%o2 + 0]                ! k[0, 1]
232          .word     0xb9b00f00 !fsrc2   %f0,%f0,%f28
233          std                 %f2, [%o2 + 8]                ! k[2, 3]
234          .word     0xbdb00f02 !fsrc2   %f0,%f2,%f30
235          .word     0x81b10d80 !fxor    %f4,%f0,%f0
236          b                   .L128key
237          .word     0x85b18d82 !fxor    %f6,%f2,%f2
238
239.align    16
          ! 128-bit key: when unaligned, fetch a third doubleword and realign
240.L128:
241          brz,pt              %o3, .L128aligned
242          nop
243
244          ldd                 [%o0 + 16], %f4
245          nop
246          .word     0x81b00902 !faligndata        %f0,%f2,%f0
247          .word     0x85b08904 !faligndata        %f2,%f4,%f2
248
249.L128aligned:
250          std                 %f0, [%o2 + 0]                ! k[0, 1]
251          .word     0xb9b00f00 !fsrc2   %f0,%f0,%f28
252          std                 %f2, [%o2 + 8]                ! k[2, 3]
253          .word     0xbdb00f02 !fsrc2   %f0,%f2,%f30
254
          ! Position-independent address of SIGMA: call .+8 deposits the
          ! address of label 1 in %o7 and the delay slot adds the
          ! assemble-time distance to SIGMA.  The caller return address is
          ! parked in %o5 and put back straight afterwards.
255.L128key:
256          mov                 %o7, %o5
2571:        call                .+8
258          add                 %o7, SIGMA-1b, %o4
259          mov                 %o5, %o7
260
261          ldd                 [%o4 + 0], %f16
262          ldd                 [%o4 + 8], %f18
263          ldd                 [%o4 + 16], %f20
264          ldd                 [%o4 + 24], %f22
265
          ! Derive the second 128-bit quantity in %f0:%f2: two camellia_f
          ! steps, xor with the saved first 16 key bytes, two more steps.
          ! NOTE(review): this matches the KA derivation in RFC 3713 --
          ! confirm the naming against the specification.
266          .word     0x84cc0182 !camellia_f        %f16,%f2,%f0,%f2
267          .word     0x80cc8580 !camellia_f        %f18,%f0,%f2,%f0
268          .word     0x81b70d80 !fxor    %f28,%f0,%f0
269          .word     0x85b78d82 !fxor    %f30,%f2,%f2
270          .word     0x84cd0182 !camellia_f        %f20,%f2,%f0,%f2
271          .word     0x80cd8580 !camellia_f        %f22,%f0,%f2,%f0
272
          ! still testing the cmp against 192 made at entry
273          bge,pn              %icc, .L256key
274          nop
          ! ---- 128-bit schedule ----
275          std       %f0, [%o2 + 0x10]   ! k[ 4,  5]
276          std       %f2, [%o2 + 0x18]   ! k[ 6,  7]
277
          ! %o4:%o5 = the value just stored.  Every six-instruction group
          ! below rotates the 128-bit pair left by the count it names, and
          ! the rotations accumulate: 15, 30, 45, 60, 94, 111.
278          .word     0x99b02200 !movdtox %f0,%o4
279          .word     0x9bb02202 !movdtox %f2,%o5
280          srlx      %o4, 64-15, %g4
281          sllx      %o4, 15, %o4
282          srlx      %o5, 64-15, %g5
283          sllx      %o5, 15, %o5
284          or        %o4, %g5, %o4
285          or        %o5, %g4, %o5
286          stx       %o4, [%o2 + 0x30]   ! k[12, 13]
287          stx       %o5, [%o2 + 0x38]   ! k[14, 15]
288          srlx      %o4, 64-15, %g4
289          sllx      %o4, 15, %o4
290          srlx      %o5, 64-15, %g5
291          sllx      %o5, 15, %o5
292          or        %o4, %g5, %o4
293          or        %o5, %g4, %o5
294          stx       %o4, [%o2 + 0x40]   ! k[16, 17]
295          stx       %o5, [%o2 + 0x48]   ! k[18, 19]
296          srlx      %o4, 64-15, %g4
297          sllx      %o4, 15, %o4
298          srlx      %o5, 64-15, %g5
299          sllx      %o5, 15, %o5
300          or        %o4, %g5, %o4
301          or        %o5, %g4, %o5
          ! only the upper half is kept at this step; k[26, 27] is written
          ! from the other rotation chain below
302          stx       %o4, [%o2 + 0x60]   ! k[24, 25]
303          srlx      %o4, 64-15, %g4
304          sllx      %o4, 15, %o4
305          srlx      %o5, 64-15, %g5
306          sllx      %o5, 15, %o5
307          or        %o4, %g5, %o4
308          or        %o5, %g4, %o5
309          stx       %o4, [%o2 + 0x70]   ! k[28, 29]
310          stx       %o5, [%o2 + 0x78]   ! k[30, 31]
311          srlx      %o4, 64-34, %g4
312          sllx      %o4, 34, %o4
313          srlx      %o5, 64-34, %g5
314          sllx      %o5, 34, %o5
315          or        %o4, %g5, %o4
316          or        %o5, %g4, %o5
317          stx       %o4, [%o2 + 0xa0]   ! k[40, 41]
318          stx       %o5, [%o2 + 0xa8]   ! k[42, 43]
319          srlx      %o4, 64-17, %g4
320          sllx      %o4, 17, %o4
321          srlx      %o5, 64-17, %g5
322          sllx      %o5, 17, %o5
323          or        %o4, %g5, %o4
324          or        %o5, %g4, %o5
325          stx       %o4, [%o2 + 0xc0]   ! k[48, 49]
326          stx       %o5, [%o2 + 0xc8]   ! k[50, 51]
327
          ! Second chain, starting from the saved copy of the first 16 key
          ! bytes.  Accumulated rotation: 15, 45, 60, 77, 94, 111.
328          .word     0x99b0221c !movdtox %f28,%o4            ! k[ 0,  1]
329          .word     0x9bb0221e !movdtox %f30,%o5            ! k[ 2,  3]
330          srlx      %o4, 64-15, %g4
331          sllx      %o4, 15, %o4
332          srlx      %o5, 64-15, %g5
333          sllx      %o5, 15, %o5
334          or        %o4, %g5, %o4
335          or        %o5, %g4, %o5
336          stx       %o4, [%o2 + 0x20]   ! k[ 8,  9]
337          stx       %o5, [%o2 + 0x28]   ! k[10, 11]
338          srlx      %o4, 64-30, %g4
339          sllx      %o4, 30, %o4
340          srlx      %o5, 64-30, %g5
341          sllx      %o5, 30, %o5
342          or        %o4, %g5, %o4
343          or        %o5, %g4, %o5
344          stx       %o4, [%o2 + 0x50]   ! k[20, 21]
345          stx       %o5, [%o2 + 0x58]   ! k[22, 23]
346          srlx      %o4, 64-15, %g4
347          sllx      %o4, 15, %o4
348          srlx      %o5, 64-15, %g5
349          sllx      %o5, 15, %o5
350          or        %o4, %g5, %o4
351          or        %o5, %g4, %o5
          ! only the lower half is kept at this step (k[24, 25] was
          ! written by the first chain)
352          stx       %o5, [%o2 + 0x68]   ! k[26, 27]
353          srlx      %o4, 64-17, %g4
354          sllx      %o4, 17, %o4
355          srlx      %o5, 64-17, %g5
356          sllx      %o5, 17, %o5
357          or        %o4, %g5, %o4
358          or        %o5, %g4, %o5
359          stx       %o4, [%o2 + 0x80]   ! k[32, 33]
360          stx       %o5, [%o2 + 0x88]   ! k[34, 35]
361          srlx      %o4, 64-17, %g4
362          sllx      %o4, 17, %o4
363          srlx      %o5, 64-17, %g5
364          sllx      %o5, 17, %o5
365          or        %o4, %g5, %o4
366          or        %o5, %g4, %o5
367          stx       %o4, [%o2 + 0x90]   ! k[36, 37]
368          stx       %o5, [%o2 + 0x98]   ! k[38, 39]
369          srlx      %o4, 64-17, %g4
370          sllx      %o4, 17, %o4
371          srlx      %o5, 64-17, %g5
372          sllx      %o5, 17, %o5
373          or        %o4, %g5, %o4
374          or        %o5, %g4, %o5
375          stx       %o4, [%o2 + 0xb0]   ! k[44, 45]
376          stx       %o5, [%o2 + 0xb8]   ! k[46, 47]
377
          ! grandRounds = 3 at offset 0x110 (272); return 0 from the
          ! retl delay slot
378          mov                 3, %o3
379          st                  %o3, [%o2 + 0x110]
380          retl
381          xor                 %o0, %o0, %o0
382
383.align    16
          ! ---- 192/256-bit schedule ----
          ! %o4 still holds the SIGMA address.  The value derived above is
          ! parked unrotated at 0x30 and reloaded later.  A third quantity
          ! is then derived into %f0:%f2 by xoring in the second 16 key
          ! bytes (%f4:%f6) and running two more camellia_f steps.
          ! NOTE(review): in RFC 3713 terms this looks like KB -- confirm.
384.L256key:
385          ldd                 [%o4 + 32], %f24
386          ldd                 [%o4 + 40], %f26
387
388          std                 %f0, [%o2 + 0x30]   ! k[12, 13]
389          std                 %f2, [%o2 + 0x38]   ! k[14, 15]
390
391          .word     0x81b10d80 !fxor    %f4,%f0,%f0
392          .word     0x85b18d82 !fxor    %f6,%f2,%f2
393          .word     0x84ce0182 !camellia_f        %f24,%f2,%f0,%f2
394          .word     0x80ce8580 !camellia_f        %f26,%f0,%f2,%f0
395
396          std       %f0, [%o2 + 0x10]   ! k[ 4,  5]
397          std       %f2, [%o2 + 0x18]   ! k[ 6,  7]
398
          ! Chain 1, from the value just stored at k[4..7].
          ! Accumulated left rotation: 30, 60, 111.
399          .word     0x99b02200 !movdtox %f0,%o4
400          .word     0x9bb02202 !movdtox %f2,%o5
401          srlx      %o4, 64-30, %g4
402          sllx      %o4, 30, %o4
403          srlx      %o5, 64-30, %g5
404          sllx      %o5, 30, %o5
405          or        %o4, %g5, %o4
406          or        %o5, %g4, %o5
407          stx       %o4, [%o2 + 0x50]   ! k[20, 21]
408          stx       %o5, [%o2 + 0x58]   ! k[22, 23]
409          srlx      %o4, 64-30, %g4
410          sllx      %o4, 30, %o4
411          srlx      %o5, 64-30, %g5
412          sllx      %o5, 30, %o5
413          or        %o4, %g5, %o4
414          or        %o5, %g4, %o5
415          stx       %o4, [%o2 + 0xa0]   ! k[40, 41]
416          stx       %o5, [%o2 + 0xa8]   ! k[42, 43]
417          srlx      %o4, 64-51, %g4
418          sllx      %o4, 51, %o4
419          srlx      %o5, 64-51, %g5
420          sllx      %o5, 51, %o5
421          or        %o4, %g5, %o4
422          or        %o5, %g4, %o5
423          stx       %o4, [%o2 + 0x100]  ! k[64, 65]
424          stx       %o5, [%o2 + 0x108]  ! k[66, 67]
425
          ! Chain 2, from the second 16 key bytes in %f4:%f6.
          ! Accumulated left rotation: 15, 30, 60, 94.
426          .word     0x99b02204 !movdtox %f4,%o4             ! k[ 8,  9]
427          .word     0x9bb02206 !movdtox %f6,%o5             ! k[10, 11]
428          srlx      %o4, 64-15, %g4
429          sllx      %o4, 15, %o4
430          srlx      %o5, 64-15, %g5
431          sllx      %o5, 15, %o5
432          or        %o4, %g5, %o4
433          or        %o5, %g4, %o5
434          stx       %o4, [%o2 + 0x20]   ! k[ 8,  9]
435          stx       %o5, [%o2 + 0x28]   ! k[10, 11]
436          srlx      %o4, 64-15, %g4
437          sllx      %o4, 15, %o4
438          srlx      %o5, 64-15, %g5
439          sllx      %o5, 15, %o5
440          or        %o4, %g5, %o4
441          or        %o5, %g4, %o5
442          stx       %o4, [%o2 + 0x40]   ! k[16, 17]
443          stx       %o5, [%o2 + 0x48]   ! k[18, 19]
444          srlx      %o4, 64-30, %g4
445          sllx      %o4, 30, %o4
446          srlx      %o5, 64-30, %g5
447          sllx      %o5, 30, %o5
448          or        %o4, %g5, %o4
449          or        %o5, %g4, %o5
450          stx       %o4, [%o2 + 0x90]   ! k[36, 37]
451          stx       %o5, [%o2 + 0x98]   ! k[38, 39]
452          srlx      %o4, 64-34, %g4
453          sllx      %o4, 34, %o4
454          srlx      %o5, 64-34, %g5
455          sllx      %o5, 34, %o5
456          or        %o4, %g5, %o4
457          or        %o5, %g4, %o5
458          stx       %o4, [%o2 + 0xd0]   ! k[52, 53]
459          stx       %o5, [%o2 + 0xd8]   ! k[54, 55]
          ! Chain 3, from the value parked unrotated at 0x30.  The slot is
          ! overwritten with the 15-bit rotation.
          ! Accumulated left rotation: 15, 45, 77 (word swap), 94.
460          ldx       [%o2 + 0x30], %o4   ! k[12, 13]
461          ldx       [%o2 + 0x38], %o5   ! k[14, 15]
462          srlx      %o4, 64-15, %g4
463          sllx      %o4, 15, %o4
464          srlx      %o5, 64-15, %g5
465          sllx      %o5, 15, %o5
466          or        %o4, %g5, %o4
467          or        %o5, %g4, %o5
468          stx       %o4, [%o2 + 0x30]   ! k[12, 13]
469          stx       %o5, [%o2 + 0x38]   ! k[14, 15]
470          srlx      %o4, 64-30, %g4
471          sllx      %o4, 30, %o4
472          srlx      %o5, 64-30, %g5
473          sllx      %o5, 30, %o5
474          or        %o4, %g5, %o4
475          or        %o5, %g4, %o5
476          stx       %o4, [%o2 + 0x70]   ! k[28, 29]
477          stx       %o5, [%o2 + 0x78]   ! k[30, 31]
          ! A further 32-bit rotation is produced purely by store order:
          ! st writes the low 32 bits of its register, so the four words
          ! land as lo(%o4), hi(%o5), lo(%o5), hi(%o4).  %o4:%o5 are not
          ! changed, so the next group rotates from the 45-bit position.
478          srlx      %o4, 32, %g4
479          srlx      %o5, 32, %g5
480          st        %o4, [%o2 + 0xc0]   ! k[48]
481          st        %g5, [%o2 + 0xc4]   ! k[49]
482          st        %o5, [%o2 + 0xc8]   ! k[50]
483          st        %g4, [%o2 + 0xcc]   ! k[51]
484          srlx      %o4, 64-49, %g4
485          sllx      %o4, 49, %o4
486          srlx      %o5, 64-49, %g5
487          sllx      %o5, 49, %o5
488          or        %o4, %g5, %o4
489          or        %o5, %g4, %o5
490          stx       %o4, [%o2 + 0xe0]   ! k[56, 57]
491          stx       %o5, [%o2 + 0xe8]   ! k[58, 59]
492
          ! Chain 4, from the saved copy of the first 16 key bytes.
          ! Accumulated left rotation: 45, 60, 77, 111.
493          .word     0x99b0221c !movdtox %f28,%o4            ! k[ 0,  1]
494          .word     0x9bb0221e !movdtox %f30,%o5            ! k[ 2,  3]
495          srlx      %o4, 64-45, %g4
496          sllx      %o4, 45, %o4
497          srlx      %o5, 64-45, %g5
498          sllx      %o5, 45, %o5
499          or        %o4, %g5, %o4
500          or        %o5, %g4, %o5
501          stx       %o4, [%o2 + 0x60]   ! k[24, 25]
502          stx       %o5, [%o2 + 0x68]   ! k[26, 27]
503          srlx      %o4, 64-15, %g4
504          sllx      %o4, 15, %o4
505          srlx      %o5, 64-15, %g5
506          sllx      %o5, 15, %o5
507          or        %o4, %g5, %o4
508          or        %o5, %g4, %o5
509          stx       %o4, [%o2 + 0x80]   ! k[32, 33]
510          stx       %o5, [%o2 + 0x88]   ! k[34, 35]
511          srlx      %o4, 64-17, %g4
512          sllx      %o4, 17, %o4
513          srlx      %o5, 64-17, %g5
514          sllx      %o5, 17, %o5
515          or        %o4, %g5, %o4
516          or        %o5, %g4, %o5
517          stx       %o4, [%o2 + 0xb0]   ! k[44, 45]
518          stx       %o5, [%o2 + 0xb8]   ! k[46, 47]
519          srlx      %o4, 64-34, %g4
520          sllx      %o4, 34, %o4
521          srlx      %o5, 64-34, %g5
522          sllx      %o5, 34, %o5
523          or        %o4, %g5, %o4
524          or        %o5, %g4, %o5
525          stx       %o4, [%o2 + 0xf0]   ! k[60, 61]
526          stx       %o5, [%o2 + 0xf8]   ! k[62, 63]
527
          ! grandRounds = 4 at offset 0x110 (272); return 0 from the
          ! retl delay slot
528          mov                 4, %o3
529          st                  %o3, [%o2 + 0x110]
530          retl
531          xor                 %o0, %o0, %o0
532.type     cmll_t4_set_key,#function
533.size     cmll_t4_set_key,.-cmll_t4_set_key
! SIGMA: six 64-bit key-schedule constants, laid out as twelve 32-bit
! words.  cmll_t4_set_key locates this table PC-relatively and loads
! them with ldd from offsets 0 through 40.  The values are the constants
! Sigma1 through Sigma6 listed in RFC 3713.
! The table sits in the same section as the code, so the SIGMA-1b
! distance used by cmll_t4_set_key is an assemble-time constant.
534.align    32
535SIGMA:
536          .long     0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2
537          .long     0xc6ef372f, 0xe94f82be, 0x54ff53a5, 0xf1d36f1c
538          .long     0x10e527fa, 0xde682d1d, 0xb05688c2, 0xb3e6c1fd
539.type     SIGMA,#object
540.size     SIGMA,.-SIGMA
! identification string embedded in the object file
541.asciz    "Camellia for SPARC T4, David S. Miller, Andy Polyakov"
! _cmll128_load_enckey: load the first 208 bytes of a key schedule into
! registers, in ascending order.
!
! In:   %i3 = key schedule.  The use of an input register suggests the
!       caller already owns a register window -- confirm against callers.
! Out:  %g4, %g5   = schedule bytes 0..15
!       %f16..%f62 = schedule bytes 16..207, one doubleword per
!                    even-numbered register
! Leaf helper: returns with retl and changes no integer register other
! than %g4 and %g5.
! _cmll256_load_enckey is the same entry point: the alias at the end
! adds no extra loads for the longer schedule.
542.align    32
543_cmll128_load_enckey:
544          ldx                 [%i3 + 0], %g4
545          ldx                 [%i3 + 8], %g5
546          ldd                 [%i3 + 16], %f16
547          ldd                 [%i3 + 24], %f18
548          ldd                 [%i3 + 32], %f20
549          ldd                 [%i3 + 40], %f22
550          ldd                 [%i3 + 48], %f24
551          ldd                 [%i3 + 56], %f26
552          ldd                 [%i3 + 64], %f28
553          ldd                 [%i3 + 72], %f30
554          ldd                 [%i3 + 80], %f32
555          ldd                 [%i3 + 88], %f34
556          ldd                 [%i3 + 96], %f36
557          ldd                 [%i3 + 104], %f38
558          ldd                 [%i3 + 112], %f40
559          ldd                 [%i3 + 120], %f42
560          ldd                 [%i3 + 128], %f44
561          ldd                 [%i3 + 136], %f46
562          ldd                 [%i3 + 144], %f48
563          ldd                 [%i3 + 152], %f50
564          ldd                 [%i3 + 160], %f52
565          ldd                 [%i3 + 168], %f54
566          ldd                 [%i3 + 176], %f56
567          ldd                 [%i3 + 184], %f58
568          ldd                 [%i3 + 192], %f60
569          ldd                 [%i3 + 200], %f62
570          retl
571          nop
572.type     _cmll128_load_enckey,#function
573.size     _cmll128_load_enckey,.-_cmll128_load_enckey
574_cmll256_load_enckey=_cmll128_load_enckey
575
! _cmll256_load_deckey / _cmll128_load_deckey: load a key schedule into
! registers in the opposite order to _cmll128_load_enckey.  Ascending
! schedule offsets go to descending registers %f58..%f16, and the
! doublewords at offsets 192 and 200 go to %g4:%g5.
!
! In:   %i3 = key schedule
! Out:  %f16..%f62, %g4, %g5 loaded as described
!
! The two entry points differ only in how %f60/%f62 are filled and in
! the base used for the shared tail:
!   128: %f60 = [key + 0],  %f62 = [key + 8],  base = key
!   256: %f62 = [key + 64], %f60 = [key + 72], base = key + 64
! The 256 entry therefore returns with %i3 advanced by 64.
! NOTE(review): the 256 entry never reads schedule bytes 0..63 here;
! presumably the 256-bit decrypt routine fetches them itself -- confirm.
576.align    32
577_cmll256_load_deckey:
578          ldd                 [%i3 + 64], %f62
579          ldd                 [%i3 + 72], %f60
580          b                   .Load_deckey
          ! delay slot: rebase the key pointer for the shared tail
581          add                 %i3, 64, %i3
582_cmll128_load_deckey:
583          ldd                 [%i3 + 0], %f60
584          ldd                 [%i3 + 8], %f62
585.Load_deckey:
586          ldd                 [%i3 + 16], %f58
587          ldd                 [%i3 + 24], %f56
588          ldd                 [%i3 + 32], %f54
589          ldd                 [%i3 + 40], %f52
590          ldd                 [%i3 + 48], %f50
591          ldd                 [%i3 + 56], %f48
592          ldd                 [%i3 + 64], %f46
593          ldd                 [%i3 + 72], %f44
594          ldd                 [%i3 + 80], %f42
595          ldd                 [%i3 + 88], %f40
596          ldd                 [%i3 + 96], %f38
597          ldd                 [%i3 + 104], %f36
598          ldd                 [%i3 + 112], %f34
599          ldd                 [%i3 + 120], %f32
600          ldd                 [%i3 + 128], %f30
601          ldd                 [%i3 + 136], %f28
602          ldd                 [%i3 + 144], %f26
603          ldd                 [%i3 + 152], %f24
604          ldd                 [%i3 + 160], %f22
605          ldd                 [%i3 + 168], %f20
606          ldd                 [%i3 + 176], %f18
607          ldd                 [%i3 + 184], %f16
608          ldx                 [%i3 + 192], %g4
609          retl
          ! delay slot: last load completes the %g4:%g5 pair
610          ldx                 [%i3 + 200], %g5
611.type     _cmll256_load_deckey,#function
612.size     _cmll256_load_deckey,.-_cmll256_load_deckey
613
! _cmll128_encrypt_1x: transform the single block held in %f0:%f2 with
! the key material preloaded in %f16..%f62.
!
! Sequence: six camellia_f steps, camellia_fl/camellia_fli, six more
! steps, a second camellia_fl/camellia_fli pair, six final steps, then
! fxor with %f60 and %f62.  The last two camellia_f steps go through %f4
! so the halves end up in output order.
!
! In/Out: %f0:%f2.  Scratch: %f4.  No memory access, no integer register
! is written.  Returns with retl; the last fxor sits in the delay slot.
! The xor with %g4:%g5 (the pair fetched by the key loaders) is not done
! here -- presumably the caller applies it before the call; confirm.
!
! _cmll128_decrypt_1x is an alias for the same code; decryption comes
! from loading the key registers in reverse order with
! _cmll128_load_deckey.
614.align    32
615_cmll128_encrypt_1x:
616          .word     0x84cc0182 !camellia_f        %f16,%f2,%f0,%f2
617          .word     0x80cc8580 !camellia_f        %f18,%f0,%f2,%f0
618          .word     0x84cd0182 !camellia_f        %f20,%f2,%f0,%f2
619          .word     0x80cd8580 !camellia_f        %f22,%f0,%f2,%f0
620          .word     0x84ce0182 !camellia_f        %f24,%f2,%f0,%f2
621          .word     0x80ce8580 !camellia_f        %f26,%f0,%f2,%f0
622          .word     0x81b72780 !camellia_fl       %f28,%f0,%f0
623          .word     0x85b7a7a2 !camellia_fli      %f30,%f2,%f2
624          .word     0x84c84182 !camellia_f        %f32,%f2,%f0,%f2
625          .word     0x80c8c580 !camellia_f        %f34,%f0,%f2,%f0
626          .word     0x84c94182 !camellia_f        %f36,%f2,%f0,%f2
627          .word     0x80c9c580 !camellia_f        %f38,%f0,%f2,%f0
628          .word     0x84ca4182 !camellia_f        %f40,%f2,%f0,%f2
629          .word     0x80cac580 !camellia_f        %f42,%f0,%f2,%f0
630          .word     0x81b36780 !camellia_fl       %f44,%f0,%f0
631          .word     0x85b3e7a2 !camellia_fli      %f46,%f2,%f2
632          .word     0x84cc4182 !camellia_f        %f48,%f2,%f0,%f2
633          .word     0x80ccc580 !camellia_f        %f50,%f0,%f2,%f0
634          .word     0x84cd4182 !camellia_f        %f52,%f2,%f0,%f2
635          .word     0x80cdc580 !camellia_f        %f54,%f0,%f2,%f0
636          .word     0x88ce4182 !camellia_f        %f56,%f2,%f0,%f4
637          .word     0x84cec980 !camellia_f        %f58,%f0,%f4,%f2
638          .word     0x81b74d84 !fxor    %f60,%f4,%f0
639          retl
640          .word     0x85b7cd82 !fxor    %f62,%f2,%f2
641.type     _cmll128_encrypt_1x,#function
642.size     _cmll128_encrypt_1x,.-_cmll128_encrypt_1x
643_cmll128_decrypt_1x=_cmll128_encrypt_1x
644
! _cmll128_encrypt_2x: two-block version of _cmll128_encrypt_1x.  The
! same step sequence is applied to the block in %f0:%f2 and the block in
! %f4:%f6, with the two instruction streams interleaved so that
! consecutive instructions do not depend on each other.
!
! In/Out: %f0:%f2 (first block), %f4:%f6 (second block).
! Keys:   %f16..%f62, as loaded by the key loaders.
! Scratch: %f8, %f10 (carry the second-to-last step of each block).
! No memory access, no integer register is written.  Returns with retl;
! the last fxor sits in the delay slot.
! The xor with %g4:%g5 is not done here -- presumably the caller applies
! it before the call; confirm.
!
! _cmll128_decrypt_2x is an alias for the same code; the direction is
! chosen by which key loader filled the registers.
645.align    32
646_cmll128_encrypt_2x:
647          .word     0x84cc0182 !camellia_f        %f16,%f2,%f0,%f2
648          .word     0x8ccc0986 !camellia_f        %f16,%f6,%f4,%f6
649          .word     0x80cc8580 !camellia_f        %f18,%f0,%f2,%f0
650          .word     0x88cc8d84 !camellia_f        %f18,%f4,%f6,%f4
651          .word     0x84cd0182 !camellia_f        %f20,%f2,%f0,%f2
652          .word     0x8ccd0986 !camellia_f        %f20,%f6,%f4,%f6
653          .word     0x80cd8580 !camellia_f        %f22,%f0,%f2,%f0
654          .word     0x88cd8d84 !camellia_f        %f22,%f4,%f6,%f4
655          .word     0x84ce0182 !camellia_f        %f24,%f2,%f0,%f2
656          .word     0x8cce0986 !camellia_f        %f24,%f6,%f4,%f6
657          .word     0x80ce8580 !camellia_f        %f26,%f0,%f2,%f0
658          .word     0x88ce8d84 !camellia_f        %f26,%f4,%f6,%f4
659          .word     0x81b72780 !camellia_fl       %f28,%f0,%f0
660          .word     0x89b72784 !camellia_fl       %f28,%f4,%f4
661          .word     0x85b7a7a2 !camellia_fli      %f30,%f2,%f2
662          .word     0x8db7a7a6 !camellia_fli      %f30,%f6,%f6
663          .word     0x84c84182 !camellia_f        %f32,%f2,%f0,%f2
664          .word     0x8cc84986 !camellia_f        %f32,%f6,%f4,%f6
665          .word     0x80c8c580 !camellia_f        %f34,%f0,%f2,%f0
666          .word     0x88c8cd84 !camellia_f        %f34,%f4,%f6,%f4
667          .word     0x84c94182 !camellia_f        %f36,%f2,%f0,%f2
668          .word     0x8cc94986 !camellia_f        %f36,%f6,%f4,%f6
669          .word     0x80c9c580 !camellia_f        %f38,%f0,%f2,%f0
670          .word     0x88c9cd84 !camellia_f        %f38,%f4,%f6,%f4
671          .word     0x84ca4182 !camellia_f        %f40,%f2,%f0,%f2
672          .word     0x8cca4986 !camellia_f        %f40,%f6,%f4,%f6
673          .word     0x80cac580 !camellia_f        %f42,%f0,%f2,%f0
674          .word     0x88cacd84 !camellia_f        %f42,%f4,%f6,%f4
675          .word     0x81b36780 !camellia_fl       %f44,%f0,%f0
676          .word     0x89b36784 !camellia_fl       %f44,%f4,%f4
677          .word     0x85b3e7a2 !camellia_fli      %f46,%f2,%f2
678          .word     0x8db3e7a6 !camellia_fli      %f46,%f6,%f6
679          .word     0x84cc4182 !camellia_f        %f48,%f2,%f0,%f2
680          .word     0x8ccc4986 !camellia_f        %f48,%f6,%f4,%f6
681          .word     0x80ccc580 !camellia_f        %f50,%f0,%f2,%f0
682          .word     0x88cccd84 !camellia_f        %f50,%f4,%f6,%f4
683          .word     0x84cd4182 !camellia_f        %f52,%f2,%f0,%f2
684          .word     0x8ccd4986 !camellia_f        %f52,%f6,%f4,%f6
685          .word     0x80cdc580 !camellia_f        %f54,%f0,%f2,%f0
686          .word     0x88cdcd84 !camellia_f        %f54,%f4,%f6,%f4
          ! last two steps of each block go through %f8 / %f10 so the
          ! halves end up in output order
687          .word     0x90ce4182 !camellia_f        %f56,%f2,%f0,%f8
688          .word     0x94ce4986 !camellia_f        %f56,%f6,%f4,%f10
689          .word     0x84ced180 !camellia_f        %f58,%f0,%f8,%f2
690          .word     0x8cced584 !camellia_f        %f58,%f4,%f10,%f6
691          .word     0x81b74d88 !fxor    %f60,%f8,%f0
692          .word     0x89b74d8a !fxor    %f60,%f10,%f4
693          .word     0x85b7cd82 !fxor    %f62,%f2,%f2
694          retl
695          .word     0x8db7cd86 !fxor    %f62,%f6,%f6
696.type     _cmll128_encrypt_2x,#function
697.size     _cmll128_encrypt_2x,.-_cmll128_encrypt_2x
698_cmll128_decrypt_2x=_cmll128_encrypt_2x
699
700.align    32
! _cmll256_encrypt_1x
!
! Leaf routine (returns with retl, no register window of its own) that
! transforms the one block held in %f0/%f2 in place.
!
! Sequence: 24 camellia_f steps in four groups of six, with a
! camellia_fl/camellia_fli pair after each of the first three groups and
! a final fxor with %f28/%f30 after the fourth group.
!
! In:     %f0,%f2    block state
!         %f16-%f62  per-step operands; they are consumed but not
!                    initialised here.  NOTE(review): presumably the key
!                    schedule preloaded by _cmll256_load_enckey, which is
!                    outside this chunk - confirm.
!         %i3        base address for the reloads of %f16-%f30
! Out:    %f0,%f2    result
! Clobb:  %f4 (scratch in the last two steps); %f16-%f30 are reloaded
!         twice and on return hold the doublewords at [%i3+16]..[%i3+72]
701_cmll256_encrypt_1x:
          ! Group 1 (steps 1-6).  Each operand pair is replaced, right
          ! after its last use, by the doublewords at [%i3+208]..[%i3+264]
          ! which are consumed by group 4 and the final fxor.
702          .word     0x84cc0182 !camellia_f        %f16,%f2,%f0,%f2
703          .word     0x80cc8580 !camellia_f        %f18,%f0,%f2,%f0
704          ldd                 [%i3 + 208], %f16
705          ldd                 [%i3 + 216], %f18
706          .word     0x84cd0182 !camellia_f        %f20,%f2,%f0,%f2
707          .word     0x80cd8580 !camellia_f        %f22,%f0,%f2,%f0
708          ldd                 [%i3 + 224], %f20
709          ldd                 [%i3 + 232], %f22
710          .word     0x84ce0182 !camellia_f        %f24,%f2,%f0,%f2
711          .word     0x80ce8580 !camellia_f        %f26,%f0,%f2,%f0
712          ldd                 [%i3 + 240], %f24
713          ldd                 [%i3 + 248], %f26
          ! First fl/fli pair, then reload of its two operands.
714          .word     0x81b72780 !camellia_fl       %f28,%f0,%f0
715          .word     0x85b7a7a2 !camellia_fli      %f30,%f2,%f2
716          ldd                 [%i3 + 256], %f28
717          ldd                 [%i3 + 264], %f30
          ! Group 2 (steps 7-12) with operands %f32-%f42.
718          .word     0x84c84182 !camellia_f        %f32,%f2,%f0,%f2
719          .word     0x80c8c580 !camellia_f        %f34,%f0,%f2,%f0
720          .word     0x84c94182 !camellia_f        %f36,%f2,%f0,%f2
721          .word     0x80c9c580 !camellia_f        %f38,%f0,%f2,%f0
722          .word     0x84ca4182 !camellia_f        %f40,%f2,%f0,%f2
723          .word     0x80cac580 !camellia_f        %f42,%f0,%f2,%f0
          ! Second fl/fli pair.
724          .word     0x81b36780 !camellia_fl       %f44,%f0,%f0
725          .word     0x85b3e7a2 !camellia_fli      %f46,%f2,%f2
          ! Group 3 (steps 13-18) with operands %f48-%f58.
726          .word     0x84cc4182 !camellia_f        %f48,%f2,%f0,%f2
727          .word     0x80ccc580 !camellia_f        %f50,%f0,%f2,%f0
728          .word     0x84cd4182 !camellia_f        %f52,%f2,%f0,%f2
729          .word     0x80cdc580 !camellia_f        %f54,%f0,%f2,%f0
730          .word     0x84ce4182 !camellia_f        %f56,%f2,%f0,%f2
731          .word     0x80cec580 !camellia_f        %f58,%f0,%f2,%f0
          ! Third fl/fli pair.
732          .word     0x81b76780 !camellia_fl       %f60,%f0,%f0
733          .word     0x85b7e7a2 !camellia_fli      %f62,%f2,%f2
          ! Group 4 (steps 19-24) uses the values loaded during group 1.
          ! After each use the register is reloaded from [%i3+16]..[%i3+72].
          ! NOTE(review): presumably this restores the entry contents so the
          ! routine can be called again without a key reload - confirm
          ! against _cmll256_load_enckey.
734          .word     0x84cc0182 !camellia_f        %f16,%f2,%f0,%f2
735          .word     0x80cc8580 !camellia_f        %f18,%f0,%f2,%f0
736          ldd                 [%i3 + 16], %f16
737          ldd                 [%i3 + 24], %f18
738          .word     0x84cd0182 !camellia_f        %f20,%f2,%f0,%f2
739          .word     0x80cd8580 !camellia_f        %f22,%f0,%f2,%f0
740          ldd                 [%i3 + 32], %f20
741          ldd                 [%i3 + 40], %f22
          ! The last two steps route their results through %f4 so that the
          ! final fxor pair can write the two output halves to %f0 and %f2.
742          .word     0x88ce0182 !camellia_f        %f24,%f2,%f0,%f4
743          .word     0x84ce8980 !camellia_f        %f26,%f0,%f4,%f2
744          ldd                 [%i3 + 48], %f24
745          ldd                 [%i3 + 56], %f26
          ! Final xor with %f28/%f30 (loaded from [%i3+256]/[%i3+264] above).
746          .word     0x81b70d84 !fxor    %f28,%f4,%f0
747          .word     0x85b78d82 !fxor    %f30,%f2,%f2
748          ldd                 [%i3 + 64], %f28
749          retl
          ! Delay slot: last reload executes before control returns.
750          ldd                 [%i3 + 72], %f30
751.type     _cmll256_encrypt_1x,#function
752.size     _cmll256_encrypt_1x,.-_cmll256_encrypt_1x
753
754.align    32
! _cmll256_encrypt_2x
!
! Two-block variant of _cmll256_encrypt_1x: the same 24 camellia_f steps,
! three fl/fli pairs and final fxor are applied to two independent blocks,
! with the instructions for the two blocks interleaved step by step.
!
! In:     %f0,%f2    first block state
!         %f4,%f6    second block state
!         %f16-%f62  per-step operands, consumed but not initialised here.
!                    NOTE(review): presumably preloaded by
!                    _cmll256_load_enckey (outside this chunk) - confirm.
!         %i3        base address for the reloads of %f16-%f30
! Out:    %f0,%f2 and %f4,%f6   results for the first and second block
! Clobb:  %f8,%f10 (scratch in the last two steps); %f16-%f30 are
!         reloaded twice and on return hold [%i3+16]..[%i3+72]
755_cmll256_encrypt_2x:
          ! Group 1 (steps 1-6), both blocks.  Operands are replaced after
          ! their last use by the doublewords at [%i3+208]..[%i3+264].
756          .word     0x84cc0182 !camellia_f        %f16,%f2,%f0,%f2
757          .word     0x8ccc0986 !camellia_f        %f16,%f6,%f4,%f6
758          .word     0x80cc8580 !camellia_f        %f18,%f0,%f2,%f0
759          .word     0x88cc8d84 !camellia_f        %f18,%f4,%f6,%f4
760          ldd                 [%i3 + 208], %f16
761          ldd                 [%i3 + 216], %f18
762          .word     0x84cd0182 !camellia_f        %f20,%f2,%f0,%f2
763          .word     0x8ccd0986 !camellia_f        %f20,%f6,%f4,%f6
764          .word     0x80cd8580 !camellia_f        %f22,%f0,%f2,%f0
765          .word     0x88cd8d84 !camellia_f        %f22,%f4,%f6,%f4
766          ldd                 [%i3 + 224], %f20
767          ldd                 [%i3 + 232], %f22
768          .word     0x84ce0182 !camellia_f        %f24,%f2,%f0,%f2
769          .word     0x8cce0986 !camellia_f        %f24,%f6,%f4,%f6
770          .word     0x80ce8580 !camellia_f        %f26,%f0,%f2,%f0
771          .word     0x88ce8d84 !camellia_f        %f26,%f4,%f6,%f4
772          ldd                 [%i3 + 240], %f24
773          ldd                 [%i3 + 248], %f26
          ! First fl/fli pair for both blocks, then reload of its operands.
774          .word     0x81b72780 !camellia_fl       %f28,%f0,%f0
775          .word     0x89b72784 !camellia_fl       %f28,%f4,%f4
776          .word     0x85b7a7a2 !camellia_fli      %f30,%f2,%f2
777          .word     0x8db7a7a6 !camellia_fli      %f30,%f6,%f6
778          ldd                 [%i3 + 256], %f28
779          ldd                 [%i3 + 264], %f30
          ! Group 2 (steps 7-12) with operands %f32-%f42.
780          .word     0x84c84182 !camellia_f        %f32,%f2,%f0,%f2
781          .word     0x8cc84986 !camellia_f        %f32,%f6,%f4,%f6
782          .word     0x80c8c580 !camellia_f        %f34,%f0,%f2,%f0
783          .word     0x88c8cd84 !camellia_f        %f34,%f4,%f6,%f4
784          .word     0x84c94182 !camellia_f        %f36,%f2,%f0,%f2
785          .word     0x8cc94986 !camellia_f        %f36,%f6,%f4,%f6
786          .word     0x80c9c580 !camellia_f        %f38,%f0,%f2,%f0
787          .word     0x88c9cd84 !camellia_f        %f38,%f4,%f6,%f4
788          .word     0x84ca4182 !camellia_f        %f40,%f2,%f0,%f2
789          .word     0x8cca4986 !camellia_f        %f40,%f6,%f4,%f6
790          .word     0x80cac580 !camellia_f        %f42,%f0,%f2,%f0
791          .word     0x88cacd84 !camellia_f        %f42,%f4,%f6,%f4
          ! Second fl/fli pair.
792          .word     0x81b36780 !camellia_fl       %f44,%f0,%f0
793          .word     0x89b36784 !camellia_fl       %f44,%f4,%f4
794          .word     0x85b3e7a2 !camellia_fli      %f46,%f2,%f2
795          .word     0x8db3e7a6 !camellia_fli      %f46,%f6,%f6
          ! Group 3 (steps 13-18) with operands %f48-%f58.
796          .word     0x84cc4182 !camellia_f        %f48,%f2,%f0,%f2
797          .word     0x8ccc4986 !camellia_f        %f48,%f6,%f4,%f6
798          .word     0x80ccc580 !camellia_f        %f50,%f0,%f2,%f0
799          .word     0x88cccd84 !camellia_f        %f50,%f4,%f6,%f4
800          .word     0x84cd4182 !camellia_f        %f52,%f2,%f0,%f2
801          .word     0x8ccd4986 !camellia_f        %f52,%f6,%f4,%f6
802          .word     0x80cdc580 !camellia_f        %f54,%f0,%f2,%f0
803          .word     0x88cdcd84 !camellia_f        %f54,%f4,%f6,%f4
804          .word     0x84ce4182 !camellia_f        %f56,%f2,%f0,%f2
805          .word     0x8cce4986 !camellia_f        %f56,%f6,%f4,%f6
806          .word     0x80cec580 !camellia_f        %f58,%f0,%f2,%f0
807          .word     0x88cecd84 !camellia_f        %f58,%f4,%f6,%f4
          ! Third fl/fli pair.
808          .word     0x81b76780 !camellia_fl       %f60,%f0,%f0
809          .word     0x89b76784 !camellia_fl       %f60,%f4,%f4
810          .word     0x85b7e7a2 !camellia_fli      %f62,%f2,%f2
811          .word     0x8db7e7a6 !camellia_fli      %f62,%f6,%f6
          ! Group 4 (steps 19-24) uses the values loaded during group 1;
          ! each register is then reloaded from [%i3+16]..[%i3+72].
          ! NOTE(review): presumably restoring the entry contents for the
          ! next call - confirm against _cmll256_load_enckey.
812          .word     0x84cc0182 !camellia_f        %f16,%f2,%f0,%f2
813          .word     0x8ccc0986 !camellia_f        %f16,%f6,%f4,%f6
814          .word     0x80cc8580 !camellia_f        %f18,%f0,%f2,%f0
815          .word     0x88cc8d84 !camellia_f        %f18,%f4,%f6,%f4
816          ldd                 [%i3 + 16], %f16
817          ldd                 [%i3 + 24], %f18
818          .word     0x84cd0182 !camellia_f        %f20,%f2,%f0,%f2
819          .word     0x8ccd0986 !camellia_f        %f20,%f6,%f4,%f6
820          .word     0x80cd8580 !camellia_f        %f22,%f0,%f2,%f0
821          .word     0x88cd8d84 !camellia_f        %f22,%f4,%f6,%f4
822          ldd                 [%i3 + 32], %f20
823          ldd                 [%i3 + 40], %f22
          ! The last two steps route results through %f8 (first block) and
          ! %f10 (second block) ahead of the final fxor.
824          .word     0x90ce0182 !camellia_f        %f24,%f2,%f0,%f8
825          .word     0x94ce0986 !camellia_f        %f24,%f6,%f4,%f10
826          .word     0x84ce9180 !camellia_f        %f26,%f0,%f8,%f2
827          .word     0x8cce9584 !camellia_f        %f26,%f4,%f10,%f6
828          ldd                 [%i3 + 48], %f24
829          ldd                 [%i3 + 56], %f26
          ! Final xor with %f28/%f30 for both blocks.
830          .word     0x81b70d88 !fxor    %f28,%f8,%f0
831          .word     0x89b70d8a !fxor    %f28,%f10,%f4
832          .word     0x85b78d82 !fxor    %f30,%f2,%f2
833          .word     0x8db78d86 !fxor    %f30,%f6,%f6
834          ldd                 [%i3 + 64], %f28
835          retl
          ! Delay slot: last reload executes before control returns.
836          ldd                 [%i3 + 72], %f30
837.type     _cmll256_encrypt_2x,#function
838.size     _cmll256_encrypt_2x,.-_cmll256_encrypt_2x
839
840.align    32
! _cmll256_decrypt_1x
!
! Leaf routine (retl, no register window) that transforms the one block
! held in %f0/%f2 in place.  The instruction sequence is the same as in
! _cmll256_encrypt_1x (24 camellia_f steps, three fl/fli pairs, final
! fxor); the differences are the reload addresses, which walk downwards
! from %i3, and the final fxor, which uses %f30 for %f0 and %f28 for %f2.
!
! In:     %f0,%f2    block state
!         %f16-%f62  per-step operands, consumed but not initialised here.
!                    NOTE(review): presumably preloaded by a
!                    _cmll256_load_deckey routine outside this chunk, with
!                    %i3 pointing into the key schedule - confirm.
!         %i3        base address for the reloads of %f16-%f30
! Out:    %f0,%f2    result
! Clobb:  %f4 (scratch in the last two steps); %f16-%f30 are reloaded
!         twice and on return hold [%i3+184] down to [%i3+128]
841_cmll256_decrypt_1x:
          ! Group 1 (steps 1-6).  Operands are replaced after their last use
          ! by the doublewords at [%i3-8] down to [%i3-64], in descending
          ! address order.
842          .word     0x84cc0182 !camellia_f        %f16,%f2,%f0,%f2
843          .word     0x80cc8580 !camellia_f        %f18,%f0,%f2,%f0
844          ldd                 [%i3 - 8], %f16
845          ldd                 [%i3 - 16], %f18
846          .word     0x84cd0182 !camellia_f        %f20,%f2,%f0,%f2
847          .word     0x80cd8580 !camellia_f        %f22,%f0,%f2,%f0
848          ldd                 [%i3 - 24], %f20
849          ldd                 [%i3 - 32], %f22
850          .word     0x84ce0182 !camellia_f        %f24,%f2,%f0,%f2
851          .word     0x80ce8580 !camellia_f        %f26,%f0,%f2,%f0
852          ldd                 [%i3 - 40], %f24
853          ldd                 [%i3 - 48], %f26
          ! First fl/fli pair, then reload of its two operands.
854          .word     0x81b72780 !camellia_fl       %f28,%f0,%f0
855          .word     0x85b7a7a2 !camellia_fli      %f30,%f2,%f2
856          ldd                 [%i3 - 56], %f28
857          ldd                 [%i3 - 64], %f30
          ! Group 2 (steps 7-12) with operands %f32-%f42.
858          .word     0x84c84182 !camellia_f        %f32,%f2,%f0,%f2
859          .word     0x80c8c580 !camellia_f        %f34,%f0,%f2,%f0
860          .word     0x84c94182 !camellia_f        %f36,%f2,%f0,%f2
861          .word     0x80c9c580 !camellia_f        %f38,%f0,%f2,%f0
862          .word     0x84ca4182 !camellia_f        %f40,%f2,%f0,%f2
863          .word     0x80cac580 !camellia_f        %f42,%f0,%f2,%f0
          ! Second fl/fli pair.
864          .word     0x81b36780 !camellia_fl       %f44,%f0,%f0
865          .word     0x85b3e7a2 !camellia_fli      %f46,%f2,%f2
          ! Group 3 (steps 13-18) with operands %f48-%f58.
866          .word     0x84cc4182 !camellia_f        %f48,%f2,%f0,%f2
867          .word     0x80ccc580 !camellia_f        %f50,%f0,%f2,%f0
868          .word     0x84cd4182 !camellia_f        %f52,%f2,%f0,%f2
869          .word     0x80cdc580 !camellia_f        %f54,%f0,%f2,%f0
870          .word     0x84ce4182 !camellia_f        %f56,%f2,%f0,%f2
871          .word     0x80cec580 !camellia_f        %f58,%f0,%f2,%f0
          ! Third fl/fli pair.
872          .word     0x81b76780 !camellia_fl       %f60,%f0,%f0
873          .word     0x85b7e7a2 !camellia_fli      %f62,%f2,%f2
          ! Group 4 (steps 19-24) uses the values loaded during group 1;
          ! each register is then reloaded from [%i3+184] down to [%i3+128].
          ! NOTE(review): presumably restoring the entry contents for the
          ! next call - confirm against the deckey loader.
874          .word     0x84cc0182 !camellia_f        %f16,%f2,%f0,%f2
875          .word     0x80cc8580 !camellia_f        %f18,%f0,%f2,%f0
876          ldd                 [%i3 + 184], %f16
877          ldd                 [%i3 + 176], %f18
878          .word     0x84cd0182 !camellia_f        %f20,%f2,%f0,%f2
879          .word     0x80cd8580 !camellia_f        %f22,%f0,%f2,%f0
880          ldd                 [%i3 + 168], %f20
881          ldd                 [%i3 + 160], %f22
          ! The last two steps route their results through %f4.
882          .word     0x88ce0182 !camellia_f        %f24,%f2,%f0,%f4
883          .word     0x84ce8980 !camellia_f        %f26,%f0,%f4,%f2
884          ldd                 [%i3 + 152], %f24
885          ldd                 [%i3 + 144], %f26
          ! Final xor: %f30 goes with %f0 and %f28 with %f2, the reverse of
          ! the pairing in the encrypt routine, matching the descending
          ! load order used above.
886          .word     0x81b78d84 !fxor    %f30,%f4,%f0
887          .word     0x85b70d82 !fxor    %f28,%f2,%f2
888          ldd                 [%i3 + 136], %f28
889          retl
          ! Delay slot: last reload executes before control returns.
890          ldd                 [%i3 + 128], %f30
891.type     _cmll256_decrypt_1x,#function
892.size     _cmll256_decrypt_1x,.-_cmll256_decrypt_1x
893
894.align    32
! _cmll256_decrypt_2x
!
! Two-block variant of _cmll256_decrypt_1x: the same steps are applied to
! two independent blocks, with the instructions for the two blocks
! interleaved step by step.
!
! In:     %f0,%f2    first block state
!         %f4,%f6    second block state
!         %f16-%f62  per-step operands, consumed but not initialised here.
!                    NOTE(review): presumably preloaded by a
!                    _cmll256_load_deckey routine outside this chunk -
!                    confirm.
!         %i3        base address for the reloads of %f16-%f30
! Out:    %f0,%f2 and %f4,%f6   results for the first and second block
! Clobb:  %f8,%f10 (scratch in the last two steps); %f16-%f30 are
!         reloaded twice and on return hold [%i3+184] down to [%i3+128]
895_cmll256_decrypt_2x:
          ! Group 1 (steps 1-6), both blocks.  Operands are replaced after
          ! their last use by the doublewords at [%i3-8] down to [%i3-64].
896          .word     0x84cc0182 !camellia_f        %f16,%f2,%f0,%f2
897          .word     0x8ccc0986 !camellia_f        %f16,%f6,%f4,%f6
898          .word     0x80cc8580 !camellia_f        %f18,%f0,%f2,%f0
899          .word     0x88cc8d84 !camellia_f        %f18,%f4,%f6,%f4
900          ldd                 [%i3 - 8], %f16
901          ldd                 [%i3 - 16], %f18
902          .word     0x84cd0182 !camellia_f        %f20,%f2,%f0,%f2
903          .word     0x8ccd0986 !camellia_f        %f20,%f6,%f4,%f6
904          .word     0x80cd8580 !camellia_f        %f22,%f0,%f2,%f0
905          .word     0x88cd8d84 !camellia_f        %f22,%f4,%f6,%f4
906          ldd                 [%i3 - 24], %f20
907          ldd                 [%i3 - 32], %f22
908          .word     0x84ce0182 !camellia_f        %f24,%f2,%f0,%f2
909          .word     0x8cce0986 !camellia_f        %f24,%f6,%f4,%f6
910          .word     0x80ce8580 !camellia_f        %f26,%f0,%f2,%f0
911          .word     0x88ce8d84 !camellia_f        %f26,%f4,%f6,%f4
912          ldd                 [%i3 - 40], %f24
913          ldd                 [%i3 - 48], %f26
          ! First fl/fli pair for both blocks, then reload of its operands.
914          .word     0x81b72780 !camellia_fl       %f28,%f0,%f0
915          .word     0x89b72784 !camellia_fl       %f28,%f4,%f4
916          .word     0x85b7a7a2 !camellia_fli      %f30,%f2,%f2
917          .word     0x8db7a7a6 !camellia_fli      %f30,%f6,%f6
918          ldd                 [%i3 - 56], %f28
919          ldd                 [%i3 - 64], %f30
          ! Group 2 (steps 7-12) with operands %f32-%f42.
920          .word     0x84c84182 !camellia_f        %f32,%f2,%f0,%f2
921          .word     0x8cc84986 !camellia_f        %f32,%f6,%f4,%f6
922          .word     0x80c8c580 !camellia_f        %f34,%f0,%f2,%f0
923          .word     0x88c8cd84 !camellia_f        %f34,%f4,%f6,%f4
924          .word     0x84c94182 !camellia_f        %f36,%f2,%f0,%f2
925          .word     0x8cc94986 !camellia_f        %f36,%f6,%f4,%f6
926          .word     0x80c9c580 !camellia_f        %f38,%f0,%f2,%f0
927          .word     0x88c9cd84 !camellia_f        %f38,%f4,%f6,%f4
928          .word     0x84ca4182 !camellia_f        %f40,%f2,%f0,%f2
929          .word     0x8cca4986 !camellia_f        %f40,%f6,%f4,%f6
930          .word     0x80cac580 !camellia_f        %f42,%f0,%f2,%f0
931          .word     0x88cacd84 !camellia_f        %f42,%f4,%f6,%f4
          ! Second fl/fli pair.
932          .word     0x81b36780 !camellia_fl       %f44,%f0,%f0
933          .word     0x89b36784 !camellia_fl       %f44,%f4,%f4
934          .word     0x85b3e7a2 !camellia_fli      %f46,%f2,%f2
935          .word     0x8db3e7a6 !camellia_fli      %f46,%f6,%f6
          ! Group 3 (steps 13-18) with operands %f48-%f58.
936          .word     0x84cc4182 !camellia_f        %f48,%f2,%f0,%f2
937          .word     0x8ccc4986 !camellia_f        %f48,%f6,%f4,%f6
938          .word     0x80ccc580 !camellia_f        %f50,%f0,%f2,%f0
939          .word     0x88cccd84 !camellia_f        %f50,%f4,%f6,%f4
940          .word     0x84cd4182 !camellia_f        %f52,%f2,%f0,%f2
941          .word     0x8ccd4986 !camellia_f        %f52,%f6,%f4,%f6
942          .word     0x80cdc580 !camellia_f        %f54,%f0,%f2,%f0
943          .word     0x88cdcd84 !camellia_f        %f54,%f4,%f6,%f4
944          .word     0x84ce4182 !camellia_f        %f56,%f2,%f0,%f2
945          .word     0x8cce4986 !camellia_f        %f56,%f6,%f4,%f6
946          .word     0x80cec580 !camellia_f        %f58,%f0,%f2,%f0
947          .word     0x88cecd84 !camellia_f        %f58,%f4,%f6,%f4
          ! Third fl/fli pair.
948          .word     0x81b76780 !camellia_fl       %f60,%f0,%f0
949          .word     0x89b76784 !camellia_fl       %f60,%f4,%f4
950          .word     0x85b7e7a2 !camellia_fli      %f62,%f2,%f2
951          .word     0x8db7e7a6 !camellia_fli      %f62,%f6,%f6
          ! Group 4 (steps 19-24) uses the values loaded during group 1;
          ! each register is then reloaded from [%i3+184] down to [%i3+128].
          ! NOTE(review): presumably restoring the entry contents for the
          ! next call - confirm against the deckey loader.
952          .word     0x84cc0182 !camellia_f        %f16,%f2,%f0,%f2
953          .word     0x8ccc0986 !camellia_f        %f16,%f6,%f4,%f6
954          .word     0x80cc8580 !camellia_f        %f18,%f0,%f2,%f0
955          .word     0x88cc8d84 !camellia_f        %f18,%f4,%f6,%f4
956          ldd                 [%i3 + 184], %f16
957          ldd                 [%i3 + 176], %f18
958          .word     0x84cd0182 !camellia_f        %f20,%f2,%f0,%f2
959          .word     0x8ccd0986 !camellia_f        %f20,%f6,%f4,%f6
960          .word     0x80cd8580 !camellia_f        %f22,%f0,%f2,%f0
961          .word     0x88cd8d84 !camellia_f        %f22,%f4,%f6,%f4
962          ldd                 [%i3 + 168], %f20
963          ldd                 [%i3 + 160], %f22
          ! The last two steps route results through %f8 (first block) and
          ! %f10 (second block) ahead of the final fxor.
964          .word     0x90ce0182 !camellia_f        %f24,%f2,%f0,%f8
965          .word     0x94ce0986 !camellia_f        %f24,%f6,%f4,%f10
966          .word     0x84ce9180 !camellia_f        %f26,%f0,%f8,%f2
967          .word     0x8cce9584 !camellia_f        %f26,%f4,%f10,%f6
968          ldd                 [%i3 + 152], %f24
969          ldd                 [%i3 + 144], %f26
          ! Final xor: %f30 pairs with %f0/%f4 and %f28 with %f2/%f6, the
          ! reverse of the pairing in the encrypt routine.
970          .word     0x81b78d88 !fxor    %f30,%f8,%f0
971          .word     0x89b78d8a !fxor    %f30,%f10,%f4
972          .word     0x85b70d82 !fxor    %f28,%f2,%f2
973          .word     0x8db70d86 !fxor    %f28,%f6,%f6
974          ldd                 [%i3 + 136], %f28
975          retl
          ! Delay slot: last reload executes before control returns.
976          ldd                 [%i3 + 128], %f30
977.type     _cmll256_decrypt_2x,#function
978.size     _cmll256_decrypt_2x,.-_cmll256_decrypt_2x
979.globl    cmll128_t4_cbc_encrypt
980.align    32
! cmll128_t4_cbc_encrypt
!
! CBC-mode encryption loop built on _cmll128_encrypt_1x.  Each 16-byte
! input block is xored with %g4/%g5 (rk[0]) and with the running chaining
! value in %f0/%f2, encrypted, stored, and kept in %f0/%f2 as the chaining
! value for the next block.
!
! Register use as seen in this routine (after save):
!   %i0      input pointer; the low 3 bits go to %l0 and the pointer is
!            rounded down to an 8-byte boundary
!   %i1      output pointer
!   %i2      length in bytes; zero returns immediately, otherwise it is
!            turned into a count of 16-byte blocks
!   %i3      not touched here.  NOTE(review): presumably the key schedule
!            read by _cmll128_load_enckey and _cmll128_encrypt_1x - confirm
!   %i4      16-byte ivec; loaded into %f0-%f3 on entry and rewritten from
!            %f0-%f3 on every exit except the zero-length abort
!   %g4,%g5  rk[0]; not set in this routine, so presumably loaded by
!            _cmll128_load_enckey - confirm
!   %l0,%l1  left / right shift counts in bits for realigning input that is
!            not 8-byte aligned (%l1 = 64 - %l0)
!   %l2      output misalignment, %i1 & 7
!   %l3      0xff >> %l2, byte mask for the partial stores
!   %l5      path selector, then tail block count on the block-store path
!
! Three store paths exist: plain std (aligned output), faligndata plus
! partial stores (unaligned output, local label 2), and 8-byte stores
! through ASI 0xe2 (.L128cbc_enc_blk) taken when the output is 8-byte
! aligned, the length is at least 128 and %i0 differs from %i1.
981cmll128_t4_cbc_encrypt:
982          save                %sp, -STACK_FRAME, %sp
983          cmp                 %i2, 0
984          be,pn               SIZE_T_CC, .L128_cbc_enc_abort
985          srln                %i2, 0, %i2                   ! needed on v8+, "nop" on v9
986          sub                 %i0, %i1, %l5       ! %i0!=%i1
          ! Load the chaining value with 4-byte loads into %f0-%f3.
987          ld                  [%i4 + 0], %f0
988          ld                  [%i4 + 4], %f1
989          ld                  [%i4 + 8], %f2
990          ld                  [%i4 + 12], %f3
991          prefetch  [%i0], 20
992          prefetch  [%i0 + 63], 20
993          call                _cmll128_load_enckey
          ! Delay slot of the call: %l0 = input misalignment in bytes.
994          and                 %i0, 7, %l0
995          andn                %i0, 7, %i0
996          sll                 %l0, 3, %l0
997          mov                 64, %l1
998          mov                 0xff, %l3
999          sub                 %l1, %l0, %l1
1000          and                 %i1, 7, %l2
          ! %l5 was set to %i0-%i1 above.  It is forced to zero when the
          ! output is misaligned or the length is 127 or less, so the branch
          ! below is taken only when none of the three listed conditions
          ! holds.
1001          cmp                 %i2, 127
1002          movrnz              %l2, 0, %l5                   ! if (    %i1&7 ||
1003          movleu              SIZE_T_CC, 0, %l5   !         %i2<128 ||
1004          brnz,pn             %l5, .L128cbc_enc_blk         !         %i0==%i1)
1005          srl                 %l3, %l2, %l3
1006
          ! NOTE(review): alignaddrl presumably sets up the alignment state
          ! used by the faligndata instructions at local label 2 and rounds
          ! %i1 down - confirm against the VIS specification.
1007          .word     0xb3b64340 !alignaddrl        %i1,%g0,%i1
          ! Bytes to 16-byte blocks.
1008          srlx                %i2, 4, %i2
1009          prefetch  [%i1], 22
1010
          ! Per-block loop.  The unaligned-output path re-enters at
          ! .L128_cbc_enc_loop+4 because it has already loaded %o0.
1011.L128_cbc_enc_loop:
1012          ldx                 [%i0 + 0], %o0
1013          brz,pt              %l0, 4f
1014          ldx                 [%i0 + 8], %o1
1015
          ! Misaligned input: merge three doublewords into %o0/%o1.
1016          ldx                 [%i0 + 16], %o2
1017          sllx                %o0, %l0, %o0
1018          srlx                %o1, %l1, %g1
1019          sllx                %o1, %l0, %o1
1020          or                  %g1, %o0, %o0
1021          srlx                %o2, %l1, %o2
1022          or                  %o2, %o1, %o1
10234:
1024          xor                 %g4, %o0, %o0                 ! ^= rk[0]
1025          xor                 %g5, %o1, %o1
1026          .word     0x99b02308 !movxtod %o0,%f12
1027          .word     0x9db02309 !movxtod %o1,%f14
1028
1029          .word     0x81b30d80 !fxor    %f12,%f0,%f0                  ! ^= ivec
1030          .word     0x85b38d82 !fxor    %f14,%f2,%f2
1031          prefetch  [%i1 + 63], 22
1032          prefetch  [%i0 + 16+63], 20
1033          call                _cmll128_encrypt_1x
          ! Delay slot of the call: advance the input pointer.
1034          add                 %i0, 16, %i0
1035
          ! Unaligned output goes to local label 2; the block counter is
          ! decremented in the delay slot on both paths.
1036          brnz,pn             %l2, 2f
1037          sub                 %i2, 1, %i2
1038
1039          std                 %f0, [%i1 + 0]
1040          std                 %f2, [%i1 + 8]
1041          brnz,pt             %i2, .L128_cbc_enc_loop
1042          add                 %i1, 16, %i1
          ! Done: write the last ciphertext block back as the new ivec.
1043          st                  %f0, [%i4 + 0]
1044          st                  %f1, [%i4 + 4]
1045          st                  %f2, [%i4 + 8]
1046          st                  %f3, [%i4 + 12]
1047.L128_cbc_enc_abort:
1048          ret
1049          restore
1050
1051.align    16
          ! NOTE(review): ASI 0x82 is presumably a non-faulting load ASI, so
          ! this early read of the next input doubleword cannot trap after
          ! the last block - confirm.
10522:        ldxa                [%i0]0x82, %o0                ! avoid read-after-write hazard
1053                                                            ! and ~3x deterioration
1054                                                            ! in inp==out case
1055          .word     0x89b00900 !faligndata        %f0,%f0,%f4                   ! handle unaligned output
1056          .word     0x8db00902 !faligndata        %f0,%f2,%f6
1057          .word     0x91b08902 !faligndata        %f2,%f2,%f8
1058
          ! Head bytes with mask %l3, one full doubleword, then tail bytes
          ! at the advanced %i1 with the complemented mask.
1059          stda                %f4, [%i1 + %l3]0xc0          ! partial store
1060          std                 %f6, [%i1 + 8]
1061          add                 %i1, 16, %i1
1062          orn                 %g0, %l3, %l3
1063          stda                %f8, [%i1 + %l3]0xc0          ! partial store
1064
          ! Re-enter one instruction past the loop head (the ldx into %o0 is
          ! replaced by the ldxa above); the delay slot complements %l3
          ! again, restoring the head mask.
1065          brnz,pt             %i2, .L128_cbc_enc_loop+4
1066          orn                 %g0, %l3, %l3
1067          st                  %f0, [%i4 + 0]
1068          st                  %f1, [%i4 + 4]
1069          st                  %f2, [%i4 + 8]
1070          st                  %f3, [%i4 + 12]
1071          ret
1072          restore
1073
1074!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1075.align    32
          ! Block-store path.  %l5 = (out + len) & 63 is the tail byte count
          ! left for the regular loop; %i2 becomes the number of 16-byte
          ! blocks handled here and %l5 the tail rounded up to whole blocks.
1076.L128cbc_enc_blk:
1077          add       %i1, %i2, %l5
1078          and       %l5, 63, %l5        ! tail
1079          sub       %i2, %l5, %i2
1080          add       %l5, 15, %l5        ! round up to 16n
1081          srlx      %i2, 4, %i2
1082          srl       %l5, 4, %l5
1083
1084.L128_cbc_enc_blk_loop:
1085          ldx                 [%i0 + 0], %o0
1086          brz,pt              %l0, 5f
1087          ldx                 [%i0 + 8], %o1
1088
          ! Misaligned input: same merge as in the regular loop.
1089          ldx                 [%i0 + 16], %o2
1090          sllx                %o0, %l0, %o0
1091          srlx                %o1, %l1, %g1
1092          sllx                %o1, %l0, %o1
1093          or                  %g1, %o0, %o0
1094          srlx                %o2, %l1, %o2
1095          or                  %o2, %o1, %o1
10965:
1097          xor                 %g4, %o0, %o0                 ! ^= rk[0]
1098          xor                 %g5, %o1, %o1
1099          .word     0x99b02308 !movxtod %o0,%f12
1100          .word     0x9db02309 !movxtod %o1,%f14
1101
1102          .word     0x81b30d80 !fxor    %f12,%f0,%f0                  ! ^= ivec
1103          .word     0x85b38d82 !fxor    %f14,%f2,%f2
1104          prefetch  [%i0 + 16+63], 20
1105          call                _cmll128_encrypt_1x
1106          add                 %i0, 16, %i0
1107          sub                 %i2, 1, %i2
1108
          ! Two 8-byte stores per block, advancing %i1 by 8 after each.
1109          stda                %f0, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
1110          add                 %i1, 8, %i1
1111          stda                %f2, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
1112          brnz,pt             %i2, .L128_cbc_enc_blk_loop
1113          add                 %i1, 8, %i1
1114
          ! Order the ASI 0xe2 stores before continuing, then hand any tail
          ! blocks to the regular loop with %i2 = %l5 (set in the delay
          ! slot).
1115          membar              #StoreLoad|#StoreStore
1116          brnz,pt             %l5, .L128_cbc_enc_loop
1117          mov                 %l5, %i2
1118          st                  %f0, [%i4 + 0]
1119          st                  %f1, [%i4 + 4]
1120          st                  %f2, [%i4 + 8]
1121          st                  %f3, [%i4 + 12]
1122          ret
1123          restore
1124.type     cmll128_t4_cbc_encrypt,#function
1125.size     cmll128_t4_cbc_encrypt,.-cmll128_t4_cbc_encrypt
1126.globl    cmll256_t4_cbc_encrypt
1127.align    32
! cmll256_t4_cbc_encrypt
!
! CBC-mode encryption loop built on _cmll256_encrypt_1x.  The control flow
! is the same as in cmll128_t4_cbc_encrypt; only the key loader and the
! block routine differ.  Each 16-byte input block is xored with %g4/%g5
! (rk[0]) and with the running chaining value in %f0/%f2, encrypted,
! stored, and kept in %f0/%f2 as the chaining value for the next block.
!
! Register use as seen in this routine (after save):
!   %i0      input pointer; the low 3 bits go to %l0 and the pointer is
!            rounded down to an 8-byte boundary
!   %i1      output pointer
!   %i2      length in bytes; zero returns immediately, otherwise it is
!            turned into a count of 16-byte blocks
!   %i3      not touched here; _cmll256_encrypt_1x reads key material
!            relative to it
!   %i4      16-byte ivec; loaded into %f0-%f3 on entry and rewritten from
!            %f0-%f3 on every exit except the zero-length abort
!   %g4,%g5  rk[0]; not set in this routine, so presumably loaded by
!            _cmll256_load_enckey - confirm
!   %l0,%l1  left / right shift counts in bits for realigning input that is
!            not 8-byte aligned (%l1 = 64 - %l0)
!   %l2      output misalignment, %i1 & 7
!   %l3      0xff >> %l2, byte mask for the partial stores
!   %l5      path selector, then tail block count on the block-store path
!
! Three store paths exist: plain std (aligned output), faligndata plus
! partial stores (unaligned output, local label 2), and 8-byte stores
! through ASI 0xe2 (.L256cbc_enc_blk) taken when the output is 8-byte
! aligned, the length is at least 128 and %i0 differs from %i1.
1128cmll256_t4_cbc_encrypt:
1129          save                %sp, -STACK_FRAME, %sp
1130          cmp                 %i2, 0
1131          be,pn               SIZE_T_CC, .L256_cbc_enc_abort
1132          srln                %i2, 0, %i2                   ! needed on v8+, "nop" on v9
1133          sub                 %i0, %i1, %l5       ! %i0!=%i1
          ! Load the chaining value with 4-byte loads into %f0-%f3.
1134          ld                  [%i4 + 0], %f0
1135          ld                  [%i4 + 4], %f1
1136          ld                  [%i4 + 8], %f2
1137          ld                  [%i4 + 12], %f3
1138          prefetch  [%i0], 20
1139          prefetch  [%i0 + 63], 20
1140          call                _cmll256_load_enckey
          ! Delay slot of the call: %l0 = input misalignment in bytes.
1141          and                 %i0, 7, %l0
1142          andn                %i0, 7, %i0
1143          sll                 %l0, 3, %l0
1144          mov                 64, %l1
1145          mov                 0xff, %l3
1146          sub                 %l1, %l0, %l1
1147          and                 %i1, 7, %l2
          ! %l5 was set to %i0-%i1 above.  It is forced to zero when the
          ! output is misaligned or the length is 127 or less, so the branch
          ! below is taken only when none of the three listed conditions
          ! holds.
1148          cmp                 %i2, 127
1149          movrnz              %l2, 0, %l5                   ! if (    %i1&7 ||
1150          movleu              SIZE_T_CC, 0, %l5   !         %i2<128 ||
1151          brnz,pn             %l5, .L256cbc_enc_blk         !         %i0==%i1)
1152          srl                 %l3, %l2, %l3
1153
          ! NOTE(review): alignaddrl presumably sets up the alignment state
          ! used by the faligndata instructions at local label 2 and rounds
          ! %i1 down - confirm against the VIS specification.
1154          .word     0xb3b64340 !alignaddrl        %i1,%g0,%i1
          ! Bytes to 16-byte blocks.
1155          srlx                %i2, 4, %i2
1156          prefetch  [%i1], 22
1157
          ! Per-block loop.  The unaligned-output path re-enters at
          ! .L256_cbc_enc_loop+4 because it has already loaded %o0.
1158.L256_cbc_enc_loop:
1159          ldx                 [%i0 + 0], %o0
1160          brz,pt              %l0, 4f
1161          ldx                 [%i0 + 8], %o1
1162
          ! Misaligned input: merge three doublewords into %o0/%o1.
1163          ldx                 [%i0 + 16], %o2
1164          sllx                %o0, %l0, %o0
1165          srlx                %o1, %l1, %g1
1166          sllx                %o1, %l0, %o1
1167          or                  %g1, %o0, %o0
1168          srlx                %o2, %l1, %o2
1169          or                  %o2, %o1, %o1
11704:
1171          xor                 %g4, %o0, %o0                 ! ^= rk[0]
1172          xor                 %g5, %o1, %o1
1173          .word     0x99b02308 !movxtod %o0,%f12
1174          .word     0x9db02309 !movxtod %o1,%f14
1175
1176          .word     0x81b30d80 !fxor    %f12,%f0,%f0                  ! ^= ivec
1177          .word     0x85b38d82 !fxor    %f14,%f2,%f2
1178          prefetch  [%i1 + 63], 22
1179          prefetch  [%i0 + 16+63], 20
1180          call                _cmll256_encrypt_1x
          ! Delay slot of the call: advance the input pointer.
1181          add                 %i0, 16, %i0
1182
          ! Unaligned output goes to local label 2; the block counter is
          ! decremented in the delay slot on both paths.
1183          brnz,pn             %l2, 2f
1184          sub                 %i2, 1, %i2
1185
1186          std                 %f0, [%i1 + 0]
1187          std                 %f2, [%i1 + 8]
1188          brnz,pt             %i2, .L256_cbc_enc_loop
1189          add                 %i1, 16, %i1
          ! Done: write the last ciphertext block back as the new ivec.
1190          st                  %f0, [%i4 + 0]
1191          st                  %f1, [%i4 + 4]
1192          st                  %f2, [%i4 + 8]
1193          st                  %f3, [%i4 + 12]
1194.L256_cbc_enc_abort:
1195          ret
1196          restore
1197
1198.align    16
          ! NOTE(review): ASI 0x82 is presumably a non-faulting load ASI, so
          ! this early read of the next input doubleword cannot trap after
          ! the last block - confirm.
11992:        ldxa                [%i0]0x82, %o0                ! avoid read-after-write hazard
1200                                                            ! and ~3x deterioration
1201                                                            ! in inp==out case
1202          .word     0x89b00900 !faligndata        %f0,%f0,%f4                   ! handle unaligned output
1203          .word     0x8db00902 !faligndata        %f0,%f2,%f6
1204          .word     0x91b08902 !faligndata        %f2,%f2,%f8
1205
          ! Head bytes with mask %l3, one full doubleword, then tail bytes
          ! at the advanced %i1 with the complemented mask.
1206          stda                %f4, [%i1 + %l3]0xc0          ! partial store
1207          std                 %f6, [%i1 + 8]
1208          add                 %i1, 16, %i1
1209          orn                 %g0, %l3, %l3
1210          stda                %f8, [%i1 + %l3]0xc0          ! partial store
1211
          ! Re-enter one instruction past the loop head (the ldx into %o0 is
          ! replaced by the ldxa above); the delay slot complements %l3
          ! again, restoring the head mask.
1212          brnz,pt             %i2, .L256_cbc_enc_loop+4
1213          orn                 %g0, %l3, %l3
1214          st                  %f0, [%i4 + 0]
1215          st                  %f1, [%i4 + 4]
1216          st                  %f2, [%i4 + 8]
1217          st                  %f3, [%i4 + 12]
1218          ret
1219          restore
1220
1221!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1222.align    32
          ! Block-store path.  %l5 = (out + len) & 63 is the tail byte count
          ! left for the regular loop; %i2 becomes the number of 16-byte
          ! blocks handled here and %l5 the tail rounded up to whole blocks.
1223.L256cbc_enc_blk:
1224          add       %i1, %i2, %l5
1225          and       %l5, 63, %l5        ! tail
1226          sub       %i2, %l5, %i2
1227          add       %l5, 15, %l5        ! round up to 16n
1228          srlx      %i2, 4, %i2
1229          srl       %l5, 4, %l5
1230
1231.L256_cbc_enc_blk_loop:
1232          ldx                 [%i0 + 0], %o0
1233          brz,pt              %l0, 5f
1234          ldx                 [%i0 + 8], %o1
1235
          ! Misaligned input: same merge as in the regular loop.
1236          ldx                 [%i0 + 16], %o2
1237          sllx                %o0, %l0, %o0
1238          srlx                %o1, %l1, %g1
1239          sllx                %o1, %l0, %o1
1240          or                  %g1, %o0, %o0
1241          srlx                %o2, %l1, %o2
1242          or                  %o2, %o1, %o1
12435:
1244          xor                 %g4, %o0, %o0                 ! ^= rk[0]
1245          xor                 %g5, %o1, %o1
1246          .word     0x99b02308 !movxtod %o0,%f12
1247          .word     0x9db02309 !movxtod %o1,%f14
1248
1249          .word     0x81b30d80 !fxor    %f12,%f0,%f0                  ! ^= ivec
1250          .word     0x85b38d82 !fxor    %f14,%f2,%f2
1251          prefetch  [%i0 + 16+63], 20
1252          call                _cmll256_encrypt_1x
1253          add                 %i0, 16, %i0
1254          sub                 %i2, 1, %i2
1255
          ! Two 8-byte stores per block, advancing %i1 by 8 after each.
1256          stda                %f0, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
1257          add                 %i1, 8, %i1
1258          stda                %f2, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
1259          brnz,pt             %i2, .L256_cbc_enc_blk_loop
1260          add                 %i1, 8, %i1
1261
          ! Order the ASI 0xe2 stores before continuing, then hand any tail
          ! blocks to the regular loop with %i2 = %l5 (set in the delay
          ! slot).
1262          membar              #StoreLoad|#StoreStore
1263          brnz,pt             %l5, .L256_cbc_enc_loop
1264          mov                 %l5, %i2
1265          st                  %f0, [%i4 + 0]
1266          st                  %f1, [%i4 + 4]
1267          st                  %f2, [%i4 + 8]
1268          st                  %f3, [%i4 + 12]
1269          ret
1270          restore
1271.type     cmll256_t4_cbc_encrypt,#function
1272.size     cmll256_t4_cbc_encrypt,.-cmll256_t4_cbc_encrypt
1273.globl    cmll128_t4_cbc_decrypt
1274.align    32
1275cmll128_t4_cbc_decrypt:
1276          save                %sp, -STACK_FRAME, %sp
1277          cmp                 %i2, 0
1278          be,pn               SIZE_T_CC, .L128_cbc_dec_abort
1279          srln                %i2, 0, %i2                   ! needed on v8+, "nop" on v9
1280          sub                 %i0, %i1, %l5       ! %i0!=%i1
1281          ld                  [%i4 + 0], %f12     ! load ivec
1282          ld                  [%i4 + 4], %f13
1283          ld                  [%i4 + 8], %f14
1284          ld                  [%i4 + 12], %f15
1285          prefetch  [%i0], 20
1286          prefetch  [%i0 + 63], 20
1287          call                _cmll128_load_deckey
1288          and                 %i0, 7, %l0
1289          andn                %i0, 7, %i0
1290          sll                 %l0, 3, %l0
1291          mov                 64, %l1
1292          mov                 0xff, %l3
1293          sub                 %l1, %l0, %l1
1294          and                 %i1, 7, %l2
1295          cmp                 %i2, 255
1296          movrnz              %l2, 0, %l5                   ! if (    %i1&7 ||
1297          movleu              SIZE_T_CC, 0, %l5   !         %i2<256 ||
1298          brnz,pn             %l5, .L128cbc_dec_blk         !         %i0==%i1)
1299          srl                 %l3, %l2, %l3
1300
1301          andcc               %i2, 16, %g0                  ! is number of blocks even?
1302          srlx                %i2, 4, %i2
1303          .word     0xb3b64340 !alignaddrl        %i1,%g0,%i1
1304          bz                  %icc, .L128_cbc_dec_loop2x
1305          prefetch  [%i1], 22
1306.L128_cbc_dec_loop:
1307          ldx                 [%i0 + 0], %o0
1308          brz,pt              %l0, 4f
1309          ldx                 [%i0 + 8], %o1
1310
1311          ldx                 [%i0 + 16], %o2
1312          sllx                %o0, %l0, %o0
1313          srlx                %o1, %l1, %g1
1314          sllx                %o1, %l0, %o1
1315          or                  %g1, %o0, %o0
1316          srlx                %o2, %l1, %o2
1317          or                  %o2, %o1, %o1
13184:
1319          xor                 %g4, %o0, %o2                 ! ^= rk[0]
1320          xor                 %g5, %o1, %o3
1321          .word     0x81b0230a !movxtod %o2,%f0
1322          .word     0x85b0230b !movxtod %o3,%f2
1323
1324          prefetch  [%i1 + 63], 22
1325          prefetch  [%i0 + 16+63], 20
1326          call                _cmll128_decrypt_1x
1327          add                 %i0, 16, %i0
1328
1329          .word     0x81b30d80 !fxor    %f12,%f0,%f0                  ! ^= ivec
1330          .word     0x85b38d82 !fxor    %f14,%f2,%f2
1331          .word     0x99b02308 !movxtod %o0,%f12
1332          .word     0x9db02309 !movxtod %o1,%f14
1333
1334          brnz,pn             %l2, 2f
1335          sub                 %i2, 1, %i2
1336
1337          std                 %f0, [%i1 + 0]
1338          std                 %f2, [%i1 + 8]
1339          brnz,pt             %i2, .L128_cbc_dec_loop2x
1340          add                 %i1, 16, %i1
1341          st                  %f12, [%i4 + 0]
1342          st                  %f13, [%i4 + 4]
1343          st                  %f14, [%i4 + 8]
1344          st                  %f15, [%i4 + 12]
1345.L128_cbc_dec_abort:
1346          ret
1347          restore
1348
1349.align    16
13502:        ldxa                [%i0]0x82, %o0                ! avoid read-after-write hazard
1351                                                            ! and ~3x deterioration
1352                                                            ! in inp==out case
1353          .word     0x89b00900 !faligndata        %f0,%f0,%f4                   ! handle unaligned output
1354          .word     0x8db00902 !faligndata        %f0,%f2,%f6
1355          .word     0x91b08902 !faligndata        %f2,%f2,%f8
1356
1357          stda                %f4, [%i1 + %l3]0xc0          ! partial store
1358          std                 %f6, [%i1 + 8]
1359          add                 %i1, 16, %i1
1360          orn                 %g0, %l3, %l3
1361          stda                %f8, [%i1 + %l3]0xc0          ! partial store
1362
1363          brnz,pt             %i2, .L128_cbc_dec_loop2x+4
1364          orn                 %g0, %l3, %l3
1365          st                  %f12, [%i4 + 0]
1366          st                  %f13, [%i4 + 4]
1367          st                  %f14, [%i4 + 8]
1368          st                  %f15, [%i4 + 12]
1369          ret
1370          restore
1371
1372!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1373.align    32
1374.L128_cbc_dec_loop2x:
1375          ldx                 [%i0 + 0], %o0
1376          ldx                 [%i0 + 8], %o1
1377          ldx                 [%i0 + 16], %o2
1378          brz,pt              %l0, 4f
1379          ldx                 [%i0 + 24], %o3
1380
1381          ldx                 [%i0 + 32], %o4
1382          sllx                %o0, %l0, %o0
1383          srlx                %o1, %l1, %g1
1384          or                  %g1, %o0, %o0
1385          sllx                %o1, %l0, %o1
1386          srlx                %o2, %l1, %g1
1387          or                  %g1, %o1, %o1
1388          sllx                %o2, %l0, %o2
1389          srlx                %o3, %l1, %g1
1390          or                  %g1, %o2, %o2
1391          sllx                %o3, %l0, %o3
1392          srlx                %o4, %l1, %o4
1393          or                  %o4, %o3, %o3
13944:
1395          xor                 %g4, %o0, %o4                 ! ^= rk[0]
1396          xor                 %g5, %o1, %o5
1397          .word     0x81b0230c !movxtod %o4,%f0
1398          .word     0x85b0230d !movxtod %o5,%f2
1399          xor                 %g4, %o2, %o4
1400          xor                 %g5, %o3, %o5
1401          .word     0x89b0230c !movxtod %o4,%f4
1402          .word     0x8db0230d !movxtod %o5,%f6
1403
1404          prefetch  [%i1 + 63], 22
1405          prefetch  [%i0 + 32+63], 20
1406          call                _cmll128_decrypt_2x
1407          add                 %i0, 32, %i0
1408
1409          .word     0x91b02308 !movxtod %o0,%f8
1410          .word     0x95b02309 !movxtod %o1,%f10
1411          .word     0x81b30d80 !fxor    %f12,%f0,%f0                  ! ^= ivec
1412          .word     0x85b38d82 !fxor    %f14,%f2,%f2
1413          .word     0x99b0230a !movxtod %o2,%f12
1414          .word     0x9db0230b !movxtod %o3,%f14
1415          .word     0x89b20d84 !fxor    %f8,%f4,%f4
1416          .word     0x8db28d86 !fxor    %f10,%f6,%f6
1417
1418          brnz,pn             %l2, 2f
1419          sub                 %i2, 2, %i2
1420
1421          std                 %f0, [%i1 + 0]
1422          std                 %f2, [%i1 + 8]
1423          std                 %f4, [%i1 + 16]
1424          std                 %f6, [%i1 + 24]
1425          brnz,pt             %i2, .L128_cbc_dec_loop2x
1426          add                 %i1, 32, %i1
1427          st                  %f12, [%i4 + 0]
1428          st                  %f13, [%i4 + 4]
1429          st                  %f14, [%i4 + 8]
1430          st                  %f15, [%i4 + 12]
1431          ret
1432          restore
1433
1434.align    16
14352:        ldxa                [%i0]0x82, %o0                ! avoid read-after-write hazard
1436                                                            ! and ~3x deterioration
1437                                                            ! in inp==out case
1438          .word     0x91b00900 !faligndata        %f0,%f0,%f8                   ! handle unaligned output
1439          .word     0x81b00902 !faligndata        %f0,%f2,%f0
1440          .word     0x85b08904 !faligndata        %f2,%f4,%f2
1441          .word     0x89b10906 !faligndata        %f4,%f6,%f4
1442          .word     0x8db18906 !faligndata        %f6,%f6,%f6
1443          stda                %f8, [%i1 + %l3]0xc0          ! partial store
1444          std                 %f0, [%i1 + 8]
1445          std                 %f2, [%i1 + 16]
1446          std                 %f4, [%i1 + 24]
1447          add                 %i1, 32, %i1
1448          orn                 %g0, %l3, %l3
1449          stda                %f6, [%i1 + %l3]0xc0          ! partial store
1450
1451          brnz,pt             %i2, .L128_cbc_dec_loop2x+4
1452          orn                 %g0, %l3, %l3
1453          st                  %f12, [%i4 + 0]
1454          st                  %f13, [%i4 + 4]
1455          st                  %f14, [%i4 + 8]
1456          st                  %f15, [%i4 + 12]
1457          ret
1458          restore
1459
1460!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1461.align    32
1462.L128cbc_dec_blk:
1463          add       %i1, %i2, %l5
1464          and       %l5, 63, %l5        ! tail
1465          sub       %i2, %l5, %i2
1466          add       %l5, 15, %l5        ! round up to 16n
1467          srlx      %i2, 4, %i2
1468          srl       %l5, 4, %l5
1469          sub       %i2, 1, %i2
1470          add       %l5, 1, %l5
1471
1472.L128_cbc_dec_blk_loop2x:
1473          ldx                 [%i0 + 0], %o0
1474          ldx                 [%i0 + 8], %o1
1475          ldx                 [%i0 + 16], %o2
1476          brz,pt              %l0, 5f
1477          ldx                 [%i0 + 24], %o3
1478
1479          ldx                 [%i0 + 32], %o4
1480          sllx                %o0, %l0, %o0
1481          srlx                %o1, %l1, %g1
1482          or                  %g1, %o0, %o0
1483          sllx                %o1, %l0, %o1
1484          srlx                %o2, %l1, %g1
1485          or                  %g1, %o1, %o1
1486          sllx                %o2, %l0, %o2
1487          srlx                %o3, %l1, %g1
1488          or                  %g1, %o2, %o2
1489          sllx                %o3, %l0, %o3
1490          srlx                %o4, %l1, %o4
1491          or                  %o4, %o3, %o3
14925:
1493          xor                 %g4, %o0, %o4                 ! ^= rk[0]
1494          xor                 %g5, %o1, %o5
1495          .word     0x81b0230c !movxtod %o4,%f0
1496          .word     0x85b0230d !movxtod %o5,%f2
1497          xor                 %g4, %o2, %o4
1498          xor                 %g5, %o3, %o5
1499          .word     0x89b0230c !movxtod %o4,%f4
1500          .word     0x8db0230d !movxtod %o5,%f6
1501
1502          prefetch  [%i0 + 32+63], 20
1503          call                _cmll128_decrypt_2x
1504          add                 %i0, 32, %i0
1505          subcc               %i2, 2, %i2
1506
1507          .word     0x91b02308 !movxtod %o0,%f8
1508          .word     0x95b02309 !movxtod %o1,%f10
1509          .word     0x81b30d80 !fxor    %f12,%f0,%f0                  ! ^= ivec
1510          .word     0x85b38d82 !fxor    %f14,%f2,%f2
1511          .word     0x99b0230a !movxtod %o2,%f12
1512          .word     0x9db0230b !movxtod %o3,%f14
1513          .word     0x89b20d84 !fxor    %f8,%f4,%f4
1514          .word     0x8db28d86 !fxor    %f10,%f6,%f6
1515
1516          stda                %f0, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
1517          add                 %i1, 8, %i1
1518          stda                %f2, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
1519          add                 %i1, 8, %i1
1520          stda                %f4, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
1521          add                 %i1, 8, %i1
1522          stda                %f6, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
1523          bgu,pt              SIZE_T_CC, .L128_cbc_dec_blk_loop2x
1524          add                 %i1, 8, %i1
1525
1526          add                 %l5, %i2, %i2
1527          andcc               %i2, 1, %g0                   ! is number of blocks even?
1528          membar              #StoreLoad|#StoreStore
1529          bnz,pt              %icc, .L128_cbc_dec_loop
1530          srl                 %i2, 0, %i2
1531          brnz,pn             %i2, .L128_cbc_dec_loop2x
1532          nop
1533          st                  %f12, [%i4 + 0]     ! write out ivec
1534          st                  %f13, [%i4 + 4]
1535          st                  %f14, [%i4 + 8]
1536          st                  %f15, [%i4 + 12]
1537          ret
1538          restore
1539.type     cmll128_t4_cbc_decrypt,#function
1540.size     cmll128_t4_cbc_decrypt,.-cmll128_t4_cbc_decrypt
1541.globl    cmll256_t4_cbc_decrypt
1542.align    32
              ! cmll256_t4_cbc_decrypt: CBC-mode decryption loop built on the
              ! helpers _cmll256_load_deckey, _cmll256_decrypt_1x and
              ! _cmll256_decrypt_2x (all defined outside this block).
              !
              ! Register use, as read from the code below:
              !   %i0  input pointer; may be unaligned, in which case adjacent
              !        64-bit words are merged with shifts
              !   %i1  output pointer; may be unaligned, in which case output
              !        goes through faligndata and partial stores
              !   %i2  length in bytes; zero returns immediately, otherwise it
              !        is turned into a count of 16-byte blocks
              !   %i4  pointer to the 16-byte ivec; read on entry and written
              !        back (last ciphertext block) on every exit except abort
              !   %i3  not referenced in this block; presumably the key
              !        schedule consumed by _cmll256_load_deckey (TODO confirm)
              !   %g4,%g5  rk[0], XORed into every input block; presumably
              !        set up by _cmll256_load_deckey (TODO confirm)
              !   %f12,%f14  running ivec, i.e. the previous ciphertext block
              !   %l0  input misalignment in bits; %l1 = 64 - %l0
              !   %l2  output misalignment in bytes; %l3 = 0xff >> %l2 is the
              !        byte mask used by the partial stores
              !   %l5  nonzero selects the block-store path .L256cbc_dec_blk
              !
              ! Note that the instruction after each branch or call is a delay
              ! slot and executes before the transfer takes effect.
1543cmll256_t4_cbc_decrypt:
1544          save                %sp, -STACK_FRAME, %sp
1545          cmp                 %i2, 0
1546          be,pn               SIZE_T_CC, .L256_cbc_dec_abort   ! len == 0: return without touching ivec
1547          srln                %i2, 0, %i2                   ! needed on v8+, "nop" on v9
1548          sub                 %i0, %i1, %l5       ! %i0!=%i1
1549          ld                  [%i4 + 0], %f12     ! load ivec
1550          ld                  [%i4 + 4], %f13
1551          ld                  [%i4 + 8], %f14
1552          ld                  [%i4 + 12], %f15
1553          prefetch  [%i0], 20                     ! prefetch input
1554          prefetch  [%i0 + 63], 20
1555          call                _cmll256_load_deckey
1556          and                 %i0, 7, %l0         ! (delay slot) %l0 = inp & 7
1557          andn                %i0, 7, %i0         ! round inp down to 8 bytes
1558          sll                 %l0, 3, %l0         ! input misalignment in bits
1559          mov                 64, %l1
1560          mov                 0xff, %l3
1561          sub                 %l1, %l0, %l1       ! %l1 = 64 - %l0
1562          and                 %i1, 7, %l2         ! %l2 = out & 7
1563          cmp                 %i2, 255
1564          movrnz              %l2, 0, %l5                   ! if (    %i1&7 ||
1565          movleu              SIZE_T_CC, 0, %l5   !         %i2<256 ||
1566          brnz,pn             %l5, .L256cbc_dec_blk         !         %i0==%i1)
1567          srl                 %l3, %l2, %l3       ! (delay slot) %l3 = 0xff >> (out & 7)
1568
              ! Regular path: %l5 was zero, i.e. output unaligned, or fewer
              ! than 256 bytes, or in-place operation.
1569          andcc               %i2, 16, %g0                  ! is number of blocks even?
1570          srlx                %i2, 4, %i2         ! bytes -> 16-byte blocks
1571          .word     0xb3b64340 !alignaddrl        %i1,%g0,%i1
1572          bz                  %icc, .L256_cbc_dec_loop2x    ! even count: go straight to the 2x loop
1573          prefetch  [%i1], 22                     ! (delay slot) prefetch output
              ! Single-block step, entered when the remaining block count is odd.
1574.L256_cbc_dec_loop:
1575          ldx                 [%i0 + 0], %o0
1576          brz,pt              %l0, 4f             ! aligned input: skip the merge
1577          ldx                 [%i0 + 8], %o1
1578
              ! Unaligned input: rebuild the 16-byte block from three
              ! consecutive aligned 64-bit words.
1579          ldx                 [%i0 + 16], %o2
1580          sllx                %o0, %l0, %o0
1581          srlx                %o1, %l1, %g1
1582          sllx                %o1, %l0, %o1
1583          or                  %g1, %o0, %o0
1584          srlx                %o2, %l1, %o2
1585          or                  %o2, %o1, %o1
15864:
              ! %o0:%o1 keep the raw ciphertext (it becomes the next ivec);
              ! the rk[0]-whitened copy goes to %f0:%f2 via %o2:%o3.
1587          xor                 %g4, %o0, %o2                 ! ^= rk[0]
1588          xor                 %g5, %o1, %o3
1589          .word     0x81b0230a !movxtod %o2,%f0
1590          .word     0x85b0230b !movxtod %o3,%f2
1591
1592          prefetch  [%i1 + 63], 22
1593          prefetch  [%i0 + 16+63], 20
1594          call                _cmll256_decrypt_1x ! code after the call still uses %o0:%o1, so they must survive it
1595          add                 %i0, 16, %i0        ! (delay slot) advance inp by one block
1596
1597          .word     0x81b30d80 !fxor    %f12,%f0,%f0                  ! ^= ivec
1598          .word     0x85b38d82 !fxor    %f14,%f2,%f2
1599          .word     0x99b02308 !movxtod %o0,%f12            ! ciphertext becomes the new ivec
1600          .word     0x9db02309 !movxtod %o1,%f14
1601
1602          brnz,pn             %l2, 2f             ! unaligned output: use partial stores
1603          sub                 %i2, 1, %i2         ! (delay slot) one block consumed
1604
1605          std                 %f0, [%i1 + 0]
1606          std                 %f2, [%i1 + 8]
1607          brnz,pt             %i2, .L256_cbc_dec_loop2x     ! remaining count is handled two at a time
1608          add                 %i1, 16, %i1        ! (delay slot) advance out
1609          st                  %f12, [%i4 + 0]     ! write out ivec
1610          st                  %f13, [%i4 + 4]
1611          st                  %f14, [%i4 + 8]
1612          st                  %f15, [%i4 + 12]
1613.L256_cbc_dec_abort:
1614          ret
1615          restore
1616
1617.align    16
16182:        ldxa                [%i0]0x82, %o0                ! avoid read-after-write hazard
1619                                                            ! and ~3x deterioration
1620                                                            ! in inp==out case
              ! The ldxa above preloads the first word of the NEXT input block
              ! before the output stores below. ASI 0x82 is presumably the
              ! no-fault primary ASI (TODO confirm against the SPARC V9 manual).
1621          .word     0x89b00900 !faligndata        %f0,%f0,%f4                   ! handle unaligned output
1622          .word     0x8db00902 !faligndata        %f0,%f2,%f6
1623          .word     0x91b08902 !faligndata        %f2,%f2,%f8
1624
1625          stda                %f4, [%i1 + %l3]0xc0          ! partial store
1626          std                 %f6, [%i1 + 8]
1627          add                 %i1, 16, %i1
1628          orn                 %g0, %l3, %l3       ! invert the byte mask for the trailing bytes
1629          stda                %f8, [%i1 + %l3]0xc0          ! partial store
1630
1631          brnz,pt             %i2, .L256_cbc_dec_loop2x+4   ! +4 skips the first ldx there: %o0 is already loaded
1632          orn                 %g0, %l3, %l3       ! (delay slot) restore the byte mask
1633          st                  %f12, [%i4 + 0]     ! write out ivec
1634          st                  %f13, [%i4 + 4]
1635          st                  %f14, [%i4 + 8]
1636          st                  %f15, [%i4 + 12]
1637          ret
1638          restore
1639
1640!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1641.align    32
              ! Two blocks per iteration. The first instruction must remain the
              ! ldx into %o0, because the unaligned-output paths re-enter at
              ! .L256_cbc_dec_loop2x+4.
1642.L256_cbc_dec_loop2x:
1643          ldx                 [%i0 + 0], %o0
1644          ldx                 [%i0 + 8], %o1
1645          ldx                 [%i0 + 16], %o2
1646          brz,pt              %l0, 4f             ! aligned input: skip the merge
1647          ldx                 [%i0 + 24], %o3
1648
              ! Unaligned input: rebuild two blocks from five aligned words.
1649          ldx                 [%i0 + 32], %o4
1650          sllx                %o0, %l0, %o0
1651          srlx                %o1, %l1, %g1
1652          or                  %g1, %o0, %o0
1653          sllx                %o1, %l0, %o1
1654          srlx                %o2, %l1, %g1
1655          or                  %g1, %o1, %o1
1656          sllx                %o2, %l0, %o2
1657          srlx                %o3, %l1, %g1
1658          or                  %g1, %o2, %o2
1659          sllx                %o3, %l0, %o3
1660          srlx                %o4, %l1, %o4
1661          or                  %o4, %o3, %o3
16624:
              ! %o0:%o1 and %o2:%o3 keep the raw ciphertext blocks; the
              ! whitened copies go to %f0:%f2 and %f4:%f6 via %o4:%o5.
1663          xor                 %g4, %o0, %o4                 ! ^= rk[0]
1664          xor                 %g5, %o1, %o5
1665          .word     0x81b0230c !movxtod %o4,%f0
1666          .word     0x85b0230d !movxtod %o5,%f2
1667          xor                 %g4, %o2, %o4
1668          xor                 %g5, %o3, %o5
1669          .word     0x89b0230c !movxtod %o4,%f4
1670          .word     0x8db0230d !movxtod %o5,%f6
1671
1672          prefetch  [%i1 + 63], 22
1673          prefetch  [%i0 + 32+63], 20
1674          call                _cmll256_decrypt_2x ! code after the call still uses %o0-%o3, so they must survive it
1675          add                 %i0, 32, %i0        ! (delay slot) advance inp by two blocks
1676
              ! CBC chaining: block 0 ^= ivec, block 1 ^= ciphertext 0,
              ! and ciphertext 1 becomes the new ivec.
1677          .word     0x91b02308 !movxtod %o0,%f8
1678          .word     0x95b02309 !movxtod %o1,%f10
1679          .word     0x81b30d80 !fxor    %f12,%f0,%f0                  ! ^= ivec
1680          .word     0x85b38d82 !fxor    %f14,%f2,%f2
1681          .word     0x99b0230a !movxtod %o2,%f12
1682          .word     0x9db0230b !movxtod %o3,%f14
1683          .word     0x89b20d84 !fxor    %f8,%f4,%f4
1684          .word     0x8db28d86 !fxor    %f10,%f6,%f6
1685
1686          brnz,pn             %l2, 2f             ! unaligned output: use partial stores
1687          sub                 %i2, 2, %i2         ! (delay slot) two blocks consumed
1688
1689          std                 %f0, [%i1 + 0]
1690          std                 %f2, [%i1 + 8]
1691          std                 %f4, [%i1 + 16]
1692          std                 %f6, [%i1 + 24]
1693          brnz,pt             %i2, .L256_cbc_dec_loop2x
1694          add                 %i1, 32, %i1        ! (delay slot) advance out
1695          st                  %f12, [%i4 + 0]     ! write out ivec
1696          st                  %f13, [%i4 + 4]
1697          st                  %f14, [%i4 + 8]
1698          st                  %f15, [%i4 + 12]
1699          ret
1700          restore
1701
1702.align    16
17032:        ldxa                [%i0]0x82, %o0                ! avoid read-after-write hazard
1704                                                            ! and ~3x deterioration
1705                                                            ! in inp==out case
              ! Unaligned output for two blocks: 32 bytes are spread over five
              ! 8-byte slots; the first and last slots are written with
              ! byte-masked partial stores.
1706          .word     0x91b00900 !faligndata        %f0,%f0,%f8                   ! handle unaligned output
1707          .word     0x81b00902 !faligndata        %f0,%f2,%f0
1708          .word     0x85b08904 !faligndata        %f2,%f4,%f2
1709          .word     0x89b10906 !faligndata        %f4,%f6,%f4
1710          .word     0x8db18906 !faligndata        %f6,%f6,%f6
1711          stda                %f8, [%i1 + %l3]0xc0          ! partial store
1712          std                 %f0, [%i1 + 8]
1713          std                 %f2, [%i1 + 16]
1714          std                 %f4, [%i1 + 24]
1715          add                 %i1, 32, %i1
1716          orn                 %g0, %l3, %l3       ! invert the byte mask for the trailing bytes
1717          stda                %f6, [%i1 + %l3]0xc0          ! partial store
1718
1719          brnz,pt             %i2, .L256_cbc_dec_loop2x+4   ! +4 skips the first ldx there: %o0 is already loaded
1720          orn                 %g0, %l3, %l3       ! (delay slot) restore the byte mask
1721          st                  %f12, [%i4 + 0]     ! write out ivec
1722          st                  %f13, [%i4 + 4]
1723          st                  %f14, [%i4 + 8]
1724          st                  %f15, [%i4 + 12]
1725          ret
1726          restore
1727
1728!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1729.align    32
              ! Block-store path: reached only when the output is 8-byte
              ! aligned, the length is at least 256 bytes and inp != out.
              ! The bulk is written with ASI 0xe2 stores; the blocks that
              ! end in the last partial 64-byte line (plus one) are left to
              ! the regular loops above.
1730.L256cbc_dec_blk:
1731          add       %i1, %i2, %l5       ! %l5 = out + len
1732          and       %l5, 63, %l5        ! tail
1733          sub       %i2, %l5, %i2       ! bytes handled by the block-store loop
1734          add       %l5, 15, %l5        ! round up to 16n
1735          srlx      %i2, 4, %i2         ! bulk bytes -> blocks
1736          srl       %l5, 4, %l5         ! tail bytes -> blocks
1737          sub       %i2, 1, %i2         ! move one block from the bulk count ...
1738          add       %l5, 1, %l5         ! ... to the tail count
1739
1740.L256_cbc_dec_blk_loop2x:
1741          ldx                 [%i0 + 0], %o0
1742          ldx                 [%i0 + 8], %o1
1743          ldx                 [%i0 + 16], %o2
1744          brz,pt              %l0, 5f             ! aligned input: skip the merge
1745          ldx                 [%i0 + 24], %o3
1746
1747          ldx                 [%i0 + 32], %o4
1748          sllx                %o0, %l0, %o0
1749          srlx                %o1, %l1, %g1
1750          or                  %g1, %o0, %o0
1751          sllx                %o1, %l0, %o1
1752          srlx                %o2, %l1, %g1
1753          or                  %g1, %o1, %o1
1754          sllx                %o2, %l0, %o2
1755          srlx                %o3, %l1, %g1
1756          or                  %g1, %o2, %o2
1757          sllx                %o3, %l0, %o3
1758          srlx                %o4, %l1, %o4
1759          or                  %o4, %o3, %o3
17605:
1761          xor                 %g4, %o0, %o4                 ! ^= rk[0]
1762          xor                 %g5, %o1, %o5
1763          .word     0x81b0230c !movxtod %o4,%f0
1764          .word     0x85b0230d !movxtod %o5,%f2
1765          xor                 %g4, %o2, %o4
1766          xor                 %g5, %o3, %o5
1767          .word     0x89b0230c !movxtod %o4,%f4
1768          .word     0x8db0230d !movxtod %o5,%f6
1769
1770          prefetch  [%i0 + 32+63], 20
1771          call                _cmll256_decrypt_2x
1772          add                 %i0, 32, %i0        ! (delay slot) advance inp by two blocks
1773          subcc               %i2, 2, %i2         ! sets the flags tested by bgu below; no instruction in between writes the integer condition codes
1774
              ! Same CBC chaining as in .L256_cbc_dec_loop2x.
1775          .word     0x91b02308 !movxtod %o0,%f8
1776          .word     0x95b02309 !movxtod %o1,%f10
1777          .word     0x81b30d80 !fxor    %f12,%f0,%f0                  ! ^= ivec
1778          .word     0x85b38d82 !fxor    %f14,%f2,%f2
1779          .word     0x99b0230a !movxtod %o2,%f12
1780          .word     0x9db0230b !movxtod %o3,%f14
1781          .word     0x89b20d84 !fxor    %f8,%f4,%f4
1782          .word     0x8db28d86 !fxor    %f10,%f6,%f6
1783
1784          stda                %f0, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
1785          add                 %i1, 8, %i1
1786          stda                %f2, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
1787          add                 %i1, 8, %i1
1788          stda                %f4, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
1789          add                 %i1, 8, %i1
1790          stda                %f6, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
1791          bgu,pt              SIZE_T_CC, .L256_cbc_dec_blk_loop2x   ! loop while the biased bulk count stays above zero
1792          add                 %i1, 8, %i1         ! (delay slot) out has now advanced by 32 bytes
1793
              ! Bulk done: hand the remaining blocks to the regular loops.
1794          add                 %l5, %i2, %i2       ! remaining blocks = tail count + loop residue
1795          andcc               %i2, 1, %g0                   ! is number of blocks even?
1796          membar              #StoreLoad|#StoreStore        ! barrier after the ASI 0xe2 stores
1797          bnz,pt              %icc, .L256_cbc_dec_loop      ! odd: do one block first
1798          srl                 %i2, 0, %i2         ! (delay slot) clear the upper 32 bits of the count
1799          brnz,pn             %i2, .L256_cbc_dec_loop2x     ! even and nonzero: finish two at a time
1800          nop
1801          st                  %f12, [%i4 + 0]     ! write out ivec
1802          st                  %f13, [%i4 + 4]
1803          st                  %f14, [%i4 + 8]
1804          st                  %f15, [%i4 + 12]
1805          ret
1806          restore
1807.type     cmll256_t4_cbc_decrypt,#function
1808.size     cmll256_t4_cbc_decrypt,.-cmll256_t4_cbc_decrypt
1809.globl    cmll128_t4_ctr32_encrypt
1810.align    32
1811cmll128_t4_ctr32_encrypt:
1812          save                %sp, -STACK_FRAME, %sp
1813          srln                %i2, 0, %i2                   ! needed on v8+, "nop" on v9
1814
1815          prefetch  [%i0], 20
1816          prefetch  [%i0 + 63], 20
1817          call                _cmll128_load_enckey
1818          sllx                %i2, 4, %i2
1819
1820          ld                  [%i4 + 0], %l4      ! counter
1821          ld                  [%i4 + 4], %l5
1822          ld                  [%i4 + 8], %l6
1823          ld                  [%i4 + 12], %l7
1824
1825          sllx                %l4, 32, %o5
1826          or                  %l5, %o5, %o5
1827          sllx                %l6, 32, %g1
1828          xor                 %o5, %g4, %g4                 ! ^= rk[0]
1829          xor                 %g1, %g5, %g5
1830          .word     0x9db02304 !movxtod %g4,%f14            ! most significant 64 bits
1831
1832          sub                 %i0, %i1, %l5       ! %i0!=%i1
1833          and                 %i0, 7, %l0
1834          andn                %i0, 7, %i0
1835          sll                 %l0, 3, %l0
1836          mov                 64, %l1
1837          mov                 0xff, %l3
1838          sub                 %l1, %l0, %l1
1839          and                 %i1, 7, %l2
1840          cmp                 %i2, 255
1841          movrnz              %l2, 0, %l5                   ! if (    %i1&7 ||
1842          movleu              SIZE_T_CC, 0, %l5   !         %i2<256 ||
1843          brnz,pn             %l5, .L128_ctr32_blk          !         %i0==%i1)
1844          srl                 %l3, %l2, %l3
1845
1846          andcc               %i2, 16, %g0                  ! is number of blocks even?
1847          .word     0xb3b64340 !alignaddrl        %i1,%g0,%i1
1848          bz                  %icc, .L128_ctr32_loop2x
1849          srlx                %i2, 4, %i2
1850.L128_ctr32_loop:
1851          ldx                 [%i0 + 0], %o0
1852          brz,pt              %l0, 4f
1853          ldx                 [%i0 + 8], %o1
1854
1855          ldx                 [%i0 + 16], %o2
1856          sllx                %o0, %l0, %o0
1857          srlx                %o1, %l1, %g1
1858          sllx                %o1, %l0, %o1
1859          or                  %g1, %o0, %o0
1860          srlx                %o2, %l1, %o2
1861          or                  %o2, %o1, %o1
18624:
1863          xor                 %g5, %l7, %g1                 ! ^= rk[0]
1864          add                 %l7, 1, %l7
1865          .word     0x85b02301 !movxtod %g1,%f2
1866          srl                 %l7, 0, %l7                   ! clruw
1867          prefetch  [%i1 + 63], 22
1868          prefetch  [%i0 + 16+63], 20
1869          .word     0x84cc1d82 !camellia_f        %f16,%f2,%f14,%f2
1870          .word     0x80cc858e !camellia_f        %f18,%f14,%f2,%f0
1871          call                _cmll128_encrypt_1x+8
1872          add                 %i0, 16, %i0
1873
1874          .word     0x95b02308 !movxtod %o0,%f10
1875          .word     0x99b02309 !movxtod %o1,%f12
1876          .word     0x81b28d80 !fxor    %f10,%f0,%f0                  ! ^= inp
1877          .word     0x85b30d82 !fxor    %f12,%f2,%f2
1878
1879          brnz,pn             %l2, 2f
1880          sub                 %i2, 1, %i2
1881
1882          std                 %f0, [%i1 + 0]
1883          std                 %f2, [%i1 + 8]
1884          brnz,pt             %i2, .L128_ctr32_loop2x
1885          add                 %i1, 16, %i1
1886
1887          ret
1888          restore
1889
1890.align    16
18912:        ldxa                [%i0]0x82, %o0                ! avoid read-after-write hazard
1892                                                            ! and ~3x deterioration
1893                                                            ! in inp==out case
1894          .word     0x89b00900 !faligndata        %f0,%f0,%f4                   ! handle unaligned output
1895          .word     0x8db00902 !faligndata        %f0,%f2,%f6
1896          .word     0x91b08902 !faligndata        %f2,%f2,%f8
1897          stda                %f4, [%i1 + %l3]0xc0          ! partial store
1898          std                 %f6, [%i1 + 8]
1899          add                 %i1, 16, %i1
1900          orn                 %g0, %l3, %l3
1901          stda                %f8, [%i1 + %l3]0xc0          ! partial store
1902
1903          brnz,pt             %i2, .L128_ctr32_loop2x+4
1904          orn                 %g0, %l3, %l3
1905
1906          ret
1907          restore
1908
1909!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1910.align    32
1911.L128_ctr32_loop2x:
1912          ldx                 [%i0 + 0], %o0
1913          ldx                 [%i0 + 8], %o1
1914          ldx                 [%i0 + 16], %o2
1915          brz,pt              %l0, 4f
1916          ldx                 [%i0 + 24], %o3
1917
1918          ldx                 [%i0 + 32], %o4
1919          sllx                %o0, %l0, %o0
1920          srlx                %o1, %l1, %g1
1921          or                  %g1, %o0, %o0
1922          sllx                %o1, %l0, %o1
1923          srlx                %o2, %l1, %g1
1924          or                  %g1, %o1, %o1
1925          sllx                %o2, %l0, %o2
1926          srlx                %o3, %l1, %g1
1927          or                  %g1, %o2, %o2
1928          sllx                %o3, %l0, %o3
1929          srlx                %o4, %l1, %o4
1930          or                  %o4, %o3, %o3
19314:
1932          xor                 %g5, %l7, %g1                 ! ^= rk[0]
1933          add                 %l7, 1, %l7
1934          .word     0x85b02301 !movxtod %g1,%f2
1935          srl                 %l7, 0, %l7                   ! clruw
1936          xor                 %g5, %l7, %g1
1937          add                 %l7, 1, %l7
1938          .word     0x8db02301 !movxtod %g1,%f6
1939          srl                 %l7, 0, %l7                   ! clruw
1940          prefetch  [%i1 + 63], 22
1941          prefetch  [%i0 + 32+63], 20
1942          .word     0x84cc1d82 !camellia_f        %f16,%f2,%f14,%f2
1943          .word     0x8ccc1d86 !camellia_f        %f16,%f6,%f14,%f6
1944          .word     0x80cc858e !camellia_f        %f18,%f14,%f2,%f0
1945          .word     0x88cc8d8e !camellia_f        %f18,%f14,%f6,%f4
1946          call                _cmll128_encrypt_2x+16
1947          add                 %i0, 32, %i0
1948
1949          .word     0x91b02308 !movxtod %o0,%f8
1950          .word     0x95b02309 !movxtod %o1,%f10
1951          .word     0x99b0230a !movxtod %o2,%f12
1952          .word     0x81b20d80 !fxor    %f8,%f0,%f0                   ! ^= inp
1953          .word     0x91b0230b !movxtod %o3,%f8
1954          .word     0x85b28d82 !fxor    %f10,%f2,%f2
1955          .word     0x89b30d84 !fxor    %f12,%f4,%f4
1956          .word     0x8db20d86 !fxor    %f8,%f6,%f6
1957
1958          brnz,pn             %l2, 2f
1959          sub                 %i2, 2, %i2
1960
1961          std                 %f0, [%i1 + 0]
1962          std                 %f2, [%i1 + 8]
1963          std                 %f4, [%i1 + 16]
1964          std                 %f6, [%i1 + 24]
1965          brnz,pt             %i2, .L128_ctr32_loop2x
1966          add                 %i1, 32, %i1
1967
1968          ret
1969          restore
1970
1971.align    16
19722:        ldxa                [%i0]0x82, %o0                ! avoid read-after-write hazard
1973                                                            ! and ~3x deterioration
1974                                                            ! in inp==out case
1975          .word     0x91b00900 !faligndata        %f0,%f0,%f8                   ! handle unaligned output
1976          .word     0x81b00902 !faligndata        %f0,%f2,%f0
1977          .word     0x85b08904 !faligndata        %f2,%f4,%f2
1978          .word     0x89b10906 !faligndata        %f4,%f6,%f4
1979          .word     0x8db18906 !faligndata        %f6,%f6,%f6
1980
1981          stda                %f8, [%i1 + %l3]0xc0          ! partial store
1982          std                 %f0, [%i1 + 8]
1983          std                 %f2, [%i1 + 16]
1984          std                 %f4, [%i1 + 24]
1985          add                 %i1, 32, %i1
1986          orn                 %g0, %l3, %l3
1987          stda                %f6, [%i1 + %l3]0xc0          ! partial store
1988
1989          brnz,pt             %i2, .L128_ctr32_loop2x+4
1990          orn                 %g0, %l3, %l3
1991
1992          ret
1993          restore
1994
1995!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1996.align    32
1997.L128_ctr32_blk:
1998          add       %i1, %i2, %l5
1999          and       %l5, 63, %l5        ! tail
2000          sub       %i2, %l5, %i2
2001          add       %l5, 15, %l5        ! round up to 16n
2002          srlx      %i2, 4, %i2
2003          srl       %l5, 4, %l5
2004          sub       %i2, 1, %i2
2005          add       %l5, 1, %l5
2006
2007.L128_ctr32_blk_loop2x:
2008          ldx                 [%i0 + 0], %o0
2009          ldx                 [%i0 + 8], %o1
2010          ldx                 [%i0 + 16], %o2
2011          brz,pt              %l0, 5f
2012          ldx                 [%i0 + 24], %o3
2013
2014          ldx                 [%i0 + 32], %o4
2015          sllx                %o0, %l0, %o0
2016          srlx                %o1, %l1, %g1
2017          or                  %g1, %o0, %o0
2018          sllx                %o1, %l0, %o1
2019          srlx                %o2, %l1, %g1
2020          or                  %g1, %o1, %o1
2021          sllx                %o2, %l0, %o2
2022          srlx                %o3, %l1, %g1
2023          or                  %g1, %o2, %o2
2024          sllx                %o3, %l0, %o3
2025          srlx                %o4, %l1, %o4
2026          or                  %o4, %o3, %o3
20275:
2028          xor                 %g5, %l7, %g1                 ! ^= rk[0]
2029          add                 %l7, 1, %l7
2030          .word     0x85b02301 !movxtod %g1,%f2
2031          srl                 %l7, 0, %l7                   ! clruw
2032          xor                 %g5, %l7, %g1
2033          add                 %l7, 1, %l7
2034          .word     0x8db02301 !movxtod %g1,%f6
2035          srl                 %l7, 0, %l7                   ! clruw
2036          prefetch  [%i0 + 32+63], 20
2037          .word     0x84cc1d82 !camellia_f        %f16,%f2,%f14,%f2
2038          .word     0x8ccc1d86 !camellia_f        %f16,%f6,%f14,%f6
2039          .word     0x80cc858e !camellia_f        %f18,%f14,%f2,%f0
2040          .word     0x88cc8d8e !camellia_f        %f18,%f14,%f6,%f4
2041          call                _cmll128_encrypt_2x+16
2042          add                 %i0, 32, %i0
2043          subcc               %i2, 2, %i2
2044
2045          .word     0x91b02308 !movxtod %o0,%f8
2046          .word     0x95b02309 !movxtod %o1,%f10
2047          .word     0x99b0230a !movxtod %o2,%f12
2048          .word     0x81b20d80 !fxor    %f8,%f0,%f0                   ! ^= inp
2049          .word     0x91b0230b !movxtod %o3,%f8
2050          .word     0x85b28d82 !fxor    %f10,%f2,%f2
2051          .word     0x89b30d84 !fxor    %f12,%f4,%f4
2052          .word     0x8db20d86 !fxor    %f8,%f6,%f6
2053
2054          stda                %f0, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
2055          add                 %i1, 8, %i1
2056          stda                %f2, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
2057          add                 %i1, 8, %i1
2058          stda                %f4, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
2059          add                 %i1, 8, %i1
2060          stda                %f6, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
2061          bgu,pt              SIZE_T_CC, .L128_ctr32_blk_loop2x
2062          add                 %i1, 8, %i1
2063
2064          add                 %l5, %i2, %i2
2065          andcc               %i2, 1, %g0                   ! is number of blocks even?
2066          membar              #StoreLoad|#StoreStore
2067          bnz,pt              %icc, .L128_ctr32_loop
2068          srl                 %i2, 0, %i2
2069          brnz,pn             %i2, .L128_ctr32_loop2x
2070          nop
2071
2072          ret
2073          restore
2074.type     cmll128_t4_ctr32_encrypt,#function
2075.size     cmll128_t4_ctr32_encrypt,.-cmll128_t4_ctr32_encrypt
!----------------------------------------------------------------------
! cmll256_t4_ctr32_encrypt
!
! Camellia-256 in CTR mode with a 32-bit counter.  The Camellia and VIS
! opcodes are emitted as raw .word values; the trailing comment on each
! such line gives the mnemonic.
!
! Arguments as used by the code below (after the register window save):
!   %i0  input pointer, any alignment
!   %i1  output pointer, any alignment
!   %i2  number of 16-byte blocks (scaled to bytes and back by shifts)
!   %i3  not referenced in this routine; presumably the key schedule
!        consumed by _cmll256_load_enckey -- TODO confirm
!   %i4  pointer to the 16-byte counter block, read as four 32-bit words
!
! Register roles established in the prologue:
!   %g4, %g5  rk[0] xor-ed with counter words 0..2 (rk[0] is presumably
!             left in %g4:%g5 by _cmll256_load_enckey -- TODO confirm)
!   %f14      copy of %g4, the most significant 64 bits
!   %l7       counter word 3, incremented modulo 2^32 for every block
!   %l0, %l1  left and right shift counts in bits for misaligned input
!   %l2       output pointer & 7
!   %l3       byte mask 0xff >> %l2 used by the partial stores
!   %l5       non-zero selects the block-store path at .L256_ctr32_blk
!
! This routine itself never stores to the counter block at [%i4].
!----------------------------------------------------------------------
2076.globl    cmll256_t4_ctr32_encrypt
2077.align    32
2078cmll256_t4_ctr32_encrypt:
2079          save                %sp, -STACK_FRAME, %sp
2080          srln                %i2, 0, %i2                   ! needed on v8+, "nop" on v9
2081
2082          prefetch  [%i0], 20
2083          prefetch  [%i0 + 63], 20
2084          call                _cmll256_load_enckey
2085          sllx                %i2, 4, %i2                   ! delay slot: blocks -> bytes
2086
2087          ld                  [%i4 + 0], %l4      ! counter
2088          ld                  [%i4 + 4], %l5
2089          ld                  [%i4 + 8], %l6
2090          ld                  [%i4 + 12], %l7
2091
              ! %o5 = counter words 0:1, %g1 = counter word 2 in the upper
              ! half; both are folded into %g4:%g5 once, so that only the
              ! low word %l7 has to be mixed in per block.
2092          sllx                %l4, 32, %o5
2093          or                  %l5, %o5, %o5
2094          sllx                %l6, 32, %g1
2095          xor                 %o5, %g4, %g4                 ! ^= rk[0]
2096          xor                 %g1, %g5, %g5
2097          .word     0x9db02304 !movxtod %g4,%f14            ! most significant 64 bits
2098
              ! Alignment bookkeeping and path selection.  %l5 starts as
              ! inp - out and is forced to zero when the output is not
              ! 8-byte aligned or the byte length is 255 or less, so the
              ! block-store path is taken only for aligned, long,
              ! non-in-place requests.
2099          sub                 %i0, %i1, %l5       ! %i0!=%i1
2100          and                 %i0, 7, %l0
2101          andn                %i0, 7, %i0                   ! input rounded down to 8 bytes
2102          sll                 %l0, 3, %l0                   ! input misalignment in bits
2103          mov                 64, %l1
2104          mov                 0xff, %l3
2105          sub                 %l1, %l0, %l1                 ! 64 - %l0
2106          and                 %i1, 7, %l2
2107          cmp                 %i2, 255
2108          movrnz              %l2, 0, %l5                   ! if (    %i1&7 ||
2109          movleu              SIZE_T_CC, 0, %l5   !         %i2<256 ||
2110          brnz,pn             %l5, .L256_ctr32_blk          !         %i0==%i1)
2111          srl                 %l3, %l2, %l3                 ! delay slot: partial store mask
2112
              ! Bit 4 of the byte length is the low bit of the block count.
              ! An odd count runs one single-block pass first; an even
              ! count goes straight to the two-block loop.
2113          andcc               %i2, 16, %g0                  ! is number of blocks even?
2114          .word     0xb3b64340 !alignaddrl        %i1,%g0,%i1
2115          bz                  %icc, .L256_ctr32_loop2x
2116          srlx                %i2, 4, %i2                   ! delay slot: bytes -> blocks
              ! Single-block pass.  It never branches back to itself: every
              ! continuation below targets the two-block loop.
2117.L256_ctr32_loop:
2118          ldx                 [%i0 + 0], %o0
2119          brz,pt              %l0, 4f                       ! aligned input: no merging
2120          ldx                 [%i0 + 8], %o1
2121
              ! Misaligned input: fetch a third doubleword and shift-merge
              ! the three into the two doublewords of this block.
2122          ldx                 [%i0 + 16], %o2
2123          sllx                %o0, %l0, %o0
2124          srlx                %o1, %l1, %g1
2125          sllx                %o1, %l0, %o1
2126          or                  %g1, %o0, %o0
2127          srlx                %o2, %l1, %o2
2128          or                  %o2, %o1, %o1
21294:
2130          xor                 %g5, %l7, %g1                 ! ^= rk[0]
2131          add                 %l7, 1, %l7
2132          .word     0x85b02301 !movxtod %g1,%f2
2133          srl                 %l7, 0, %l7                   ! clruw
2134          prefetch  [%i1 + 63], 22
2135          prefetch  [%i0 + 16+63], 20
2136          .word     0x84cc1d82 !camellia_f        %f16,%f2,%f14,%f2
2137          .word     0x80cc858e !camellia_f        %f18,%f14,%f2,%f0
              ! The helper is entered 8 bytes (two instructions) past its
              ! label; presumably the two camellia_f words above stand in
              ! for the skipped instructions -- helper is outside this view.
2138          call                _cmll256_encrypt_1x+8
2139          add                 %i0, 16, %i0                  ! delay slot: advance input
2140
2141          .word     0x95b02308 !movxtod %o0,%f10
2142          .word     0x99b02309 !movxtod %o1,%f12
2143          .word     0x81b28d80 !fxor    %f10,%f0,%f0                  ! ^= inp
2144          .word     0x85b30d82 !fxor    %f12,%f2,%f2
2145
2146          brnz,pn             %l2, 2f                       ! output not 8-byte aligned
2147          sub                 %i2, 1, %i2
2148
2149          std                 %f0, [%i1 + 0]
2150          std                 %f2, [%i1 + 8]
2151          brnz,pt             %i2, .L256_ctr32_loop2x
2152          add                 %i1, 16, %i1
2153
2154          ret
2155          restore
2156
2157.align    16
21582:        ldxa                [%i0]0x82, %o0                ! avoid read-after-write hazard
2159                                                            ! and ~3x deterioration
2160                                                            ! in inp==out case
              ! The 16 result bytes are spread over three doublewords:
              ! head (%f4), middle (%f6) and tail (%f8).  Head and tail are
              ! written with partial stores using %l3 and its complement.
2161          .word     0x89b00900 !faligndata        %f0,%f0,%f4                   ! handle unaligned output
2162          .word     0x8db00902 !faligndata        %f0,%f2,%f6
2163          .word     0x91b08902 !faligndata        %f2,%f2,%f8
2164          stda                %f4, [%i1 + %l3]0xc0          ! partial store
2165          std                 %f6, [%i1 + 8]
2166          add                 %i1, 16, %i1
2167          orn                 %g0, %l3, %l3                 ! complement the mask
2168          stda                %f8, [%i1 + %l3]0xc0          ! partial store
2169
              ! +4 skips the first ldx of the two-block loop: %o0 was
              ! already loaded from [%i0] by the ldxa above.
2170          brnz,pt             %i2, .L256_ctr32_loop2x+4
2171          orn                 %g0, %l3, %l3                 ! delay slot: restore the mask
2172
2173          ret
2174          restore
2175
2176!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
              ! Two-block loop: 32 bytes of input and two counter values
              ! per iteration.
2177.align    32
2178.L256_ctr32_loop2x:
2179          ldx                 [%i0 + 0], %o0
2180          ldx                 [%i0 + 8], %o1
2181          ldx                 [%i0 + 16], %o2
2182          brz,pt              %l0, 4f                       ! aligned input: no merging
2183          ldx                 [%i0 + 24], %o3
2184
              ! Misaligned input: fetch a fifth doubleword and shift-merge
              ! the five into four.
2185          ldx                 [%i0 + 32], %o4
2186          sllx                %o0, %l0, %o0
2187          srlx                %o1, %l1, %g1
2188          or                  %g1, %o0, %o0
2189          sllx                %o1, %l0, %o1
2190          srlx                %o2, %l1, %g1
2191          or                  %g1, %o1, %o1
2192          sllx                %o2, %l0, %o2
2193          srlx                %o3, %l1, %g1
2194          or                  %g1, %o2, %o2
2195          sllx                %o3, %l0, %o3
2196          srlx                %o4, %l1, %o4
2197          or                  %o4, %o3, %o3
21984:
              ! Low halves of two consecutive counter blocks go to %f2 and
              ! %f6; the shared high half stays in %f14.
2199          xor                 %g5, %l7, %g1                 ! ^= rk[0]
2200          add                 %l7, 1, %l7
2201          .word     0x85b02301 !movxtod %g1,%f2
2202          srl                 %l7, 0, %l7                   ! clruw
2203          xor                 %g5, %l7, %g1
2204          add                 %l7, 1, %l7
2205          .word     0x8db02301 !movxtod %g1,%f6
2206          srl                 %l7, 0, %l7                   ! clruw
2207          prefetch  [%i1 + 63], 22
2208          prefetch  [%i0 + 32+63], 20
2209          .word     0x84cc1d82 !camellia_f        %f16,%f2,%f14,%f2
2210          .word     0x8ccc1d86 !camellia_f        %f16,%f6,%f14,%f6
2211          .word     0x80cc858e !camellia_f        %f18,%f14,%f2,%f0
2212          .word     0x88cc8d8e !camellia_f        %f18,%f14,%f6,%f4
              ! Entered 16 bytes (four instructions) past the helper label;
              ! presumably the four camellia_f words above stand in for
              ! the skipped instructions -- helper is outside this view.
2213          call                _cmll256_encrypt_2x+16
2214          add                 %i0, 32, %i0                  ! delay slot: advance input
2215
2216          .word     0x91b02308 !movxtod %o0,%f8
2217          .word     0x95b02309 !movxtod %o1,%f10
2218          .word     0x99b0230a !movxtod %o2,%f12
2219          .word     0x81b20d80 !fxor    %f8,%f0,%f0                   ! ^= inp
2220          .word     0x91b0230b !movxtod %o3,%f8
2221          .word     0x85b28d82 !fxor    %f10,%f2,%f2
2222          .word     0x89b30d84 !fxor    %f12,%f4,%f4
2223          .word     0x8db20d86 !fxor    %f8,%f6,%f6
2224
2225          brnz,pn             %l2, 2f                       ! output not 8-byte aligned
2226          sub                 %i2, 2, %i2
2227
2228          std                 %f0, [%i1 + 0]
2229          std                 %f2, [%i1 + 8]
2230          std                 %f4, [%i1 + 16]
2231          std                 %f6, [%i1 + 24]
2232          brnz,pt             %i2, .L256_ctr32_loop2x
2233          add                 %i1, 32, %i1
2234
2235          ret
2236          restore
2237
2238.align    16
22392:        ldxa                [%i0]0x82, %o0                ! avoid read-after-write hazard
2240                                                            ! and ~3x deterioration
2241                                                            ! in inp==out case
              ! The 32 result bytes are spread over five doublewords: head
              ! (%f8), three full ones (%f0, %f2, %f4) and tail (%f6).
2242          .word     0x91b00900 !faligndata        %f0,%f0,%f8                   ! handle unaligned output
2243          .word     0x81b00902 !faligndata        %f0,%f2,%f0
2244          .word     0x85b08904 !faligndata        %f2,%f4,%f2
2245          .word     0x89b10906 !faligndata        %f4,%f6,%f4
2246          .word     0x8db18906 !faligndata        %f6,%f6,%f6
2247
2248          stda                %f8, [%i1 + %l3]0xc0          ! partial store
2249          std                 %f0, [%i1 + 8]
2250          std                 %f2, [%i1 + 16]
2251          std                 %f4, [%i1 + 24]
2252          add                 %i1, 32, %i1
2253          orn                 %g0, %l3, %l3                 ! complement the mask
2254          stda                %f6, [%i1 + %l3]0xc0          ! partial store
2255
              ! +4 skips the first ldx of the loop: %o0 was already loaded
              ! from [%i0] by the ldxa above.
2256          brnz,pt             %i2, .L256_ctr32_loop2x+4
2257          orn                 %g0, %l3, %l3                 ! delay slot: restore the mask
2258
2259          ret
2260          restore
2261
2262!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
              ! Block-store path, reached only when the output is 8-byte
              ! aligned, the byte length exceeds 255 and inp != out.
              ! %l5 = (out + len) & 63 is the byte tail left for the
              ! ordinary loops; the remainder is processed here two blocks
              ! at a time.  The bulk block count is biased by -1 and the
              ! tail block count by +1, so that their sum after the loop
              ! is the number of blocks still to be done.
2263.align    32
2264.L256_ctr32_blk:
2265          add       %i1, %i2, %l5
2266          and       %l5, 63, %l5        ! tail
2267          sub       %i2, %l5, %i2
2268          add       %l5, 15, %l5        ! round up to 16n
2269          srlx      %i2, 4, %i2
2270          srl       %l5, 4, %l5
2271          sub       %i2, 1, %i2
2272          add       %l5, 1, %l5
2273
2274.L256_ctr32_blk_loop2x:
2275          ldx                 [%i0 + 0], %o0
2276          ldx                 [%i0 + 8], %o1
2277          ldx                 [%i0 + 16], %o2
2278          brz,pt              %l0, 5f                       ! aligned input: no merging
2279          ldx                 [%i0 + 24], %o3
2280
              ! Misaligned input: same five-into-four shift-merge as in the
              ! ordinary two-block loop.
2281          ldx                 [%i0 + 32], %o4
2282          sllx                %o0, %l0, %o0
2283          srlx                %o1, %l1, %g1
2284          or                  %g1, %o0, %o0
2285          sllx                %o1, %l0, %o1
2286          srlx                %o2, %l1, %g1
2287          or                  %g1, %o1, %o1
2288          sllx                %o2, %l0, %o2
2289          srlx                %o3, %l1, %g1
2290          or                  %g1, %o2, %o2
2291          sllx                %o3, %l0, %o3
2292          srlx                %o4, %l1, %o4
2293          or                  %o4, %o3, %o3
22945:
2295          xor                 %g5, %l7, %g1                 ! ^= rk[0]
2296          add                 %l7, 1, %l7
2297          .word     0x85b02301 !movxtod %g1,%f2
2298          srl                 %l7, 0, %l7                   ! clruw
2299          xor                 %g5, %l7, %g1
2300          add                 %l7, 1, %l7
2301          .word     0x8db02301 !movxtod %g1,%f6
2302          srl                 %l7, 0, %l7                   ! clruw
2303          prefetch  [%i0 + 32+63], 20
2304          .word     0x84cc1d82 !camellia_f        %f16,%f2,%f14,%f2
2305          .word     0x8ccc1d86 !camellia_f        %f16,%f6,%f14,%f6
2306          .word     0x80cc858e !camellia_f        %f18,%f14,%f2,%f0
2307          .word     0x88cc8d8e !camellia_f        %f18,%f14,%f6,%f4
2308          call                _cmll256_encrypt_2x+16
2309          add                 %i0, 32, %i0                  ! delay slot: advance input
              ! The condition codes set here are consumed by the bgu at the
              ! bottom of the loop; nothing in between writes them.
2310          subcc               %i2, 2, %i2
2311
2312          .word     0x91b02308 !movxtod %o0,%f8
2313          .word     0x95b02309 !movxtod %o1,%f10
2314          .word     0x99b0230a !movxtod %o2,%f12
2315          .word     0x81b20d80 !fxor    %f8,%f0,%f0                   ! ^= inp
2316          .word     0x91b0230b !movxtod %o3,%f8
2317          .word     0x85b28d82 !fxor    %f10,%f2,%f2
2318          .word     0x89b30d84 !fxor    %f12,%f4,%f4
2319          .word     0x8db20d86 !fxor    %f8,%f6,%f6
2320
              ! Four 8-byte stores through ASI 0xe2, advancing the output
              ! pointer by 8 after each.
2321          stda                %f0, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
2322          add                 %i1, 8, %i1
2323          stda                %f2, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
2324          add                 %i1, 8, %i1
2325          stda                %f4, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
2326          add                 %i1, 8, %i1
2327          stda                %f6, [%i1]0xe2                ! ASI_BLK_INIT, T4-specific
2328          bgu,pt              SIZE_T_CC, .L256_ctr32_blk_loop2x
2329          add                 %i1, 8, %i1
2330
              ! Remaining blocks = biased tail count + leftover loop count.
              ! %l2 is zero on this path, so the loops entered below use
              ! their aligned std stores.
2331          add                 %l5, %i2, %i2
2332          andcc               %i2, 1, %g0                   ! is number of blocks even?
2333          membar              #StoreLoad|#StoreStore
2334          bnz,pt              %icc, .L256_ctr32_loop
2335          srl                 %i2, 0, %i2                   ! delay slot: clear upper 32 bits
2336          brnz,pn             %i2, .L256_ctr32_loop2x
2337          nop
2338
2339          ret
2340          restore
2341.type     cmll256_t4_ctr32_encrypt,#function
2342.size     cmll256_t4_ctr32_encrypt,.-cmll256_t4_ctr32_encrypt
2343