summaryrefslogtreecommitdiff
path: root/arch/x86/crypto/salsa20-i586-asm_32.S
diff options
context:
space:
mode:
authorAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-08-05 17:04:01 -0300
committerAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-08-05 17:04:01 -0300
commit57f0f512b273f60d52568b8c6b77e17f5636edc0 (patch)
tree5e910f0e82173f4ef4f51111366a3f1299037a7b /arch/x86/crypto/salsa20-i586-asm_32.S
Initial import
Diffstat (limited to 'arch/x86/crypto/salsa20-i586-asm_32.S')
-rw-r--r--arch/x86/crypto/salsa20-i586-asm_32.S1114
1 files changed, 1114 insertions, 0 deletions
diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S
new file mode 100644
index 000000000..329452b8f
--- /dev/null
+++ b/arch/x86/crypto/salsa20-i586-asm_32.S
@@ -0,0 +1,1114 @@
+# salsa20_pm.s version 20051229
+# D. J. Bernstein
+# Public domain.
+
+#include <linux/linkage.h>
+
+.text
+
+# enter salsa20_encrypt_bytes
+ENTRY(salsa20_encrypt_bytes)
+ mov %esp,%eax
+ and $31,%eax
+ add $256,%eax
+ sub %eax,%esp
+ # eax_stack = eax
+ movl %eax,80(%esp)
+ # ebx_stack = ebx
+ movl %ebx,84(%esp)
+ # esi_stack = esi
+ movl %esi,88(%esp)
+ # edi_stack = edi
+ movl %edi,92(%esp)
+ # ebp_stack = ebp
+ movl %ebp,96(%esp)
+ # x = arg1
+ movl 4(%esp,%eax),%edx
+ # m = arg2
+ movl 8(%esp,%eax),%esi
+ # out = arg3
+ movl 12(%esp,%eax),%edi
+ # bytes = arg4
+ movl 16(%esp,%eax),%ebx
+ # bytes -= 0
+ sub $0,%ebx
+ # goto done if unsigned<=
+ jbe ._done
+._start:
+ # in0 = *(uint32 *) (x + 0)
+ movl 0(%edx),%eax
+ # in1 = *(uint32 *) (x + 4)
+ movl 4(%edx),%ecx
+ # in2 = *(uint32 *) (x + 8)
+ movl 8(%edx),%ebp
+ # j0 = in0
+ movl %eax,164(%esp)
+ # in3 = *(uint32 *) (x + 12)
+ movl 12(%edx),%eax
+ # j1 = in1
+ movl %ecx,168(%esp)
+ # in4 = *(uint32 *) (x + 16)
+ movl 16(%edx),%ecx
+ # j2 = in2
+ movl %ebp,172(%esp)
+ # in5 = *(uint32 *) (x + 20)
+ movl 20(%edx),%ebp
+ # j3 = in3
+ movl %eax,176(%esp)
+ # in6 = *(uint32 *) (x + 24)
+ movl 24(%edx),%eax
+ # j4 = in4
+ movl %ecx,180(%esp)
+ # in7 = *(uint32 *) (x + 28)
+ movl 28(%edx),%ecx
+ # j5 = in5
+ movl %ebp,184(%esp)
+ # in8 = *(uint32 *) (x + 32)
+ movl 32(%edx),%ebp
+ # j6 = in6
+ movl %eax,188(%esp)
+ # in9 = *(uint32 *) (x + 36)
+ movl 36(%edx),%eax
+ # j7 = in7
+ movl %ecx,192(%esp)
+ # in10 = *(uint32 *) (x + 40)
+ movl 40(%edx),%ecx
+ # j8 = in8
+ movl %ebp,196(%esp)
+ # in11 = *(uint32 *) (x + 44)
+ movl 44(%edx),%ebp
+ # j9 = in9
+ movl %eax,200(%esp)
+ # in12 = *(uint32 *) (x + 48)
+ movl 48(%edx),%eax
+ # j10 = in10
+ movl %ecx,204(%esp)
+ # in13 = *(uint32 *) (x + 52)
+ movl 52(%edx),%ecx
+ # j11 = in11
+ movl %ebp,208(%esp)
+ # in14 = *(uint32 *) (x + 56)
+ movl 56(%edx),%ebp
+ # j12 = in12
+ movl %eax,212(%esp)
+ # in15 = *(uint32 *) (x + 60)
+ movl 60(%edx),%eax
+ # j13 = in13
+ movl %ecx,216(%esp)
+ # j14 = in14
+ movl %ebp,220(%esp)
+ # j15 = in15
+ movl %eax,224(%esp)
+ # x_backup = x
+ movl %edx,64(%esp)
+._bytesatleast1:
+ # bytes - 64
+ cmp $64,%ebx
+ # goto nocopy if unsigned>=
+ jae ._nocopy
+ # ctarget = out
+ movl %edi,228(%esp)
+ # out = &tmp
+ leal 0(%esp),%edi
+ # i = bytes
+ mov %ebx,%ecx
+ # while (i) { *out++ = *m++; --i }
+ rep movsb
+ # out = &tmp
+ leal 0(%esp),%edi
+ # m = &tmp
+ leal 0(%esp),%esi
+._nocopy:
+ # out_backup = out
+ movl %edi,72(%esp)
+ # m_backup = m
+ movl %esi,68(%esp)
+ # bytes_backup = bytes
+ movl %ebx,76(%esp)
+ # in0 = j0
+ movl 164(%esp),%eax
+ # in1 = j1
+ movl 168(%esp),%ecx
+ # in2 = j2
+ movl 172(%esp),%edx
+ # in3 = j3
+ movl 176(%esp),%ebx
+ # x0 = in0
+ movl %eax,100(%esp)
+ # x1 = in1
+ movl %ecx,104(%esp)
+ # x2 = in2
+ movl %edx,108(%esp)
+ # x3 = in3
+ movl %ebx,112(%esp)
+ # in4 = j4
+ movl 180(%esp),%eax
+ # in5 = j5
+ movl 184(%esp),%ecx
+ # in6 = j6
+ movl 188(%esp),%edx
+ # in7 = j7
+ movl 192(%esp),%ebx
+ # x4 = in4
+ movl %eax,116(%esp)
+ # x5 = in5
+ movl %ecx,120(%esp)
+ # x6 = in6
+ movl %edx,124(%esp)
+ # x7 = in7
+ movl %ebx,128(%esp)
+ # in8 = j8
+ movl 196(%esp),%eax
+ # in9 = j9
+ movl 200(%esp),%ecx
+ # in10 = j10
+ movl 204(%esp),%edx
+ # in11 = j11
+ movl 208(%esp),%ebx
+ # x8 = in8
+ movl %eax,132(%esp)
+ # x9 = in9
+ movl %ecx,136(%esp)
+ # x10 = in10
+ movl %edx,140(%esp)
+ # x11 = in11
+ movl %ebx,144(%esp)
+ # in12 = j12
+ movl 212(%esp),%eax
+ # in13 = j13
+ movl 216(%esp),%ecx
+ # in14 = j14
+ movl 220(%esp),%edx
+ # in15 = j15
+ movl 224(%esp),%ebx
+ # x12 = in12
+ movl %eax,148(%esp)
+ # x13 = in13
+ movl %ecx,152(%esp)
+ # x14 = in14
+ movl %edx,156(%esp)
+ # x15 = in15
+ movl %ebx,160(%esp)
+ # i = 20
+ mov $20,%ebp
+ # p = x0
+ movl 100(%esp),%eax
+ # s = x5
+ movl 120(%esp),%ecx
+ # t = x10
+ movl 140(%esp),%edx
+ # w = x15
+ movl 160(%esp),%ebx
+._mainloop:
+ # x0 = p
+ movl %eax,100(%esp)
+ # x10 = t
+ movl %edx,140(%esp)
+ # p += x12
+ addl 148(%esp),%eax
+ # x5 = s
+ movl %ecx,120(%esp)
+ # t += x6
+ addl 124(%esp),%edx
+ # x15 = w
+ movl %ebx,160(%esp)
+ # r = x1
+ movl 104(%esp),%esi
+ # r += s
+ add %ecx,%esi
+ # v = x11
+ movl 144(%esp),%edi
+ # v += w
+ add %ebx,%edi
+ # p <<<= 7
+ rol $7,%eax
+ # p ^= x4
+ xorl 116(%esp),%eax
+ # t <<<= 7
+ rol $7,%edx
+ # t ^= x14
+ xorl 156(%esp),%edx
+ # r <<<= 7
+ rol $7,%esi
+ # r ^= x9
+ xorl 136(%esp),%esi
+ # v <<<= 7
+ rol $7,%edi
+ # v ^= x3
+ xorl 112(%esp),%edi
+ # x4 = p
+ movl %eax,116(%esp)
+ # x14 = t
+ movl %edx,156(%esp)
+ # p += x0
+ addl 100(%esp),%eax
+ # x9 = r
+ movl %esi,136(%esp)
+ # t += x10
+ addl 140(%esp),%edx
+ # x3 = v
+ movl %edi,112(%esp)
+ # p <<<= 9
+ rol $9,%eax
+ # p ^= x8
+ xorl 132(%esp),%eax
+ # t <<<= 9
+ rol $9,%edx
+ # t ^= x2
+ xorl 108(%esp),%edx
+ # s += r
+ add %esi,%ecx
+ # s <<<= 9
+ rol $9,%ecx
+ # s ^= x13
+ xorl 152(%esp),%ecx
+ # w += v
+ add %edi,%ebx
+ # w <<<= 9
+ rol $9,%ebx
+ # w ^= x7
+ xorl 128(%esp),%ebx
+ # x8 = p
+ movl %eax,132(%esp)
+ # x2 = t
+ movl %edx,108(%esp)
+ # p += x4
+ addl 116(%esp),%eax
+ # x13 = s
+ movl %ecx,152(%esp)
+ # t += x14
+ addl 156(%esp),%edx
+ # x7 = w
+ movl %ebx,128(%esp)
+ # p <<<= 13
+ rol $13,%eax
+ # p ^= x12
+ xorl 148(%esp),%eax
+ # t <<<= 13
+ rol $13,%edx
+ # t ^= x6
+ xorl 124(%esp),%edx
+ # r += s
+ add %ecx,%esi
+ # r <<<= 13
+ rol $13,%esi
+ # r ^= x1
+ xorl 104(%esp),%esi
+ # v += w
+ add %ebx,%edi
+ # v <<<= 13
+ rol $13,%edi
+ # v ^= x11
+ xorl 144(%esp),%edi
+ # x12 = p
+ movl %eax,148(%esp)
+ # x6 = t
+ movl %edx,124(%esp)
+ # p += x8
+ addl 132(%esp),%eax
+ # x1 = r
+ movl %esi,104(%esp)
+ # t += x2
+ addl 108(%esp),%edx
+ # x11 = v
+ movl %edi,144(%esp)
+ # p <<<= 18
+ rol $18,%eax
+ # p ^= x0
+ xorl 100(%esp),%eax
+ # t <<<= 18
+ rol $18,%edx
+ # t ^= x10
+ xorl 140(%esp),%edx
+ # s += r
+ add %esi,%ecx
+ # s <<<= 18
+ rol $18,%ecx
+ # s ^= x5
+ xorl 120(%esp),%ecx
+ # w += v
+ add %edi,%ebx
+ # w <<<= 18
+ rol $18,%ebx
+ # w ^= x15
+ xorl 160(%esp),%ebx
+ # x0 = p
+ movl %eax,100(%esp)
+ # x10 = t
+ movl %edx,140(%esp)
+ # p += x3
+ addl 112(%esp),%eax
+ # p <<<= 7
+ rol $7,%eax
+ # x5 = s
+ movl %ecx,120(%esp)
+ # t += x9
+ addl 136(%esp),%edx
+ # x15 = w
+ movl %ebx,160(%esp)
+ # r = x4
+ movl 116(%esp),%esi
+ # r += s
+ add %ecx,%esi
+ # v = x14
+ movl 156(%esp),%edi
+ # v += w
+ add %ebx,%edi
+ # p ^= x1
+ xorl 104(%esp),%eax
+ # t <<<= 7
+ rol $7,%edx
+ # t ^= x11
+ xorl 144(%esp),%edx
+ # r <<<= 7
+ rol $7,%esi
+ # r ^= x6
+ xorl 124(%esp),%esi
+ # v <<<= 7
+ rol $7,%edi
+ # v ^= x12
+ xorl 148(%esp),%edi
+ # x1 = p
+ movl %eax,104(%esp)
+ # x11 = t
+ movl %edx,144(%esp)
+ # p += x0
+ addl 100(%esp),%eax
+ # x6 = r
+ movl %esi,124(%esp)
+ # t += x10
+ addl 140(%esp),%edx
+ # x12 = v
+ movl %edi,148(%esp)
+ # p <<<= 9
+ rol $9,%eax
+ # p ^= x2
+ xorl 108(%esp),%eax
+ # t <<<= 9
+ rol $9,%edx
+ # t ^= x8
+ xorl 132(%esp),%edx
+ # s += r
+ add %esi,%ecx
+ # s <<<= 9
+ rol $9,%ecx
+ # s ^= x7
+ xorl 128(%esp),%ecx
+ # w += v
+ add %edi,%ebx
+ # w <<<= 9
+ rol $9,%ebx
+ # w ^= x13
+ xorl 152(%esp),%ebx
+ # x2 = p
+ movl %eax,108(%esp)
+ # x8 = t
+ movl %edx,132(%esp)
+ # p += x1
+ addl 104(%esp),%eax
+ # x7 = s
+ movl %ecx,128(%esp)
+ # t += x11
+ addl 144(%esp),%edx
+ # x13 = w
+ movl %ebx,152(%esp)
+ # p <<<= 13
+ rol $13,%eax
+ # p ^= x3
+ xorl 112(%esp),%eax
+ # t <<<= 13
+ rol $13,%edx
+ # t ^= x9
+ xorl 136(%esp),%edx
+ # r += s
+ add %ecx,%esi
+ # r <<<= 13
+ rol $13,%esi
+ # r ^= x4
+ xorl 116(%esp),%esi
+ # v += w
+ add %ebx,%edi
+ # v <<<= 13
+ rol $13,%edi
+ # v ^= x14
+ xorl 156(%esp),%edi
+ # x3 = p
+ movl %eax,112(%esp)
+ # x9 = t
+ movl %edx,136(%esp)
+ # p += x2
+ addl 108(%esp),%eax
+ # x4 = r
+ movl %esi,116(%esp)
+ # t += x8
+ addl 132(%esp),%edx
+ # x14 = v
+ movl %edi,156(%esp)
+ # p <<<= 18
+ rol $18,%eax
+ # p ^= x0
+ xorl 100(%esp),%eax
+ # t <<<= 18
+ rol $18,%edx
+ # t ^= x10
+ xorl 140(%esp),%edx
+ # s += r
+ add %esi,%ecx
+ # s <<<= 18
+ rol $18,%ecx
+ # s ^= x5
+ xorl 120(%esp),%ecx
+ # w += v
+ add %edi,%ebx
+ # w <<<= 18
+ rol $18,%ebx
+ # w ^= x15
+ xorl 160(%esp),%ebx
+ # x0 = p
+ movl %eax,100(%esp)
+ # x10 = t
+ movl %edx,140(%esp)
+ # p += x12
+ addl 148(%esp),%eax
+ # x5 = s
+ movl %ecx,120(%esp)
+ # t += x6
+ addl 124(%esp),%edx
+ # x15 = w
+ movl %ebx,160(%esp)
+ # r = x1
+ movl 104(%esp),%esi
+ # r += s
+ add %ecx,%esi
+ # v = x11
+ movl 144(%esp),%edi
+ # v += w
+ add %ebx,%edi
+ # p <<<= 7
+ rol $7,%eax
+ # p ^= x4
+ xorl 116(%esp),%eax
+ # t <<<= 7
+ rol $7,%edx
+ # t ^= x14
+ xorl 156(%esp),%edx
+ # r <<<= 7
+ rol $7,%esi
+ # r ^= x9
+ xorl 136(%esp),%esi
+ # v <<<= 7
+ rol $7,%edi
+ # v ^= x3
+ xorl 112(%esp),%edi
+ # x4 = p
+ movl %eax,116(%esp)
+ # x14 = t
+ movl %edx,156(%esp)
+ # p += x0
+ addl 100(%esp),%eax
+ # x9 = r
+ movl %esi,136(%esp)
+ # t += x10
+ addl 140(%esp),%edx
+ # x3 = v
+ movl %edi,112(%esp)
+ # p <<<= 9
+ rol $9,%eax
+ # p ^= x8
+ xorl 132(%esp),%eax
+ # t <<<= 9
+ rol $9,%edx
+ # t ^= x2
+ xorl 108(%esp),%edx
+ # s += r
+ add %esi,%ecx
+ # s <<<= 9
+ rol $9,%ecx
+ # s ^= x13
+ xorl 152(%esp),%ecx
+ # w += v
+ add %edi,%ebx
+ # w <<<= 9
+ rol $9,%ebx
+ # w ^= x7
+ xorl 128(%esp),%ebx
+ # x8 = p
+ movl %eax,132(%esp)
+ # x2 = t
+ movl %edx,108(%esp)
+ # p += x4
+ addl 116(%esp),%eax
+ # x13 = s
+ movl %ecx,152(%esp)
+ # t += x14
+ addl 156(%esp),%edx
+ # x7 = w
+ movl %ebx,128(%esp)
+ # p <<<= 13
+ rol $13,%eax
+ # p ^= x12
+ xorl 148(%esp),%eax
+ # t <<<= 13
+ rol $13,%edx
+ # t ^= x6
+ xorl 124(%esp),%edx
+ # r += s
+ add %ecx,%esi
+ # r <<<= 13
+ rol $13,%esi
+ # r ^= x1
+ xorl 104(%esp),%esi
+ # v += w
+ add %ebx,%edi
+ # v <<<= 13
+ rol $13,%edi
+ # v ^= x11
+ xorl 144(%esp),%edi
+ # x12 = p
+ movl %eax,148(%esp)
+ # x6 = t
+ movl %edx,124(%esp)
+ # p += x8
+ addl 132(%esp),%eax
+ # x1 = r
+ movl %esi,104(%esp)
+ # t += x2
+ addl 108(%esp),%edx
+ # x11 = v
+ movl %edi,144(%esp)
+ # p <<<= 18
+ rol $18,%eax
+ # p ^= x0
+ xorl 100(%esp),%eax
+ # t <<<= 18
+ rol $18,%edx
+ # t ^= x10
+ xorl 140(%esp),%edx
+ # s += r
+ add %esi,%ecx
+ # s <<<= 18
+ rol $18,%ecx
+ # s ^= x5
+ xorl 120(%esp),%ecx
+ # w += v
+ add %edi,%ebx
+ # w <<<= 18
+ rol $18,%ebx
+ # w ^= x15
+ xorl 160(%esp),%ebx
+ # x0 = p
+ movl %eax,100(%esp)
+ # x10 = t
+ movl %edx,140(%esp)
+ # p += x3
+ addl 112(%esp),%eax
+ # p <<<= 7
+ rol $7,%eax
+ # x5 = s
+ movl %ecx,120(%esp)
+ # t += x9
+ addl 136(%esp),%edx
+ # x15 = w
+ movl %ebx,160(%esp)
+ # r = x4
+ movl 116(%esp),%esi
+ # r += s
+ add %ecx,%esi
+ # v = x14
+ movl 156(%esp),%edi
+ # v += w
+ add %ebx,%edi
+ # p ^= x1
+ xorl 104(%esp),%eax
+ # t <<<= 7
+ rol $7,%edx
+ # t ^= x11
+ xorl 144(%esp),%edx
+ # r <<<= 7
+ rol $7,%esi
+ # r ^= x6
+ xorl 124(%esp),%esi
+ # v <<<= 7
+ rol $7,%edi
+ # v ^= x12
+ xorl 148(%esp),%edi
+ # x1 = p
+ movl %eax,104(%esp)
+ # x11 = t
+ movl %edx,144(%esp)
+ # p += x0
+ addl 100(%esp),%eax
+ # x6 = r
+ movl %esi,124(%esp)
+ # t += x10
+ addl 140(%esp),%edx
+ # x12 = v
+ movl %edi,148(%esp)
+ # p <<<= 9
+ rol $9,%eax
+ # p ^= x2
+ xorl 108(%esp),%eax
+ # t <<<= 9
+ rol $9,%edx
+ # t ^= x8
+ xorl 132(%esp),%edx
+ # s += r
+ add %esi,%ecx
+ # s <<<= 9
+ rol $9,%ecx
+ # s ^= x7
+ xorl 128(%esp),%ecx
+ # w += v
+ add %edi,%ebx
+ # w <<<= 9
+ rol $9,%ebx
+ # w ^= x13
+ xorl 152(%esp),%ebx
+ # x2 = p
+ movl %eax,108(%esp)
+ # x8 = t
+ movl %edx,132(%esp)
+ # p += x1
+ addl 104(%esp),%eax
+ # x7 = s
+ movl %ecx,128(%esp)
+ # t += x11
+ addl 144(%esp),%edx
+ # x13 = w
+ movl %ebx,152(%esp)
+ # p <<<= 13
+ rol $13,%eax
+ # p ^= x3
+ xorl 112(%esp),%eax
+ # t <<<= 13
+ rol $13,%edx
+ # t ^= x9
+ xorl 136(%esp),%edx
+ # r += s
+ add %ecx,%esi
+ # r <<<= 13
+ rol $13,%esi
+ # r ^= x4
+ xorl 116(%esp),%esi
+ # v += w
+ add %ebx,%edi
+ # v <<<= 13
+ rol $13,%edi
+ # v ^= x14
+ xorl 156(%esp),%edi
+ # x3 = p
+ movl %eax,112(%esp)
+ # x9 = t
+ movl %edx,136(%esp)
+ # p += x2
+ addl 108(%esp),%eax
+ # x4 = r
+ movl %esi,116(%esp)
+ # t += x8
+ addl 132(%esp),%edx
+ # x14 = v
+ movl %edi,156(%esp)
+ # p <<<= 18
+ rol $18,%eax
+ # p ^= x0
+ xorl 100(%esp),%eax
+ # t <<<= 18
+ rol $18,%edx
+ # t ^= x10
+ xorl 140(%esp),%edx
+ # s += r
+ add %esi,%ecx
+ # s <<<= 18
+ rol $18,%ecx
+ # s ^= x5
+ xorl 120(%esp),%ecx
+ # w += v
+ add %edi,%ebx
+ # w <<<= 18
+ rol $18,%ebx
+ # w ^= x15
+ xorl 160(%esp),%ebx
+ # i -= 4
+ sub $4,%ebp
+ # goto mainloop if unsigned >
+ ja ._mainloop
+ # x0 = p
+ movl %eax,100(%esp)
+ # x5 = s
+ movl %ecx,120(%esp)
+ # x10 = t
+ movl %edx,140(%esp)
+ # x15 = w
+ movl %ebx,160(%esp)
+ # out = out_backup
+ movl 72(%esp),%edi
+ # m = m_backup
+ movl 68(%esp),%esi
+ # in0 = x0
+ movl 100(%esp),%eax
+ # in1 = x1
+ movl 104(%esp),%ecx
+ # in0 += j0
+ addl 164(%esp),%eax
+ # in1 += j1
+ addl 168(%esp),%ecx
+ # in0 ^= *(uint32 *) (m + 0)
+ xorl 0(%esi),%eax
+ # in1 ^= *(uint32 *) (m + 4)
+ xorl 4(%esi),%ecx
+ # *(uint32 *) (out + 0) = in0
+ movl %eax,0(%edi)
+ # *(uint32 *) (out + 4) = in1
+ movl %ecx,4(%edi)
+ # in2 = x2
+ movl 108(%esp),%eax
+ # in3 = x3
+ movl 112(%esp),%ecx
+ # in2 += j2
+ addl 172(%esp),%eax
+ # in3 += j3
+ addl 176(%esp),%ecx
+ # in2 ^= *(uint32 *) (m + 8)
+ xorl 8(%esi),%eax
+ # in3 ^= *(uint32 *) (m + 12)
+ xorl 12(%esi),%ecx
+ # *(uint32 *) (out + 8) = in2
+ movl %eax,8(%edi)
+ # *(uint32 *) (out + 12) = in3
+ movl %ecx,12(%edi)
+ # in4 = x4
+ movl 116(%esp),%eax
+ # in5 = x5
+ movl 120(%esp),%ecx
+ # in4 += j4
+ addl 180(%esp),%eax
+ # in5 += j5
+ addl 184(%esp),%ecx
+ # in4 ^= *(uint32 *) (m + 16)
+ xorl 16(%esi),%eax
+ # in5 ^= *(uint32 *) (m + 20)
+ xorl 20(%esi),%ecx
+ # *(uint32 *) (out + 16) = in4
+ movl %eax,16(%edi)
+ # *(uint32 *) (out + 20) = in5
+ movl %ecx,20(%edi)
+ # in6 = x6
+ movl 124(%esp),%eax
+ # in7 = x7
+ movl 128(%esp),%ecx
+ # in6 += j6
+ addl 188(%esp),%eax
+ # in7 += j7
+ addl 192(%esp),%ecx
+ # in6 ^= *(uint32 *) (m + 24)
+ xorl 24(%esi),%eax
+ # in7 ^= *(uint32 *) (m + 28)
+ xorl 28(%esi),%ecx
+ # *(uint32 *) (out + 24) = in6
+ movl %eax,24(%edi)
+ # *(uint32 *) (out + 28) = in7
+ movl %ecx,28(%edi)
+ # in8 = x8
+ movl 132(%esp),%eax
+ # in9 = x9
+ movl 136(%esp),%ecx
+ # in8 += j8
+ addl 196(%esp),%eax
+ # in9 += j9
+ addl 200(%esp),%ecx
+ # in8 ^= *(uint32 *) (m + 32)
+ xorl 32(%esi),%eax
+ # in9 ^= *(uint32 *) (m + 36)
+ xorl 36(%esi),%ecx
+ # *(uint32 *) (out + 32) = in8
+ movl %eax,32(%edi)
+ # *(uint32 *) (out + 36) = in9
+ movl %ecx,36(%edi)
+ # in10 = x10
+ movl 140(%esp),%eax
+ # in11 = x11
+ movl 144(%esp),%ecx
+ # in10 += j10
+ addl 204(%esp),%eax
+ # in11 += j11
+ addl 208(%esp),%ecx
+ # in10 ^= *(uint32 *) (m + 40)
+ xorl 40(%esi),%eax
+ # in11 ^= *(uint32 *) (m + 44)
+ xorl 44(%esi),%ecx
+ # *(uint32 *) (out + 40) = in10
+ movl %eax,40(%edi)
+ # *(uint32 *) (out + 44) = in11
+ movl %ecx,44(%edi)
+ # in12 = x12
+ movl 148(%esp),%eax
+ # in13 = x13
+ movl 152(%esp),%ecx
+ # in12 += j12
+ addl 212(%esp),%eax
+ # in13 += j13
+ addl 216(%esp),%ecx
+ # in12 ^= *(uint32 *) (m + 48)
+ xorl 48(%esi),%eax
+ # in13 ^= *(uint32 *) (m + 52)
+ xorl 52(%esi),%ecx
+ # *(uint32 *) (out + 48) = in12
+ movl %eax,48(%edi)
+ # *(uint32 *) (out + 52) = in13
+ movl %ecx,52(%edi)
+ # in14 = x14
+ movl 156(%esp),%eax
+ # in15 = x15
+ movl 160(%esp),%ecx
+ # in14 += j14
+ addl 220(%esp),%eax
+ # in15 += j15
+ addl 224(%esp),%ecx
+ # in14 ^= *(uint32 *) (m + 56)
+ xorl 56(%esi),%eax
+ # in15 ^= *(uint32 *) (m + 60)
+ xorl 60(%esi),%ecx
+ # *(uint32 *) (out + 56) = in14
+ movl %eax,56(%edi)
+ # *(uint32 *) (out + 60) = in15
+ movl %ecx,60(%edi)
+ # bytes = bytes_backup
+ movl 76(%esp),%ebx
+ # in8 = j8
+ movl 196(%esp),%eax
+ # in9 = j9
+ movl 200(%esp),%ecx
+ # in8 += 1
+ add $1,%eax
+ # in9 += 0 + carry
+ adc $0,%ecx
+ # j8 = in8
+ movl %eax,196(%esp)
+ # j9 = in9
+ movl %ecx,200(%esp)
+ # bytes - 64
+ cmp $64,%ebx
+ # goto bytesatleast65 if unsigned>
+ ja ._bytesatleast65
+ # goto bytesatleast64 if unsigned>=
+ jae ._bytesatleast64
+ # m = out
+ mov %edi,%esi
+ # out = ctarget
+ movl 228(%esp),%edi
+ # i = bytes
+ mov %ebx,%ecx
+ # while (i) { *out++ = *m++; --i }
+ rep movsb
+._bytesatleast64:
+ # x = x_backup
+ movl 64(%esp),%eax
+ # in8 = j8
+ movl 196(%esp),%ecx
+ # in9 = j9
+ movl 200(%esp),%edx
+ # *(uint32 *) (x + 32) = in8
+ movl %ecx,32(%eax)
+ # *(uint32 *) (x + 36) = in9
+ movl %edx,36(%eax)
+._done:
+ # eax = eax_stack
+ movl 80(%esp),%eax
+ # ebx = ebx_stack
+ movl 84(%esp),%ebx
+ # esi = esi_stack
+ movl 88(%esp),%esi
+ # edi = edi_stack
+ movl 92(%esp),%edi
+ # ebp = ebp_stack
+ movl 96(%esp),%ebp
+ # leave
+ add %eax,%esp
+ ret
+._bytesatleast65:
+ # bytes -= 64
+ sub $64,%ebx
+ # out += 64
+ add $64,%edi
+ # m += 64
+ add $64,%esi
+ # goto bytesatleast1
+ jmp ._bytesatleast1
+ENDPROC(salsa20_encrypt_bytes)
+
+# enter salsa20_keysetup
+ENTRY(salsa20_keysetup)
+ mov %esp,%eax
+ and $31,%eax
+ add $256,%eax
+ sub %eax,%esp
+ # eax_stack = eax
+ movl %eax,64(%esp)
+ # ebx_stack = ebx
+ movl %ebx,68(%esp)
+ # esi_stack = esi
+ movl %esi,72(%esp)
+ # edi_stack = edi
+ movl %edi,76(%esp)
+ # ebp_stack = ebp
+ movl %ebp,80(%esp)
+ # k = arg2
+ movl 8(%esp,%eax),%ecx
+ # kbits = arg3
+ movl 12(%esp,%eax),%edx
+ # x = arg1
+ movl 4(%esp,%eax),%eax
+ # in1 = *(uint32 *) (k + 0)
+ movl 0(%ecx),%ebx
+ # in2 = *(uint32 *) (k + 4)
+ movl 4(%ecx),%esi
+ # in3 = *(uint32 *) (k + 8)
+ movl 8(%ecx),%edi
+ # in4 = *(uint32 *) (k + 12)
+ movl 12(%ecx),%ebp
+ # *(uint32 *) (x + 4) = in1
+ movl %ebx,4(%eax)
+ # *(uint32 *) (x + 8) = in2
+ movl %esi,8(%eax)
+ # *(uint32 *) (x + 12) = in3
+ movl %edi,12(%eax)
+ # *(uint32 *) (x + 16) = in4
+ movl %ebp,16(%eax)
+ # kbits - 256
+ cmp $256,%edx
+ # goto kbits128 if unsigned<
+ jb ._kbits128
+._kbits256:
+ # in11 = *(uint32 *) (k + 16)
+ movl 16(%ecx),%edx
+ # in12 = *(uint32 *) (k + 20)
+ movl 20(%ecx),%ebx
+ # in13 = *(uint32 *) (k + 24)
+ movl 24(%ecx),%esi
+ # in14 = *(uint32 *) (k + 28)
+ movl 28(%ecx),%ecx
+ # *(uint32 *) (x + 44) = in11
+ movl %edx,44(%eax)
+ # *(uint32 *) (x + 48) = in12
+ movl %ebx,48(%eax)
+ # *(uint32 *) (x + 52) = in13
+ movl %esi,52(%eax)
+ # *(uint32 *) (x + 56) = in14
+ movl %ecx,56(%eax)
+ # in0 = 1634760805
+ mov $1634760805,%ecx
+ # in5 = 857760878
+ mov $857760878,%edx
+ # in10 = 2036477234
+ mov $2036477234,%ebx
+ # in15 = 1797285236
+ mov $1797285236,%esi
+ # *(uint32 *) (x + 0) = in0
+ movl %ecx,0(%eax)
+ # *(uint32 *) (x + 20) = in5
+ movl %edx,20(%eax)
+ # *(uint32 *) (x + 40) = in10
+ movl %ebx,40(%eax)
+ # *(uint32 *) (x + 60) = in15
+ movl %esi,60(%eax)
+ # goto keysetupdone
+ jmp ._keysetupdone
+._kbits128:
+ # in11 = *(uint32 *) (k + 0)
+ movl 0(%ecx),%edx
+ # in12 = *(uint32 *) (k + 4)
+ movl 4(%ecx),%ebx
+ # in13 = *(uint32 *) (k + 8)
+ movl 8(%ecx),%esi
+ # in14 = *(uint32 *) (k + 12)
+ movl 12(%ecx),%ecx
+ # *(uint32 *) (x + 44) = in11
+ movl %edx,44(%eax)
+ # *(uint32 *) (x + 48) = in12
+ movl %ebx,48(%eax)
+ # *(uint32 *) (x + 52) = in13
+ movl %esi,52(%eax)
+ # *(uint32 *) (x + 56) = in14
+ movl %ecx,56(%eax)
+ # in0 = 1634760805
+ mov $1634760805,%ecx
+ # in5 = 824206446
+ mov $824206446,%edx
+ # in10 = 2036477238
+ mov $2036477238,%ebx
+ # in15 = 1797285236
+ mov $1797285236,%esi
+ # *(uint32 *) (x + 0) = in0
+ movl %ecx,0(%eax)
+ # *(uint32 *) (x + 20) = in5
+ movl %edx,20(%eax)
+ # *(uint32 *) (x + 40) = in10
+ movl %ebx,40(%eax)
+ # *(uint32 *) (x + 60) = in15
+ movl %esi,60(%eax)
+._keysetupdone:
+ # eax = eax_stack
+ movl 64(%esp),%eax
+ # ebx = ebx_stack
+ movl 68(%esp),%ebx
+ # esi = esi_stack
+ movl 72(%esp),%esi
+ # edi = edi_stack
+ movl 76(%esp),%edi
+ # ebp = ebp_stack
+ movl 80(%esp),%ebp
+ # leave
+ add %eax,%esp
+ ret
+ENDPROC(salsa20_keysetup)
+
+# enter salsa20_ivsetup
+ENTRY(salsa20_ivsetup)
+ mov %esp,%eax
+ and $31,%eax
+ add $256,%eax
+ sub %eax,%esp
+ # eax_stack = eax
+ movl %eax,64(%esp)
+ # ebx_stack = ebx
+ movl %ebx,68(%esp)
+ # esi_stack = esi
+ movl %esi,72(%esp)
+ # edi_stack = edi
+ movl %edi,76(%esp)
+ # ebp_stack = ebp
+ movl %ebp,80(%esp)
+ # iv = arg2
+ movl 8(%esp,%eax),%ecx
+ # x = arg1
+ movl 4(%esp,%eax),%eax
+ # in6 = *(uint32 *) (iv + 0)
+ movl 0(%ecx),%edx
+ # in7 = *(uint32 *) (iv + 4)
+ movl 4(%ecx),%ecx
+ # in8 = 0
+ mov $0,%ebx
+ # in9 = 0
+ mov $0,%esi
+ # *(uint32 *) (x + 24) = in6
+ movl %edx,24(%eax)
+ # *(uint32 *) (x + 28) = in7
+ movl %ecx,28(%eax)
+ # *(uint32 *) (x + 32) = in8
+ movl %ebx,32(%eax)
+ # *(uint32 *) (x + 36) = in9
+ movl %esi,36(%eax)
+ # eax = eax_stack
+ movl 64(%esp),%eax
+ # ebx = ebx_stack
+ movl 68(%esp),%ebx
+ # esi = esi_stack
+ movl 72(%esp),%esi
+ # edi = edi_stack
+ movl 76(%esp),%edi
+ # ebp = ebp_stack
+ movl 80(%esp),%ebp
+ # leave
+ add %eax,%esp
+ ret
+ENDPROC(salsa20_ivsetup)