diff options
author | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2015-08-05 17:04:01 -0300 |
---|---|---|
committer | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2015-08-05 17:04:01 -0300 |
commit | 57f0f512b273f60d52568b8c6b77e17f5636edc0 (patch) | |
tree | 5e910f0e82173f4ef4f51111366a3f1299037a7b /arch/x86/crypto/salsa20-i586-asm_32.S |
Initial import
Diffstat (limited to 'arch/x86/crypto/salsa20-i586-asm_32.S')
-rw-r--r-- | arch/x86/crypto/salsa20-i586-asm_32.S | 1114 |
1 files changed, 1114 insertions, 0 deletions
/*
 * arch/x86/crypto/salsa20-i586-asm_32.S
 * (new file, mode 100644, blob 329452b8f)
 *
 * Salsa20 for 32-bit x86. AT&T (GAS) syntax, run through cpp before
 * assembly (see the #include below).
 *
 * All three entry points read their arguments from the caller's stack,
 * the first one 4 bytes above the %esp value at entry. Each routine
 * lowers %esp to a 32-byte boundary and then by a further 256 bytes, and
 * keeps the total adjustment in its frame so the epilogue can undo it.
 * %ebx, %esi, %edi and %ebp are saved on entry and restored on exit;
 * %ecx, %edx and the flags are not preserved, and %eax holds the frame
 * adjustment on return (there is no meaningful return value).
 *
 * Branch targets use the .L prefix so that they stay assembler-local.
 */
# salsa20_pm.s version 20051229
# D. J. Bernstein
# Public domain.

#include <linux/linkage.h>

.text

/*
 * salsa20_encrypt_bytes(x, m, out, bytes)
 *
 *   arg1  x      16 x 32-bit state words. Words 8 and 9 are handled as one
 *                64-bit block counter (add/adc) and are the only words
 *                written back to x.
 *   arg2  m      input bytes
 *   arg3  out    output bytes
 *   arg4  bytes  byte count; 0 returns without touching x, m or out
 *
 * Per 64-byte block: copy the saved state j0..j15 into the working words
 * x0..x15, run 20 rounds (5 loop iterations of 4 rounds each), add j back
 * in, XOR with the input and store to out, then increment j8/j9.
 * A final block shorter than 64 bytes is staged through tmp: the input is
 * copied into tmp, processed in place, and only `bytes` bytes are copied
 * to the real destination (ctarget).
 *
 * Frame layout, offsets from the adjusted %esp:
 *     0.. 63  tmp            staging buffer for a short final block
 *    64       x_backup       68  m_backup
 *    72       out_backup     76  bytes_backup
 *    80       eax_stack      frame adjustment
 *    84.. 96  ebx_stack, esi_stack, edi_stack, ebp_stack
 *   100..160  x0..x15        working state
 *   164..224  j0..j15        input state / running block counter
 *   228       ctarget        real destination of a short final block
 *
 * Register roles inside the round loop:
 *   %eax = p, %ecx = s, %edx = t, %ebx = w, %esi = r, %edi = v,
 *   %ebp = i (rounds remaining).
 *
 * Caveats:
 *   - %ebp is used as a general-purpose register (temporary and round
 *     counter), so it does not hold a frame pointer inside this function.
 *   - The frame is not cleared before returning; it still contains the
 *     copies of the state words made below.
 */
# enter salsa20_encrypt_bytes
ENTRY(salsa20_encrypt_bytes)
	mov	%esp,%eax
	and	$31,%eax
	add	$256,%eax
	sub	%eax,%esp
	movl	%eax,80(%esp)		# eax_stack = eax
	movl	%ebx,84(%esp)		# ebx_stack = ebx
	movl	%esi,88(%esp)		# esi_stack = esi
	movl	%edi,92(%esp)		# edi_stack = edi
	movl	%ebp,96(%esp)		# ebp_stack = ebp
	movl	4(%esp,%eax),%edx	# x = arg1
	movl	8(%esp,%eax),%esi	# m = arg2
	movl	12(%esp,%eax),%edi	# out = arg3
	movl	16(%esp,%eax),%ebx	# bytes = arg4
	sub	$0,%ebx			# bytes -= 0 (sets ZF when bytes == 0)
	jbe	.Ldone			# goto done if unsigned<=
.Lstart:
	/* Copy the 16 state words from x into j0..j15 (loads and stores interleaved). */
	movl	0(%edx),%eax		# in0 = *(uint32 *) (x + 0)
	movl	4(%edx),%ecx		# in1 = *(uint32 *) (x + 4)
	movl	8(%edx),%ebp		# in2 = *(uint32 *) (x + 8)
	movl	%eax,164(%esp)		# j0 = in0
	movl	12(%edx),%eax		# in3 = *(uint32 *) (x + 12)
	movl	%ecx,168(%esp)		# j1 = in1
	movl	16(%edx),%ecx		# in4 = *(uint32 *) (x + 16)
	movl	%ebp,172(%esp)		# j2 = in2
	movl	20(%edx),%ebp		# in5 = *(uint32 *) (x + 20)
	movl	%eax,176(%esp)		# j3 = in3
	movl	24(%edx),%eax		# in6 = *(uint32 *) (x + 24)
	movl	%ecx,180(%esp)		# j4 = in4
	movl	28(%edx),%ecx		# in7 = *(uint32 *) (x + 28)
	movl	%ebp,184(%esp)		# j5 = in5
	movl	32(%edx),%ebp		# in8 = *(uint32 *) (x + 32)
	movl	%eax,188(%esp)		# j6 = in6
	movl	36(%edx),%eax		# in9 = *(uint32 *) (x + 36)
	movl	%ecx,192(%esp)		# j7 = in7
	movl	40(%edx),%ecx		# in10 = *(uint32 *) (x + 40)
	movl	%ebp,196(%esp)		# j8 = in8
	movl	44(%edx),%ebp		# in11 = *(uint32 *) (x + 44)
	movl	%eax,200(%esp)		# j9 = in9
	movl	48(%edx),%eax		# in12 = *(uint32 *) (x + 48)
	movl	%ecx,204(%esp)		# j10 = in10
	movl	52(%edx),%ecx		# in13 = *(uint32 *) (x + 52)
	movl	%ebp,208(%esp)		# j11 = in11
	movl	56(%edx),%ebp		# in14 = *(uint32 *) (x + 56)
	movl	%eax,212(%esp)		# j12 = in12
	movl	60(%edx),%eax		# in15 = *(uint32 *) (x + 60)
	movl	%ecx,216(%esp)		# j13 = in13
	movl	%ebp,220(%esp)		# j14 = in14
	movl	%eax,224(%esp)		# j15 = in15
	movl	%edx,64(%esp)		# x_backup = x
.Lbytesatleast1:
	cmp	$64,%ebx		# bytes - 64
	jae	.Lnocopy		# goto nocopy if unsigned>=
	/* Short final block: stage the input in tmp and work there. */
	movl	%edi,228(%esp)		# ctarget = out
	leal	0(%esp),%edi		# out = &tmp
	mov	%ebx,%ecx		# i = bytes
	rep movsb			# while (i) { *out++ = *m++; --i }
	leal	0(%esp),%edi		# out = &tmp
	leal	0(%esp),%esi		# m = &tmp
.Lnocopy:
	movl	%edi,72(%esp)		# out_backup = out
	movl	%esi,68(%esp)		# m_backup = m
	movl	%ebx,76(%esp)		# bytes_backup = bytes
	/* x0..x15 = j0..j15, four words at a time. */
	movl	164(%esp),%eax		# in0 = j0
	movl	168(%esp),%ecx		# in1 = j1
	movl	172(%esp),%edx		# in2 = j2
	movl	176(%esp),%ebx		# in3 = j3
	movl	%eax,100(%esp)		# x0 = in0
	movl	%ecx,104(%esp)		# x1 = in1
	movl	%edx,108(%esp)		# x2 = in2
	movl	%ebx,112(%esp)		# x3 = in3
	movl	180(%esp),%eax		# in4 = j4
	movl	184(%esp),%ecx		# in5 = j5
	movl	188(%esp),%edx		# in6 = j6
	movl	192(%esp),%ebx		# in7 = j7
	movl	%eax,116(%esp)		# x4 = in4
	movl	%ecx,120(%esp)		# x5 = in5
	movl	%edx,124(%esp)		# x6 = in6
	movl	%ebx,128(%esp)		# x7 = in7
	movl	196(%esp),%eax		# in8 = j8
	movl	200(%esp),%ecx		# in9 = j9
	movl	204(%esp),%edx		# in10 = j10
	movl	208(%esp),%ebx		# in11 = j11
	movl	%eax,132(%esp)		# x8 = in8
	movl	%ecx,136(%esp)		# x9 = in9
	movl	%edx,140(%esp)		# x10 = in10
	movl	%ebx,144(%esp)		# x11 = in11
	movl	212(%esp),%eax		# in12 = j12
	movl	216(%esp),%ecx		# in13 = j13
	movl	220(%esp),%edx		# in14 = j14
	movl	224(%esp),%ebx		# in15 = j15
	movl	%eax,148(%esp)		# x12 = in12
	movl	%ecx,152(%esp)		# x13 = in13
	movl	%edx,156(%esp)		# x14 = in14
	movl	%ebx,160(%esp)		# x15 = in15
	mov	$20,%ebp		# i = 20
	movl	100(%esp),%eax		# p = x0
	movl	120(%esp),%ecx		# s = x5
	movl	140(%esp),%edx		# t = x10
	movl	160(%esp),%ebx		# w = x15
.Lmainloop:
	/*
	 * One loop iteration is two identical double rounds (4 rounds), so
	 * the double round is written once and expanded twice. There are
	 * no labels inside the repeated body.
	 */
	.rept	2
	/* Column round: the four quarter-rounds are interleaved. */
	movl	%eax,100(%esp)		# x0 = p
	movl	%edx,140(%esp)		# x10 = t
	addl	148(%esp),%eax		# p += x12
	movl	%ecx,120(%esp)		# x5 = s
	addl	124(%esp),%edx		# t += x6
	movl	%ebx,160(%esp)		# x15 = w
	movl	104(%esp),%esi		# r = x1
	add	%ecx,%esi		# r += s
	movl	144(%esp),%edi		# v = x11
	add	%ebx,%edi		# v += w
	rol	$7,%eax			# p <<<= 7
	xorl	116(%esp),%eax		# p ^= x4
	rol	$7,%edx			# t <<<= 7
	xorl	156(%esp),%edx		# t ^= x14
	rol	$7,%esi			# r <<<= 7
	xorl	136(%esp),%esi		# r ^= x9
	rol	$7,%edi			# v <<<= 7
	xorl	112(%esp),%edi		# v ^= x3
	movl	%eax,116(%esp)		# x4 = p
	movl	%edx,156(%esp)		# x14 = t
	addl	100(%esp),%eax		# p += x0
	movl	%esi,136(%esp)		# x9 = r
	addl	140(%esp),%edx		# t += x10
	movl	%edi,112(%esp)		# x3 = v
	rol	$9,%eax			# p <<<= 9
	xorl	132(%esp),%eax		# p ^= x8
	rol	$9,%edx			# t <<<= 9
	xorl	108(%esp),%edx		# t ^= x2
	add	%esi,%ecx		# s += r
	rol	$9,%ecx			# s <<<= 9
	xorl	152(%esp),%ecx		# s ^= x13
	add	%edi,%ebx		# w += v
	rol	$9,%ebx			# w <<<= 9
	xorl	128(%esp),%ebx		# w ^= x7
	movl	%eax,132(%esp)		# x8 = p
	movl	%edx,108(%esp)		# x2 = t
	addl	116(%esp),%eax		# p += x4
	movl	%ecx,152(%esp)		# x13 = s
	addl	156(%esp),%edx		# t += x14
	movl	%ebx,128(%esp)		# x7 = w
	rol	$13,%eax		# p <<<= 13
	xorl	148(%esp),%eax		# p ^= x12
	rol	$13,%edx		# t <<<= 13
	xorl	124(%esp),%edx		# t ^= x6
	add	%ecx,%esi		# r += s
	rol	$13,%esi		# r <<<= 13
	xorl	104(%esp),%esi		# r ^= x1
	add	%ebx,%edi		# v += w
	rol	$13,%edi		# v <<<= 13
	xorl	144(%esp),%edi		# v ^= x11
	movl	%eax,148(%esp)		# x12 = p
	movl	%edx,124(%esp)		# x6 = t
	addl	132(%esp),%eax		# p += x8
	movl	%esi,104(%esp)		# x1 = r
	addl	108(%esp),%edx		# t += x2
	movl	%edi,144(%esp)		# x11 = v
	rol	$18,%eax		# p <<<= 18
	xorl	100(%esp),%eax		# p ^= x0
	rol	$18,%edx		# t <<<= 18
	xorl	140(%esp),%edx		# t ^= x10
	add	%esi,%ecx		# s += r
	rol	$18,%ecx		# s <<<= 18
	xorl	120(%esp),%ecx		# s ^= x5
	add	%edi,%ebx		# w += v
	rol	$18,%ebx		# w <<<= 18
	xorl	160(%esp),%ebx		# w ^= x15
	/* Row round: same pattern on the row-wise word groups. */
	movl	%eax,100(%esp)		# x0 = p
	movl	%edx,140(%esp)		# x10 = t
	addl	112(%esp),%eax		# p += x3
	rol	$7,%eax			# p <<<= 7
	movl	%ecx,120(%esp)		# x5 = s
	addl	136(%esp),%edx		# t += x9
	movl	%ebx,160(%esp)		# x15 = w
	movl	116(%esp),%esi		# r = x4
	add	%ecx,%esi		# r += s
	movl	156(%esp),%edi		# v = x14
	add	%ebx,%edi		# v += w
	xorl	104(%esp),%eax		# p ^= x1
	rol	$7,%edx			# t <<<= 7
	xorl	144(%esp),%edx		# t ^= x11
	rol	$7,%esi			# r <<<= 7
	xorl	124(%esp),%esi		# r ^= x6
	rol	$7,%edi			# v <<<= 7
	xorl	148(%esp),%edi		# v ^= x12
	movl	%eax,104(%esp)		# x1 = p
	movl	%edx,144(%esp)		# x11 = t
	addl	100(%esp),%eax		# p += x0
	movl	%esi,124(%esp)		# x6 = r
	addl	140(%esp),%edx		# t += x10
	movl	%edi,148(%esp)		# x12 = v
	rol	$9,%eax			# p <<<= 9
	xorl	108(%esp),%eax		# p ^= x2
	rol	$9,%edx			# t <<<= 9
	xorl	132(%esp),%edx		# t ^= x8
	add	%esi,%ecx		# s += r
	rol	$9,%ecx			# s <<<= 9
	xorl	128(%esp),%ecx		# s ^= x7
	add	%edi,%ebx		# w += v
	rol	$9,%ebx			# w <<<= 9
	xorl	152(%esp),%ebx		# w ^= x13
	movl	%eax,108(%esp)		# x2 = p
	movl	%edx,132(%esp)		# x8 = t
	addl	104(%esp),%eax		# p += x1
	movl	%ecx,128(%esp)		# x7 = s
	addl	144(%esp),%edx		# t += x11
	movl	%ebx,152(%esp)		# x13 = w
	rol	$13,%eax		# p <<<= 13
	xorl	112(%esp),%eax		# p ^= x3
	rol	$13,%edx		# t <<<= 13
	xorl	136(%esp),%edx		# t ^= x9
	add	%ecx,%esi		# r += s
	rol	$13,%esi		# r <<<= 13
	xorl	116(%esp),%esi		# r ^= x4
	add	%ebx,%edi		# v += w
	rol	$13,%edi		# v <<<= 13
	xorl	156(%esp),%edi		# v ^= x14
	movl	%eax,112(%esp)		# x3 = p
	movl	%edx,136(%esp)		# x9 = t
	addl	108(%esp),%eax		# p += x2
	movl	%esi,116(%esp)		# x4 = r
	addl	132(%esp),%edx		# t += x8
	movl	%edi,156(%esp)		# x14 = v
	rol	$18,%eax		# p <<<= 18
	xorl	100(%esp),%eax		# p ^= x0
	rol	$18,%edx		# t <<<= 18
	xorl	140(%esp),%edx		# t ^= x10
	add	%esi,%ecx		# s += r
	rol	$18,%ecx		# s <<<= 18
	xorl	120(%esp),%ecx		# s ^= x5
	add	%edi,%ebx		# w += v
	rol	$18,%ebx		# w <<<= 18
	xorl	160(%esp),%ebx		# w ^= x15
	.endr
	sub	$4,%ebp			# i -= 4
	ja	.Lmainloop		# goto mainloop if unsigned >
	/* Spill the four words still held in registers. */
	movl	%eax,100(%esp)		# x0 = p
	movl	%ecx,120(%esp)		# x5 = s
	movl	%edx,140(%esp)		# x10 = t
	movl	%ebx,160(%esp)		# x15 = w
	movl	72(%esp),%edi		# out = out_backup
	movl	68(%esp),%esi		# m = m_backup
	/* out[k] = (x[k] + j[k]) ^ m[k], two words per step. */
	movl	100(%esp),%eax		# in0 = x0
	movl	104(%esp),%ecx		# in1 = x1
	addl	164(%esp),%eax		# in0 += j0
	addl	168(%esp),%ecx		# in1 += j1
	xorl	0(%esi),%eax		# in0 ^= *(uint32 *) (m + 0)
	xorl	4(%esi),%ecx		# in1 ^= *(uint32 *) (m + 4)
	movl	%eax,0(%edi)		# *(uint32 *) (out + 0) = in0
	movl	%ecx,4(%edi)		# *(uint32 *) (out + 4) = in1
	movl	108(%esp),%eax		# in2 = x2
	movl	112(%esp),%ecx		# in3 = x3
	addl	172(%esp),%eax		# in2 += j2
	addl	176(%esp),%ecx		# in3 += j3
	xorl	8(%esi),%eax		# in2 ^= *(uint32 *) (m + 8)
	xorl	12(%esi),%ecx		# in3 ^= *(uint32 *) (m + 12)
	movl	%eax,8(%edi)		# *(uint32 *) (out + 8) = in2
	movl	%ecx,12(%edi)		# *(uint32 *) (out + 12) = in3
	movl	116(%esp),%eax		# in4 = x4
	movl	120(%esp),%ecx		# in5 = x5
	addl	180(%esp),%eax		# in4 += j4
	addl	184(%esp),%ecx		# in5 += j5
	xorl	16(%esi),%eax		# in4 ^= *(uint32 *) (m + 16)
	xorl	20(%esi),%ecx		# in5 ^= *(uint32 *) (m + 20)
	movl	%eax,16(%edi)		# *(uint32 *) (out + 16) = in4
	movl	%ecx,20(%edi)		# *(uint32 *) (out + 20) = in5
	movl	124(%esp),%eax		# in6 = x6
	movl	128(%esp),%ecx		# in7 = x7
	addl	188(%esp),%eax		# in6 += j6
	addl	192(%esp),%ecx		# in7 += j7
	xorl	24(%esi),%eax		# in6 ^= *(uint32 *) (m + 24)
	xorl	28(%esi),%ecx		# in7 ^= *(uint32 *) (m + 28)
	movl	%eax,24(%edi)		# *(uint32 *) (out + 24) = in6
	movl	%ecx,28(%edi)		# *(uint32 *) (out + 28) = in7
	movl	132(%esp),%eax		# in8 = x8
	movl	136(%esp),%ecx		# in9 = x9
	addl	196(%esp),%eax		# in8 += j8
	addl	200(%esp),%ecx		# in9 += j9
	xorl	32(%esi),%eax		# in8 ^= *(uint32 *) (m + 32)
	xorl	36(%esi),%ecx		# in9 ^= *(uint32 *) (m + 36)
	movl	%eax,32(%edi)		# *(uint32 *) (out + 32) = in8
	movl	%ecx,36(%edi)		# *(uint32 *) (out + 36) = in9
	movl	140(%esp),%eax		# in10 = x10
	movl	144(%esp),%ecx		# in11 = x11
	addl	204(%esp),%eax		# in10 += j10
	addl	208(%esp),%ecx		# in11 += j11
	xorl	40(%esi),%eax		# in10 ^= *(uint32 *) (m + 40)
	xorl	44(%esi),%ecx		# in11 ^= *(uint32 *) (m + 44)
	movl	%eax,40(%edi)		# *(uint32 *) (out + 40) = in10
	movl	%ecx,44(%edi)		# *(uint32 *) (out + 44) = in11
	movl	148(%esp),%eax		# in12 = x12
	movl	152(%esp),%ecx		# in13 = x13
	addl	212(%esp),%eax		# in12 += j12
	addl	216(%esp),%ecx		# in13 += j13
	xorl	48(%esi),%eax		# in12 ^= *(uint32 *) (m + 48)
	xorl	52(%esi),%ecx		# in13 ^= *(uint32 *) (m + 52)
	movl	%eax,48(%edi)		# *(uint32 *) (out + 48) = in12
	movl	%ecx,52(%edi)		# *(uint32 *) (out + 52) = in13
	movl	156(%esp),%eax		# in14 = x14
	movl	160(%esp),%ecx		# in15 = x15
	addl	220(%esp),%eax		# in14 += j14
	addl	224(%esp),%ecx		# in15 += j15
	xorl	56(%esi),%eax		# in14 ^= *(uint32 *) (m + 56)
	xorl	60(%esi),%ecx		# in15 ^= *(uint32 *) (m + 60)
	movl	%eax,56(%edi)		# *(uint32 *) (out + 56) = in14
	movl	%ecx,60(%edi)		# *(uint32 *) (out + 60) = in15
	movl	76(%esp),%ebx		# bytes = bytes_backup
	/* 64-bit block counter in j8 (low) / j9 (high): add 1 with carry. */
	movl	196(%esp),%eax		# in8 = j8
	movl	200(%esp),%ecx		# in9 = j9
	add	$1,%eax			# in8 += 1
	adc	$0,%ecx			# in9 += 0 + carry
	movl	%eax,196(%esp)		# j8 = in8
	movl	%ecx,200(%esp)		# j9 = in9
	cmp	$64,%ebx		# bytes - 64
	ja	.Lbytesatleast65	# goto bytesatleast65 if unsigned>
	jae	.Lbytesatleast64	# goto bytesatleast64 if unsigned>=
	/* Short final block: copy `bytes` bytes from tmp to the real destination. */
	mov	%edi,%esi		# m = out
	movl	228(%esp),%edi		# out = ctarget
	mov	%ebx,%ecx		# i = bytes
	rep movsb			# while (i) { *out++ = *m++; --i }
.Lbytesatleast64:
	/* Last block done: write the advanced counter back into x. */
	movl	64(%esp),%eax		# x = x_backup
	movl	196(%esp),%ecx		# in8 = j8
	movl	200(%esp),%edx		# in9 = j9
	movl	%ecx,32(%eax)		# *(uint32 *) (x + 32) = in8
	movl	%edx,36(%eax)		# *(uint32 *) (x + 36) = in9
.Ldone:
	movl	80(%esp),%eax		# eax = eax_stack
	movl	84(%esp),%ebx		# ebx = ebx_stack
	movl	88(%esp),%esi		# esi = esi_stack
	movl	92(%esp),%edi		# edi = edi_stack
	movl	96(%esp),%ebp		# ebp = ebp_stack
	add	%eax,%esp		# leave
	ret
.Lbytesatleast65:
	sub	$64,%ebx		# bytes -= 64
	add	$64,%edi		# out += 64
	add	$64,%esi		# m += 64
	jmp	.Lbytesatleast1		# goto bytesatleast1
ENDPROC(salsa20_encrypt_bytes)

/*
 * salsa20_keysetup(x, k, kbits)
 *
 *   arg1  x      state to fill in
 *   arg2  k      key bytes
 *   arg3  kbits  key length in bits; values below 256 (unsigned compare)
 *                take the 128-bit path, everything else the 256-bit path
 *
 * Writes the first 16 key bytes to x words 1..4, the second key half to
 * words 11..14 (for the 128-bit path the first 16 bytes are used again),
 * and the four constant words to words 0, 5, 10 and 15. Read as
 * little-endian ASCII the constants spell "expand 32-byte k" (256-bit
 * path) and "expand 16-byte k" (128-bit path).
 * Words 6..9 are not touched here.
 *
 * Frame: 64 eax_stack, 68..80 saved ebx, esi, edi, ebp.
 */
# enter salsa20_keysetup
ENTRY(salsa20_keysetup)
	mov	%esp,%eax
	and	$31,%eax
	add	$256,%eax
	sub	%eax,%esp
	movl	%eax,64(%esp)		# eax_stack = eax
	movl	%ebx,68(%esp)		# ebx_stack = ebx
	movl	%esi,72(%esp)		# esi_stack = esi
	movl	%edi,76(%esp)		# edi_stack = edi
	movl	%ebp,80(%esp)		# ebp_stack = ebp
	movl	8(%esp,%eax),%ecx	# k = arg2
	movl	12(%esp,%eax),%edx	# kbits = arg3
	movl	4(%esp,%eax),%eax	# x = arg1 (last: overwrites the frame adjustment in eax)
	movl	0(%ecx),%ebx		# in1 = *(uint32 *) (k + 0)
	movl	4(%ecx),%esi		# in2 = *(uint32 *) (k + 4)
	movl	8(%ecx),%edi		# in3 = *(uint32 *) (k + 8)
	movl	12(%ecx),%ebp		# in4 = *(uint32 *) (k + 12)
	movl	%ebx,4(%eax)		# *(uint32 *) (x + 4) = in1
	movl	%esi,8(%eax)		# *(uint32 *) (x + 8) = in2
	movl	%edi,12(%eax)		# *(uint32 *) (x + 12) = in3
	movl	%ebp,16(%eax)		# *(uint32 *) (x + 16) = in4
	cmp	$256,%edx		# kbits - 256
	jb	.Lkbits128		# goto kbits128 if unsigned<
.Lkbits256:
	movl	16(%ecx),%edx		# in11 = *(uint32 *) (k + 16)
	movl	20(%ecx),%ebx		# in12 = *(uint32 *) (k + 20)
	movl	24(%ecx),%esi		# in13 = *(uint32 *) (k + 24)
	movl	28(%ecx),%ecx		# in14 = *(uint32 *) (k + 28)
	movl	%edx,44(%eax)		# *(uint32 *) (x + 44) = in11
	movl	%ebx,48(%eax)		# *(uint32 *) (x + 48) = in12
	movl	%esi,52(%eax)		# *(uint32 *) (x + 52) = in13
	movl	%ecx,56(%eax)		# *(uint32 *) (x + 56) = in14
	mov	$1634760805,%ecx	# in0 = 1634760805 (0x61707865, "expa")
	mov	$857760878,%edx		# in5 = 857760878 (0x3320646e, "nd 3")
	mov	$2036477234,%ebx	# in10 = 2036477234 (0x79622d32, "2-by")
	mov	$1797285236,%esi	# in15 = 1797285236 (0x6b206574, "te k")
	movl	%ecx,0(%eax)		# *(uint32 *) (x + 0) = in0
	movl	%edx,20(%eax)		# *(uint32 *) (x + 20) = in5
	movl	%ebx,40(%eax)		# *(uint32 *) (x + 40) = in10
	movl	%esi,60(%eax)		# *(uint32 *) (x + 60) = in15
	jmp	.Lkeysetupdone		# goto keysetupdone
.Lkbits128:
	movl	0(%ecx),%edx		# in11 = *(uint32 *) (k + 0)
	movl	4(%ecx),%ebx		# in12 = *(uint32 *) (k + 4)
	movl	8(%ecx),%esi		# in13 = *(uint32 *) (k + 8)
	movl	12(%ecx),%ecx		# in14 = *(uint32 *) (k + 12)
	movl	%edx,44(%eax)		# *(uint32 *) (x + 44) = in11
	movl	%ebx,48(%eax)		# *(uint32 *) (x + 48) = in12
	movl	%esi,52(%eax)		# *(uint32 *) (x + 52) = in13
	movl	%ecx,56(%eax)		# *(uint32 *) (x + 56) = in14
	mov	$1634760805,%ecx	# in0 = 1634760805 (0x61707865, "expa")
	mov	$824206446,%edx		# in5 = 824206446 (0x3120646e, "nd 1")
	mov	$2036477238,%ebx	# in10 = 2036477238 (0x79622d36, "6-by")
	mov	$1797285236,%esi	# in15 = 1797285236 (0x6b206574, "te k")
	movl	%ecx,0(%eax)		# *(uint32 *) (x + 0) = in0
	movl	%edx,20(%eax)		# *(uint32 *) (x + 20) = in5
	movl	%ebx,40(%eax)		# *(uint32 *) (x + 40) = in10
	movl	%esi,60(%eax)		# *(uint32 *) (x + 60) = in15
.Lkeysetupdone:
	movl	64(%esp),%eax		# eax = eax_stack
	movl	68(%esp),%ebx		# ebx = ebx_stack
	movl	72(%esp),%esi		# esi = esi_stack
	movl	76(%esp),%edi		# edi = edi_stack
	movl	80(%esp),%ebp		# ebp = ebp_stack
	add	%eax,%esp		# leave
	ret
ENDPROC(salsa20_keysetup)

/*
 * salsa20_ivsetup(x, iv)
 *
 *   arg1  x   state to update
 *   arg2  iv  8 bytes, read as two 32-bit words
 *
 * Stores the two iv words in x words 6 and 7 and zeroes words 8 and 9
 * (the block counter that salsa20_encrypt_bytes advances).
 *
 * Frame: 64 eax_stack, 68..80 saved ebx, esi, edi, ebp.
 */
# enter salsa20_ivsetup
ENTRY(salsa20_ivsetup)
	mov	%esp,%eax
	and	$31,%eax
	add	$256,%eax
	sub	%eax,%esp
	movl	%eax,64(%esp)		# eax_stack = eax
	movl	%ebx,68(%esp)		# ebx_stack = ebx
	movl	%esi,72(%esp)		# esi_stack = esi
	movl	%edi,76(%esp)		# edi_stack = edi
	movl	%ebp,80(%esp)		# ebp_stack = ebp
	movl	8(%esp,%eax),%ecx	# iv = arg2
	movl	4(%esp,%eax),%eax	# x = arg1 (last: overwrites the frame adjustment in eax)
	movl	0(%ecx),%edx		# in6 = *(uint32 *) (iv + 0)
	movl	4(%ecx),%ecx		# in7 = *(uint32 *) (iv + 4)
	mov	$0,%ebx			# in8 = 0
	mov	$0,%esi			# in9 = 0
	movl	%edx,24(%eax)		# *(uint32 *) (x + 24) = in6
	movl	%ecx,28(%eax)		# *(uint32 *) (x + 28) = in7
	movl	%ebx,32(%eax)		# *(uint32 *) (x + 32) = in8
	movl	%esi,36(%eax)		# *(uint32 *) (x + 36) = in9
	movl	64(%esp),%eax		# eax = eax_stack
	movl	68(%esp),%ebx		# ebx = ebx_stack
	movl	72(%esp),%esi		# esi = esi_stack
	movl	76(%esp),%edi		# edi = edi_stack
	movl	80(%esp),%ebp		# ebp = ebp_stack
	add	%eax,%esp		# leave
	ret
ENDPROC(salsa20_ivsetup)