summaryrefslogtreecommitdiff
path: root/arch/blackfin/lib
diff options
context:
space:
mode:
authorAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-08-05 17:04:01 -0300
committerAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-08-05 17:04:01 -0300
commit57f0f512b273f60d52568b8c6b77e17f5636edc0 (patch)
tree5e910f0e82173f4ef4f51111366a3f1299037a7b /arch/blackfin/lib
Initial import
Diffstat (limited to 'arch/blackfin/lib')
-rw-r--r--arch/blackfin/lib/Makefile11
-rw-r--r--arch/blackfin/lib/ashldi3.c35
-rw-r--r--arch/blackfin/lib/ashrdi3.c36
-rw-r--r--arch/blackfin/lib/divsi3.S199
-rw-r--r--arch/blackfin/lib/gcclib.h24
-rw-r--r--arch/blackfin/lib/ins.S118
-rw-r--r--arch/blackfin/lib/lshrdi3.c35
-rw-r--r--arch/blackfin/lib/memchr.S47
-rw-r--r--arch/blackfin/lib/memcmp.S92
-rw-r--r--arch/blackfin/lib/memcpy.S124
-rw-r--r--arch/blackfin/lib/memmove.S93
-rw-r--r--arch/blackfin/lib/memset.S87
-rw-r--r--arch/blackfin/lib/modsi3.S57
-rw-r--r--arch/blackfin/lib/muldi3.S74
-rw-r--r--arch/blackfin/lib/outs.S68
-rw-r--r--arch/blackfin/lib/smulsi3_highpart.S38
-rw-r--r--arch/blackfin/lib/strcmp.S43
-rw-r--r--arch/blackfin/lib/strcpy.S35
-rw-r--r--arch/blackfin/lib/strncmp.S52
-rw-r--r--arch/blackfin/lib/strncpy.S85
-rw-r--r--arch/blackfin/lib/udivsi3.S277
-rw-r--r--arch/blackfin/lib/umodsi3.S49
-rw-r--r--arch/blackfin/lib/umulsi3_highpart.S31
23 files changed, 1710 insertions, 0 deletions
diff --git a/arch/blackfin/lib/Makefile b/arch/blackfin/lib/Makefile
new file mode 100644
index 000000000..42c47dc9e
--- /dev/null
+++ b/arch/blackfin/lib/Makefile
@@ -0,0 +1,11 @@
+#
+# arch/blackfin/lib/Makefile
+#
+
+lib-y := \
+ ashldi3.o ashrdi3.o lshrdi3.o \
+ muldi3.o divsi3.o udivsi3.o modsi3.o umodsi3.o \
+ memcpy.o memset.o memcmp.o memchr.o memmove.o \
+ strcmp.o strcpy.o strncmp.o strncpy.o \
+ umulsi3_highpart.o smulsi3_highpart.o \
+ ins.o outs.o
diff --git a/arch/blackfin/lib/ashldi3.c b/arch/blackfin/lib/ashldi3.c
new file mode 100644
index 000000000..ab69d8768
--- /dev/null
+++ b/arch/blackfin/lib/ashldi3.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2004-2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include "gcclib.h"
+
+#ifdef CONFIG_ARITHMETIC_OPS_L1
+DItype __ashldi3(DItype u, word_type b)__attribute__((l1_text));
+#endif
+
+DItype __ashldi3(DItype u, word_type b)
+{
+ DIunion w;
+ word_type bm;
+ DIunion uu;
+
+ if (b == 0)
+ return u;
+
+ uu.ll = u;
+
+ bm = (sizeof(SItype) * BITS_PER_UNIT) - b;
+ if (bm <= 0) {
+ w.s.low = 0;
+ w.s.high = (USItype) uu.s.low << -bm;
+ } else {
+ USItype carries = (USItype) uu.s.low >> bm;
+ w.s.low = (USItype) uu.s.low << b;
+ w.s.high = ((USItype) uu.s.high << b) | carries;
+ }
+
+ return w.ll;
+}
diff --git a/arch/blackfin/lib/ashrdi3.c b/arch/blackfin/lib/ashrdi3.c
new file mode 100644
index 000000000..b5b351e82
--- /dev/null
+++ b/arch/blackfin/lib/ashrdi3.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2004-2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include "gcclib.h"
+
+#ifdef CONFIG_ARITHMETIC_OPS_L1
+DItype __ashrdi3(DItype u, word_type b)__attribute__((l1_text));
+#endif
+
+DItype __ashrdi3(DItype u, word_type b)
+{
+ DIunion w;
+ word_type bm;
+ DIunion uu;
+
+ if (b == 0)
+ return u;
+
+ uu.ll = u;
+
+ bm = (sizeof(SItype) * BITS_PER_UNIT) - b;
+ if (bm <= 0) {
+ /* w.s.high = 1..1 or 0..0 */
+ w.s.high = uu.s.high >> (sizeof(SItype) * BITS_PER_UNIT - 1);
+ w.s.low = uu.s.high >> -bm;
+ } else {
+ USItype carries = (USItype) uu.s.high << bm;
+ w.s.high = uu.s.high >> b;
+ w.s.low = ((USItype) uu.s.low >> b) | carries;
+ }
+
+ return w.ll;
+}
diff --git a/arch/blackfin/lib/divsi3.S b/arch/blackfin/lib/divsi3.S
new file mode 100644
index 000000000..ef2cd99ef
--- /dev/null
+++ b/arch/blackfin/lib/divsi3.S
@@ -0,0 +1,199 @@
+/*
+ * Copyright 2004-2009 Analog Devices Inc.
+ *
+ * Licensed under the Clear BSD license or the GPL-2 (or later)
+ *
+ * 16 / 32 bit signed division.
+ * Special cases :
+ * 1) If(numerator == 0)
+ * return 0
+ * 2) If(denominator ==0)
+ * return positive max = 0x7fffffff
+ * 3) If(numerator == denominator)
+ * return 1
+ * 4) If(denominator ==1)
+ * return numerator
+ * 5) If(denominator == -1)
+ * return -numerator
+ *
+ * Operand : R0 - Numerator (i)
+ * R1 - Denominator (i)
+ * R0 - Quotient (o)
+ * Registers Used : R2-R7,P0-P2
+ *
+ */
+
+.global ___divsi3;
+.type ___divsi3, STT_FUNC;
+
+#ifdef CONFIG_ARITHMETIC_OPS_L1
+.section .l1.text
+#else
+.text
+#endif
+
+.align 2;
+___divsi3 :
+
+
+ R3 = R0 ^ R1;
+ R0 = ABS R0;
+
+ CC = V;
+
+ r3 = rot r3 by -1;
+ r1 = abs r1; /* now both positive, r3.30 means "negate result",
+ ** r3.31 means overflow, add one to result
+ */
+ cc = r0 < r1;
+ if cc jump .Lret_zero;
+ r2 = r1 >> 15;
+ cc = r2;
+ if cc jump .Lidents;
+ r2 = r1 << 16;
+ cc = r2 <= r0;
+ if cc jump .Lidents;
+
+ DIVS(R0, R1);
+ DIVQ(R0, R1);
+ DIVQ(R0, R1);
+ DIVQ(R0, R1);
+ DIVQ(R0, R1);
+ DIVQ(R0, R1);
+ DIVQ(R0, R1);
+ DIVQ(R0, R1);
+ DIVQ(R0, R1);
+ DIVQ(R0, R1);
+ DIVQ(R0, R1);
+ DIVQ(R0, R1);
+ DIVQ(R0, R1);
+ DIVQ(R0, R1);
+ DIVQ(R0, R1);
+ DIVQ(R0, R1);
+ DIVQ(R0, R1);
+
+ R0 = R0.L (Z);
+ r1 = r3 >> 31; /* add overflow issue back in */
+ r0 = r0 + r1;
+ r1 = -r0;
+ cc = bittst(r3, 30);
+ if cc r0 = r1;
+ RTS;
+
+/* Can't use the primitives. Test common identities.
+** If the identity is true, return the value in R2.
+*/
+
+.Lidents:
+ CC = R1 == 0; /* check for divide by zero */
+ IF CC JUMP .Lident_return;
+
+ CC = R0 == 0; /* check for division of zero */
+ IF CC JUMP .Lzero_return;
+
+ CC = R0 == R1; /* check for identical operands */
+ IF CC JUMP .Lident_return;
+
+ CC = R1 == 1; /* check for divide by 1 */
+ IF CC JUMP .Lident_return;
+
+ R2.L = ONES R1;
+ R2 = R2.L (Z);
+ CC = R2 == 1;
+ IF CC JUMP .Lpower_of_two;
+
+ /* Identities haven't helped either.
+ ** Perform the full division process.
+ */
+
+ P1 = 31; /* Set loop counter */
+
+ [--SP] = (R7:5); /* Push registers R5-R7 */
+ R2 = -R1;
+ [--SP] = R2;
+ R2 = R0 << 1; /* R2 lsw of dividend */
+ R6 = R0 ^ R1; /* Get sign */
+ R5 = R6 >> 31; /* Shift sign to LSB */
+
+ R0 = 0 ; /* Clear msw partial remainder */
+ R2 = R2 | R5; /* Shift quotient bit */
+ R6 = R0 ^ R1; /* Get new quotient bit */
+
+ LSETUP(.Llst,.Llend) LC0 = P1; /* Setup loop */
+.Llst: R7 = R2 >> 31; /* record copy of carry from R2 */
+ R2 = R2 << 1; /* Shift 64 bit dividend up by 1 bit */
+ R0 = R0 << 1 || R5 = [SP];
+ R0 = R0 | R7; /* and add carry */
+ CC = R6 < 0; /* Check quotient(AQ) */
+ /* we might be subtracting divisor (AQ==0) */
+ IF CC R5 = R1; /* or we might be adding divisor (AQ==1)*/
+ R0 = R0 + R5; /* do add or subtract, as indicated by AQ */
+ R6 = R0 ^ R1; /* Generate next quotient bit */
+ R5 = R6 >> 31;
+ /* Assume AQ==1, shift in zero */
+ BITTGL(R5,0); /* tweak AQ to be what we want to shift in */
+.Llend: R2 = R2 + R5; /* and then set shifted-in value to
+ ** tweaked AQ.
+ */
+ r1 = r3 >> 31;
+ r2 = r2 + r1;
+ cc = bittst(r3,30);
+ r0 = -r2;
+ if !cc r0 = r2;
+ SP += 4;
+ (R7:5)= [SP++]; /* Pop registers R6-R7 */
+ RTS;
+
+.Lident_return:
+ CC = R1 == 0; /* check for divide by zero => 0x7fffffff */
+ R2 = -1 (X);
+ R2 >>= 1;
+ IF CC JUMP .Ltrue_ident_return;
+
+ CC = R0 == R1; /* check for identical operands => 1 */
+ R2 = 1 (Z);
+ IF CC JUMP .Ltrue_ident_return;
+
+ R2 = R0; /* assume divide by 1 => numerator */
+ /*FALLTHRU*/
+
+.Ltrue_ident_return:
+ R0 = R2; /* Return an identity value */
+ R2 = -R2;
+ CC = bittst(R3,30);
+ IF CC R0 = R2;
+.Lzero_return:
+ RTS; /* ...including zero */
+
+.Lpower_of_two:
+ /* Y has a single bit set, which means it's a power of two.
+ ** That means we can perform the division just by shifting
+ ** X to the right the appropriate number of bits
+ */
+
+ /* signbits returns the number of sign bits, minus one.
+ ** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
+ ** to shift right n-signbits spaces. It also means 0x80000000
+ ** is a special case, because that *also* gives a signbits of 0
+ */
+
+ R2 = R0 >> 31;
+ CC = R1 < 0;
+ IF CC JUMP .Ltrue_ident_return;
+
+ R1.l = SIGNBITS R1;
+ R1 = R1.L (Z);
+ R1 += -30;
+ R0 = LSHIFT R0 by R1.L;
+ r1 = r3 >> 31;
+ r0 = r0 + r1;
+ R2 = -R0; // negate result if necessary
+ CC = bittst(R3,30);
+ IF CC R0 = R2;
+ RTS;
+
+.Lret_zero:
+ R0 = 0;
+ RTS;
+
+.size ___divsi3, .-___divsi3
diff --git a/arch/blackfin/lib/gcclib.h b/arch/blackfin/lib/gcclib.h
new file mode 100644
index 000000000..724f07f14
--- /dev/null
+++ b/arch/blackfin/lib/gcclib.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2004-2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#define BITS_PER_UNIT 8
+#define SI_TYPE_SIZE (sizeof (SItype) * BITS_PER_UNIT)
+
+typedef unsigned int UQItype __attribute__ ((mode(QI)));
+typedef int SItype __attribute__ ((mode(SI)));
+typedef unsigned int USItype __attribute__ ((mode(SI)));
+typedef int DItype __attribute__ ((mode(DI)));
+typedef int word_type __attribute__ ((mode(__word__)));
+typedef unsigned int UDItype __attribute__ ((mode(DI)));
+
+struct DIstruct {
+ SItype low, high;
+};
+
+typedef union {
+ struct DIstruct s;
+ DItype ll;
+} DIunion;
diff --git a/arch/blackfin/lib/ins.S b/arch/blackfin/lib/ins.S
new file mode 100644
index 000000000..d59608dec
--- /dev/null
+++ b/arch/blackfin/lib/ins.S
@@ -0,0 +1,118 @@
+/*
+ * arch/blackfin/lib/ins.S - ins{bwl} using hardware loops
+ *
+ * Copyright 2004-2008 Analog Devices Inc.
+ * Copyright (C) 2005 Bas Vermeulen, BuyWays BV <bas@buyways.nl>
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/linkage.h>
+#include <asm/blackfin.h>
+
+.align 2
+
+#ifdef CONFIG_IPIPE
+# define DO_CLI \
+ [--sp] = rets; \
+ [--sp] = (P5:0); \
+ sp += -12; \
+ call ___ipipe_disable_root_irqs_hw; \
+ sp += 12; \
+ (P5:0) = [sp++];
+# define CLI_INNER_NOP
+#else
+# define DO_CLI cli R3;
+# define CLI_INNER_NOP nop; nop; nop;
+#endif
+
+#ifdef CONFIG_IPIPE
+# define DO_STI \
+ sp += -12; \
+ call ___ipipe_enable_root_irqs_hw; \
+ sp += 12; \
+2: rets = [sp++];
+#else
+# define DO_STI 2: sti R3;
+#endif
+
+#ifdef CONFIG_BFIN_INS_LOWOVERHEAD
+# define CLI_OUTER DO_CLI;
+# define STI_OUTER DO_STI;
+# define CLI_INNER 1:
+# if ANOMALY_05000416
+# define STI_INNER nop; 2: nop;
+# else
+# define STI_INNER 2:
+# endif
+#else
+# define CLI_OUTER
+# define STI_OUTER
+# define CLI_INNER 1: DO_CLI; CLI_INNER_NOP;
+# define STI_INNER DO_STI;
+#endif
+
+/*
+ * Reads on the Blackfin are speculative. In Blackfin terms, this means they
+ * can be interrupted at any time (even after they have been issued on to the
+ * external bus), and re-issued after the interrupt occurs.
+ *
+ * If a FIFO is sitting on the end of the read, it will see two reads,
+ * when the core only sees one. The FIFO receives the read which is cancelled,
+ * and not delivered to the core.
+ *
+ * To solve this, interrupts are turned off before reads occur to I/O space.
+ * There are 3 versions of all these functions
+ * - turns interrupts off every read (higher overhead, but lower latency)
+ * - turns interrupts off every loop (low overhead, but longer latency)
+ * - DMA version, which do not suffer from this issue. DMA versions have
+ * different name (prefixed by dma_ ), and are located in
+ * ../kernel/bfin_dma.c
+ * Using the dma related functions are recommended for transferring large
+ * buffers in/out of FIFOs.
+ */
+
+#define COMMON_INS(func, ops) \
+ENTRY(_ins##func) \
+ P0 = R0; /* P0 = port */ \
+ CLI_OUTER; /* 3 instructions before first read access */ \
+ P1 = R1; /* P1 = address */ \
+ P2 = R2; /* P2 = count */ \
+ SSYNC; \
+ \
+ LSETUP(1f, 2f) LC0 = P2; \
+ CLI_INNER; \
+ ops; \
+ STI_INNER; \
+ \
+ STI_OUTER; \
+ RTS; \
+ENDPROC(_ins##func)
+
+COMMON_INS(l, \
+ R0 = [P0]; \
+ [P1++] = R0; \
+)
+
+COMMON_INS(w, \
+ R0 = W[P0]; \
+ W[P1++] = R0; \
+)
+
+COMMON_INS(w_8, \
+ R0 = W[P0]; \
+ B[P1++] = R0; \
+ R0 = R0 >> 8; \
+ B[P1++] = R0; \
+)
+
+COMMON_INS(b, \
+ R0 = B[P0]; \
+ B[P1++] = R0; \
+)
+
+COMMON_INS(l_16, \
+ R0 = [P0]; \
+ W[P1++] = R0; \
+ R0 = R0 >> 16; \
+ W[P1++] = R0; \
+)
diff --git a/arch/blackfin/lib/lshrdi3.c b/arch/blackfin/lib/lshrdi3.c
new file mode 100644
index 000000000..53f174104
--- /dev/null
+++ b/arch/blackfin/lib/lshrdi3.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2004-2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include "gcclib.h"
+
+#ifdef CONFIG_ARITHMETIC_OPS_L1
+DItype __lshrdi3(DItype u, word_type b)__attribute__((l1_text));
+#endif
+
+DItype __lshrdi3(DItype u, word_type b)
+{
+ DIunion w;
+ word_type bm;
+ DIunion uu;
+
+ if (b == 0)
+ return u;
+
+ uu.ll = u;
+
+ bm = (sizeof(SItype) * BITS_PER_UNIT) - b;
+ if (bm <= 0) {
+ w.s.high = 0;
+ w.s.low = (USItype) uu.s.high >> -bm;
+ } else {
+ USItype carries = (USItype) uu.s.high << bm;
+ w.s.high = (USItype) uu.s.high >> b;
+ w.s.low = ((USItype) uu.s.low >> b) | carries;
+ }
+
+ return w.ll;
+}
diff --git a/arch/blackfin/lib/memchr.S b/arch/blackfin/lib/memchr.S
new file mode 100644
index 000000000..bcfc8a14c
--- /dev/null
+++ b/arch/blackfin/lib/memchr.S
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2005-2009 Analog Devices Inc.
+ *
+ * Licensed under the Clear BSD license or the GPL-2 (or later)
+ */
+
+#include <linux/linkage.h>
+
+/* void *memchr(const void *s, int c, size_t n);
+ * R0 = address (s)
+ * R1 = sought byte (c)
+ * R2 = count (n)
+ *
+ * Returns pointer to located character.
+ */
+
+.text
+
+.align 2
+
+ENTRY(_memchr)
+ P0 = R0; /* P0 = address */
+ P2 = R2; /* P2 = count */
+ R1 = R1.B(Z);
+ CC = R2 == 0;
+ IF CC JUMP .Lfailed;
+
+.Lbytes:
+ LSETUP (.Lbyte_loop_s, .Lbyte_loop_e) LC0=P2;
+
+.Lbyte_loop_s:
+ R3 = B[P0++](Z);
+ CC = R3 == R1;
+ IF CC JUMP .Lfound;
+.Lbyte_loop_e:
+ NOP;
+
+.Lfailed:
+ R0=0;
+ RTS;
+
+.Lfound:
+ R0 = P0;
+ R0 += -1;
+ RTS;
+
+ENDPROC(_memchr)
diff --git a/arch/blackfin/lib/memcmp.S b/arch/blackfin/lib/memcmp.S
new file mode 100644
index 000000000..2e1c9477f
--- /dev/null
+++ b/arch/blackfin/lib/memcmp.S
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2004-2009 Analog Devices Inc.
+ *
+ * Licensed under the Clear BSD license or the GPL-2 (or later)
+ */
+
+#include <linux/linkage.h>
+
+/* int memcmp(const void *s1, const void *s2, size_t n);
+ * R0 = First Address (s1)
+ * R1 = Second Address (s2)
+ * R2 = count (n)
+ *
+ * Favours word aligned data.
+ */
+
+.text
+
+.align 2
+
+ENTRY(_memcmp)
+ I1 = P3;
+ P0 = R0; /* P0 = s1 address */
+ P3 = R1; /* P3 = s2 Address */
+ P2 = R2 ; /* P2 = count */
+ CC = R2 <= 7(IU);
+ IF CC JUMP .Ltoo_small;
+ I0 = R1; /* s2 */
+ R1 = R1 | R0; /* OR addresses together */
+ R1 <<= 30; /* check bottom two bits */
+ CC = AZ; /* AZ set if zero. */
+ IF !CC JUMP .Lbytes ; /* Jump if addrs not aligned. */
+
+ P1 = P2 >> 2; /* count = n/4 */
+ R3 = 3;
+ R2 = R2 & R3; /* remainder */
+ P2 = R2; /* set remainder */
+
+ LSETUP (.Lquad_loop_s, .Lquad_loop_e) LC0=P1;
+.Lquad_loop_s:
+#if ANOMALY_05000202
+ R0 = [P0++];
+ R1 = [I0++];
+#else
+ MNOP || R0 = [P0++] || R1 = [I0++];
+#endif
+ CC = R0 == R1;
+ IF !CC JUMP .Lquad_different;
+.Lquad_loop_e:
+ NOP;
+
+ P3 = I0; /* s2 */
+.Ltoo_small:
+ CC = P2 == 0; /* Check zero count*/
+ IF CC JUMP .Lfinished; /* very unlikely*/
+
+.Lbytes:
+ LSETUP (.Lbyte_loop_s, .Lbyte_loop_e) LC0=P2;
+.Lbyte_loop_s:
+ R1 = B[P3++](Z); /* *s2 */
+ R0 = B[P0++](Z); /* *s1 */
+ CC = R0 == R1;
+ IF !CC JUMP .Ldifferent;
+.Lbyte_loop_e:
+ NOP;
+
+.Ldifferent:
+ R0 = R0 - R1;
+ P3 = I1;
+ RTS;
+
+.Lquad_different:
+ /* We've read two quads which don't match.
+ * Can't just compare them, because we're
+ * a little-endian machine, so the MSBs of
+ * the regs occur at later addresses in the
+ * string.
+ * Arrange to re-read those two quads again,
+ * byte-by-byte.
+ */
+ P0 += -4; /* back up to the start of the */
+ P3 = I0; /* quads, and increase the*/
+ P2 += 4; /* remainder count*/
+ P3 += -4;
+ JUMP .Lbytes;
+
+.Lfinished:
+ R0 = 0;
+ P3 = I1;
+ RTS;
+
+ENDPROC(_memcmp)
diff --git a/arch/blackfin/lib/memcpy.S b/arch/blackfin/lib/memcpy.S
new file mode 100644
index 000000000..53cb3698a
--- /dev/null
+++ b/arch/blackfin/lib/memcpy.S
@@ -0,0 +1,124 @@
+/*
+ * internal version of memcpy(), issued by the compiler to copy blocks of
+ * data around. This is really memmove() - it has to be able to deal with
+ * possible overlaps, because that ambiguity is when the compiler gives up
+ * and calls a function. We have our own, internal version so that we get
+ * something we trust, even if the user has redefined the normal symbol.
+ *
+ * Copyright 2004-2009 Analog Devices Inc.
+ *
+ * Licensed under the Clear BSD license or the GPL-2 (or later)
+ */
+
+#include <linux/linkage.h>
+
+/* void *memcpy(void *dest, const void *src, size_t n);
+ * R0 = To Address (dest) (leave unchanged to form result)
+ * R1 = From Address (src)
+ * R2 = count
+ *
+ * Note: Favours word alignment
+ */
+
+#ifdef CONFIG_MEMCPY_L1
+.section .l1.text
+#else
+.text
+#endif
+
+.align 2
+
+ENTRY(_memcpy)
+ CC = R2 <= 0; /* length not positive? */
+ IF CC JUMP .L_P1L2147483647; /* Nothing to do */
+
+ P0 = R0 ; /* dst*/
+ P1 = R1 ; /* src*/
+ P2 = R2 ; /* length */
+
+ /* check for overlapping data */
+ CC = R1 < R0; /* src < dst */
+ IF !CC JUMP .Lno_overlap;
+ R3 = R1 + R2;
+ CC = R0 < R3; /* and dst < src+len */
+ IF CC JUMP .Lhas_overlap;
+
+.Lno_overlap:
+ /* Check for aligned data.*/
+
+ R3 = R1 | R0;
+ R1 = 0x3;
+ R3 = R3 & R1;
+ CC = R3; /* low bits set on either address? */
+ IF CC JUMP .Lnot_aligned;
+
+ /* Both addresses are word-aligned, so we can copy
+ at least part of the data using word copies.*/
+ P2 = P2 >> 2;
+ CC = P2 <= 2;
+ IF !CC JUMP .Lmore_than_seven;
+ /* less than eight bytes... */
+ P2 = R2;
+ LSETUP(.Lthree_start, .Lthree_end) LC0=P2;
+.Lthree_start:
+ R3 = B[P1++] (X);
+.Lthree_end:
+ B[P0++] = R3;
+
+ RTS;
+
+.Lmore_than_seven:
+ /* There's at least eight bytes to copy. */
+ P2 += -1; /* because we unroll one iteration */
+ LSETUP(.Lword_loops, .Lword_loope) LC0=P2;
+ I1 = P1;
+ R3 = [I1++];
+#if ANOMALY_05000202
+.Lword_loops:
+ [P0++] = R3;
+.Lword_loope:
+ R3 = [I1++];
+#else
+.Lword_loops:
+.Lword_loope:
+ MNOP || [P0++] = R3 || R3 = [I1++];
+#endif
+ [P0++] = R3;
+ /* Any remaining bytes to copy? */
+ R3 = 0x3;
+ R3 = R2 & R3;
+ CC = R3 == 0;
+ P1 = I1; /* in case there's something left, */
+ IF !CC JUMP .Lbytes_left;
+ RTS;
+.Lbytes_left: P2 = R3;
+.Lnot_aligned:
+ /* From here, we're copying byte-by-byte. */
+ LSETUP (.Lbyte_start, .Lbyte_end) LC0=P2;
+.Lbyte_start:
+ R1 = B[P1++] (X);
+.Lbyte_end:
+ B[P0++] = R1;
+
+.L_P1L2147483647:
+ RTS;
+
+.Lhas_overlap:
+ /* Need to reverse the copying, because the
+ * dst would clobber the src.
+ * Don't bother to work out alignment for
+ * the reverse case.
+ */
+ P0 = P0 + P2;
+ P0 += -1;
+ P1 = P1 + P2;
+ P1 += -1;
+ LSETUP(.Lover_start, .Lover_end) LC0=P2;
+.Lover_start:
+ R1 = B[P1--] (X);
+.Lover_end:
+ B[P0--] = R1;
+
+ RTS;
+
+ENDPROC(_memcpy)
diff --git a/arch/blackfin/lib/memmove.S b/arch/blackfin/lib/memmove.S
new file mode 100644
index 000000000..e0b78208f
--- /dev/null
+++ b/arch/blackfin/lib/memmove.S
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2005-2009 Analog Devices Inc.
+ *
+ * Licensed under the Clear BSD license or the GPL-2 (or later)
+ */
+
+#include <linux/linkage.h>
+
+.align 2
+
+/*
+ * C Library function MEMMOVE
+ * R0 = To Address (leave unchanged to form result)
+ * R1 = From Address
+ * R2 = count
+ * Data may overlap
+ */
+
+ENTRY(_memmove)
+ I1 = P3;
+ P0 = R0; /* P0 = To address */
+ P3 = R1; /* P3 = From Address */
+ P2 = R2; /* P2 = count */
+ CC = P2 == 0; /* Check zero count*/
+ IF CC JUMP .Lfinished; /* very unlikely */
+
+ CC = R1 < R0 (IU); /* From < To */
+ IF !CC JUMP .Lno_overlap;
+ R3 = R1 + R2;
+ CC = R0 <= R3 (IU); /* (From+len) >= To */
+ IF CC JUMP .Loverlap;
+.Lno_overlap:
+ R3 = 11;
+ CC = R2 <= R3;
+ IF CC JUMP .Lbytes;
+ R3 = R1 | R0; /* OR addresses together */
+ R3 <<= 30; /* check bottom two bits */
+ CC = AZ; /* AZ set if zero.*/
+ IF !CC JUMP .Lbytes; /* Jump if addrs not aligned.*/
+
+ I0 = P3;
+ P1 = P2 >> 2; /* count = n/4 */
+ P1 += -1;
+ R3 = 3;
+ R2 = R2 & R3; /* remainder */
+ P2 = R2; /* set remainder */
+ R1 = [I0++];
+
+ LSETUP (.Lquad_loops, .Lquad_loope) LC0=P1;
+#if ANOMALY_05000202
+.Lquad_loops:
+ [P0++] = R1;
+.Lquad_loope:
+ R1 = [I0++];
+#else
+.Lquad_loops:
+.Lquad_loope:
+ MNOP || [P0++] = R1 || R1 = [I0++];
+#endif
+ [P0++] = R1;
+
+ CC = P2 == 0; /* any remaining bytes? */
+ P3 = I0; /* Amend P3 to updated ptr. */
+ IF !CC JUMP .Lbytes;
+ P3 = I1;
+ RTS;
+
+.Lbytes: LSETUP (.Lbyte2_s, .Lbyte2_e) LC0=P2;
+.Lbyte2_s: R1 = B[P3++](Z);
+.Lbyte2_e: B[P0++] = R1;
+
+.Lfinished: P3 = I1;
+ RTS;
+
+.Loverlap:
+ P2 += -1;
+ P0 = P0 + P2;
+ P3 = P3 + P2;
+ R1 = B[P3--] (Z);
+ CC = P2 == 0;
+ IF CC JUMP .Lno_loop;
+#if ANOMALY_05000245
+ NOP;
+ NOP;
+#endif
+ LSETUP (.Lol_s, .Lol_e) LC0 = P2;
+.Lol_s: B[P0--] = R1;
+.Lol_e: R1 = B[P3--] (Z);
+.Lno_loop: B[P0] = R1;
+ P3 = I1;
+ RTS;
+
+ENDPROC(_memmove)
diff --git a/arch/blackfin/lib/memset.S b/arch/blackfin/lib/memset.S
new file mode 100644
index 000000000..cdcf9148e
--- /dev/null
+++ b/arch/blackfin/lib/memset.S
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2004-2009 Analog Devices Inc.
+ *
+ * Licensed under the Clear BSD license or the GPL-2 (or later)
+ */
+
+#include <linux/linkage.h>
+
+.align 2
+
+#ifdef CONFIG_MEMSET_L1
+.section .l1.text
+#else
+.text
+#endif
+
+/*
+ * C Library function MEMSET
+ * R0 = address (leave unchanged to form result)
+ * R1 = filler byte
+ * R2 = count
+ * Favours word aligned data.
+ * The strncpy assumes that I0 and I1 are not used in this function
+ */
+
+ENTRY(_memset)
+ P0 = R0 ; /* P0 = address */
+ P2 = R2 ; /* P2 = count */
+ R3 = R0 + R2; /* end */
+ CC = R2 <= 7(IU);
+ IF CC JUMP .Ltoo_small;
+ R1 = R1.B (Z); /* R1 = fill char */
+ R2 = 3;
+ R2 = R0 & R2; /* addr bottom two bits */
+ CC = R2 == 0; /* AZ set if zero. */
+ IF !CC JUMP .Lforce_align ; /* Jump if addr not aligned. */
+
+.Laligned:
+ P1 = P2 >> 2; /* count = n/4 */
+ R2 = R1 << 8; /* create quad filler */
+ R2.L = R2.L + R1.L(NS);
+ R2.H = R2.L + R1.H(NS);
+ P2 = R3;
+
+ LSETUP (.Lquad_loop , .Lquad_loop) LC0=P1;
+.Lquad_loop:
+ [P0++] = R2;
+
+ CC = P0 == P2;
+ IF !CC JUMP .Lbytes_left;
+ RTS;
+
+.Lbytes_left:
+ R2 = R3; /* end point */
+ R3 = P0; /* current position */
+ R2 = R2 - R3; /* bytes left */
+ P2 = R2;
+
+.Ltoo_small:
+ CC = P2 == 0; /* Check zero count */
+ IF CC JUMP .Lfinished; /* Unusual */
+
+.Lbytes:
+ LSETUP (.Lbyte_loop , .Lbyte_loop) LC0=P2;
+.Lbyte_loop:
+ B[P0++] = R1;
+
+.Lfinished:
+ RTS;
+
+.Lforce_align:
+ CC = BITTST (R0, 0); /* odd byte */
+ R0 = 4;
+ R0 = R0 - R2;
+ P1 = R0;
+ R0 = P0; /* Recover return address */
+ IF !CC JUMP .Lskip1;
+ B[P0++] = R1;
+.Lskip1:
+ CC = R2 <= 2; /* 2 bytes */
+ P2 -= P1; /* reduce count */
+ IF !CC JUMP .Laligned;
+ B[P0++] = R1;
+ B[P0++] = R1;
+ JUMP .Laligned;
+
+ENDPROC(_memset)
diff --git a/arch/blackfin/lib/modsi3.S b/arch/blackfin/lib/modsi3.S
new file mode 100644
index 000000000..f7026ce1f
--- /dev/null
+++ b/arch/blackfin/lib/modsi3.S
@@ -0,0 +1,57 @@
+/*
+ * This program computes 32 bit signed remainder. It calls div32 function
+ * for quotient estimation.
+ * Registers in: R0, R1 = Numerator/ Denominator
+ * Registers out: R0 = Remainder
+ *
+ * Copyright 2004-2009 Analog Devices Inc.
+ *
+ * Licensed under the Clear BSD license or the GPL-2 (or later)
+ */
+
+.global ___modsi3;
+.type ___modsi3, STT_FUNC;
+.extern ___divsi3;
+.type ___divsi3, STT_FUNC;
+
+#ifdef CONFIG_ARITHMETIC_OPS_L1
+.section .l1.text
+#else
+.text
+#endif
+
+___modsi3:
+
+ CC=R0==0;
+ IF CC JUMP .LRETURN_R0; /* Return 0, if numerator == 0 */
+ CC=R1==0;
+ IF CC JUMP .LRETURN_ZERO; /* Return 0, if denominator == 0 */
+ CC=R0==R1;
+ IF CC JUMP .LRETURN_ZERO; /* Return 0, if numerator == denominator */
+ CC = R1 == 1;
+ IF CC JUMP .LRETURN_ZERO; /* Return 0, if denominator == 1 */
+ CC = R1 == -1;
+ IF CC JUMP .LRETURN_ZERO; /* Return 0, if denominator == -1 */
+
+ /* Valid input. Use __divsi3() to compute the quotient, and then
+ * derive the remainder from that. */
+
+ [--SP] = (R7:6); /* Push R7 and R6 */
+ [--SP] = RETS; /* and return address */
+ R7 = R0; /* Copy of R0 */
+ R6 = R1; /* Save for later */
+ SP += -12; /* Should always provide this space */
+ CALL ___divsi3; /* Compute signed quotient using ___divsi3()*/
+ SP += 12;
+ R0 *= R6; /* Quotient * divisor */
+ R0 = R7 - R0; /* Dividend - (quotient * divisor) */
+ RETS = [SP++]; /* Get back return address */
+ (R7:6) = [SP++]; /* Pop registers R7 and R4 */
+ RTS; /* Store remainder */
+
+.LRETURN_ZERO:
+ R0 = 0;
+.LRETURN_R0:
+ RTS;
+
+.size ___modsi3, .-___modsi3
diff --git a/arch/blackfin/lib/muldi3.S b/arch/blackfin/lib/muldi3.S
new file mode 100644
index 000000000..abf9b2a51
--- /dev/null
+++ b/arch/blackfin/lib/muldi3.S
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2008 Analog Devices Inc.
+ *
+ * Licensed under the Clear BSD license or the GPL-2 (or later)
+ */
+
+.align 2
+.global ___muldi3;
+.type ___muldi3, STT_FUNC;
+
+#ifdef CONFIG_ARITHMETIC_OPS_L1
+.section .l1.text
+#else
+.text
+#endif
+
+/*
+ R1:R0 * R3:R2
+ = R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l
+[X] = (R1.h * R3.h) * 2^96
+[X] + (R1.h * R3.l + R1.l * R3.h) * 2^80
+[X] + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64
+[T1] + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48
+[T2] + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32
+[T3] + (R0.l * R2.h + R2.l * R0.h) * 2^16
+[T4] + (R0.l * R2.l)
+
+ We can discard the first three lines marked "X" since we produce
+ only a 64 bit result. So, we need ten 16-bit multiplies.
+
+ Individual mul-acc results:
+[E1] = R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h
+[E2] = R1.l * R2.l + R3.l * R0.l + R0.h * R2.h
+[E3] = R0.l * R2.h + R2.l * R0.h
+[E4] = R0.l * R2.l
+
+ We also need to add high parts from lower-level results to higher ones:
+ E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4
+
+ One interesting property is that all parts of the result that depend
+ on the sign of the multiplication are discarded. Those would be the
+ multiplications involving R1.h and R3.h, but only the top 16 bit of
+ the 32 bit result depend on the sign, and since R1.h and R3.h only
+ occur in E1, the top half of these results is cut off.
+ So, we can just use FU mode for all of the 16-bit multiplies, and
+ ignore questions of when to use mixed mode. */
+
+___muldi3:
+ /* [SP] technically is part of the caller's frame, but we can
+ use it as scratch space. */
+ A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12]; /* E1 */
+ A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4; /* E1 */
+ A0 += A1; /* E1 */
+ R4 = A0.w;
+ A0 = R0.l * R3.l (FU); /* E2 */
+ A0 += R2.l * R1.l (FU); /* E2 */
+
+ A1 = R2.L * R0.L (FU); /* E4 */
+ R3 = A1.w;
+ A1 = A1 >> 16; /* E3c */
+ A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU); /* E2, E3c */
+ A1 += R0.L * R2.H (FU); /* E3c */
+ R0 = A1.w;
+ A1 = A1 >> 16; /* E2c */
+ A0 += A1; /* E2c */
+ R1 = A0.w;
+
+ /* low(result) = low(E3c):low(E4) */
+ R0 = PACK (R0.l, R3.l);
+ /* high(result) = E2c + (E1 << 16) */
+ R1.h = R1.h + R4.l (NS) || R4 = [SP];
+ RTS;
+
+.size ___muldi3, .-___muldi3
diff --git a/arch/blackfin/lib/outs.S b/arch/blackfin/lib/outs.S
new file mode 100644
index 000000000..06a5e6744
--- /dev/null
+++ b/arch/blackfin/lib/outs.S
@@ -0,0 +1,68 @@
+/*
+ * Implementation of outs{bwl} for BlackFin processors using zero overhead loops.
+ *
+ * Copyright 2005-2009 Analog Devices Inc.
+ * 2005 BuyWays BV
+ * Bas Vermeulen <bas@buyways.nl>
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <linux/linkage.h>
+
+.align 2
+
+ENTRY(_outsl)
+ CC = R2 == 0;
+ IF CC JUMP 1f;
+ P0 = R0; /* P0 = port */
+ P1 = R1; /* P1 = address */
+ P2 = R2; /* P2 = count */
+
+ LSETUP( .Llong_loop_s, .Llong_loop_e) LC0 = P2;
+.Llong_loop_s: R0 = [P1++];
+.Llong_loop_e: [P0] = R0;
+1: RTS;
+ENDPROC(_outsl)
+
+ENTRY(_outsw)
+ CC = R2 == 0;
+ IF CC JUMP 1f;
+ P0 = R0; /* P0 = port */
+ P1 = R1; /* P1 = address */
+ P2 = R2; /* P2 = count */
+
+ LSETUP( .Lword_loop_s, .Lword_loop_e) LC0 = P2;
+.Lword_loop_s: R0 = W[P1++];
+.Lword_loop_e: W[P0] = R0;
+1: RTS;
+ENDPROC(_outsw)
+
+ENTRY(_outsb)
+ CC = R2 == 0;
+ IF CC JUMP 1f;
+ P0 = R0; /* P0 = port */
+ P1 = R1; /* P1 = address */
+ P2 = R2; /* P2 = count */
+
+ LSETUP( .Lbyte_loop_s, .Lbyte_loop_e) LC0 = P2;
+.Lbyte_loop_s: R0 = B[P1++];
+.Lbyte_loop_e: B[P0] = R0;
+1: RTS;
+ENDPROC(_outsb)
+
+ENTRY(_outsw_8)
+ CC = R2 == 0;
+ IF CC JUMP 1f;
+ P0 = R0; /* P0 = port */
+ P1 = R1; /* P1 = address */
+ P2 = R2; /* P2 = count */
+
+ LSETUP( .Lword8_loop_s, .Lword8_loop_e) LC0 = P2;
+.Lword8_loop_s: R1 = B[P1++];
+ R0 = B[P1++];
+ R0 = R0 << 8;
+ R0 = R0 + R1;
+.Lword8_loop_e: W[P0] = R0;
+1: RTS;
+ENDPROC(_outsw_8)
diff --git a/arch/blackfin/lib/smulsi3_highpart.S b/arch/blackfin/lib/smulsi3_highpart.S
new file mode 100644
index 000000000..e50d6c4ac
--- /dev/null
+++ b/arch/blackfin/lib/smulsi3_highpart.S
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2007 Analog Devices Inc.
+ *
+ * Licensed under the Clear BSD license or the GPL-2 (or later)
+ */
+
+.align 2
+.global ___smulsi3_highpart;
+.type ___smulsi3_highpart, STT_FUNC;
+
+#ifdef CONFIG_ARITHMETIC_OPS_L1
+.section .l1.text
+#else
+.text
+#endif
+
+___smulsi3_highpart:
+ R2 = R1.L * R0.L (FU);
+ R3 = R1.H * R0.L (IS,M);
+ R0 = R0.H * R1.H, R1 = R0.H * R1.L (IS,M);
+
+ R1.L = R2.H + R1.L;
+ cc = ac0;
+ R2 = cc;
+
+ R1.L = R1.L + R3.L;
+ cc = ac0;
+ R1 >>>= 16;
+ R3 >>>= 16;
+ R1 = R1 + R3;
+ R1 = R1 + R2;
+ R2 = cc;
+ R1 = R1 + R2;
+
+ R0 = R0 + R1;
+ RTS;
+
+.size ___smulsi3_highpart, .-___smulsi3_highpart
diff --git a/arch/blackfin/lib/strcmp.S b/arch/blackfin/lib/strcmp.S
new file mode 100644
index 000000000..9c8b98637
--- /dev/null
+++ b/arch/blackfin/lib/strcmp.S
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2005-2010 Analog Devices Inc.
+ *
+ * Licensed under the Clear BSD license or the GPL-2 (or later)
+ */
+
+#include <linux/linkage.h>
+
+/* void *strcmp(char *s1, const char *s2);
+ * R0 = address (s1)
+ * R1 = address (s2)
+ *
+ * Returns an integer less than, equal to, or greater than zero if s1
+ * (or the first n bytes thereof) is found, respectively, to be less
+ * than, to match, or be greater than s2.
+ */
+
+#ifdef CONFIG_STRCMP_L1
+.section .l1.text
+#else
+.text
+#endif
+
+.align 2
+
+ENTRY(_strcmp)
+ P0 = R0 ; /* s1 */
+ P1 = R1 ; /* s2 */
+
+1:
+ R0 = B[P0++] (Z); /* get *s1 */
+ R1 = B[P1++] (Z); /* get *s2 */
+ CC = R0 == R1; /* compare a byte */
+ if ! cc jump 2f; /* not equal, break out */
+ CC = R0; /* at end of s1? */
+ if cc jump 1b (bp); /* no, keep going */
+ jump.s 3f; /* strings are equal */
+2:
+ R0 = R0 - R1; /* *s1 - *s2 */
+3:
+ RTS;
+
+ENDPROC(_strcmp)
diff --git a/arch/blackfin/lib/strcpy.S b/arch/blackfin/lib/strcpy.S
new file mode 100644
index 000000000..9495aa77c
--- /dev/null
+++ b/arch/blackfin/lib/strcpy.S
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2005-2010 Analog Devices Inc.
+ *
+ * Licensed under the Clear BSD license or the GPL-2 (or later)
+ */
+
+#include <linux/linkage.h>
+
+/* void *strcpy(char *dest, const char *src);
+ * R0 = address (dest)
+ * R1 = address (src)
+ *
+ * Returns a pointer to the destination string dest
+ */
+
+#ifdef CONFIG_STRCPY_L1
+.section .l1.text
+#else
+.text
+#endif
+
+.align 2
+
+ENTRY(_strcpy)
+ P0 = R0 ; /* dst*/
+ P1 = R1 ; /* src*/
+
+1:
+ R1 = B [P1++] (Z);
+ B [P0++] = R1;
+ CC = R1;
+ if cc jump 1b (bp);
+ RTS;
+
+ENDPROC(_strcpy)
diff --git a/arch/blackfin/lib/strncmp.S b/arch/blackfin/lib/strncmp.S
new file mode 100644
index 000000000..3bfaedce8
--- /dev/null
+++ b/arch/blackfin/lib/strncmp.S
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2005-2010 Analog Devices Inc.
+ *
+ * Licensed under the Clear BSD license or the GPL-2 (or later)
+ */
+
+#include <linux/linkage.h>
+
+/* void *strncpy(char *s1, const char *s2, size_t n);
+ * R0 = address (dest)
+ * R1 = address (src)
+ * R2 = size (n)
+ * Returns a pointer to the destination string dest
+ */
+
+#ifdef CONFIG_STRNCMP_L1
+.section .l1.text
+#else
+.text
+#endif
+
+.align 2
+
+ENTRY(_strncmp)
+ CC = R2 == 0;
+ if CC JUMP 5f;
+
+ P0 = R0 ; /* s1 */
+ P1 = R1 ; /* s2 */
+1:
+ R0 = B[P0++] (Z); /* get *s1 */
+ R1 = B[P1++] (Z); /* get *s2 */
+ CC = R0 == R1; /* compare a byte */
+ if ! cc jump 3f; /* not equal, break out */
+ CC = R0; /* at end of s1? */
+ if ! cc jump 4f; /* yes, all done */
+ R2 += -1; /* no, adjust count */
+ CC = R2 == 0;
+ if ! cc jump 1b (bp); /* more to do, keep going */
+2:
+ R0 = 0; /* strings are equal */
+ jump.s 4f;
+3:
+ R0 = R0 - R1; /* *s1 - *s2 */
+4:
+ RTS;
+
+5:
+ R0 = 0;
+ RTS;
+
+ENDPROC(_strncmp)
diff --git a/arch/blackfin/lib/strncpy.S b/arch/blackfin/lib/strncpy.S
new file mode 100644
index 000000000..92fd1823b
--- /dev/null
+++ b/arch/blackfin/lib/strncpy.S
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2005-2010 Analog Devices Inc.
+ *
+ * Licensed under the Clear BSD license or the GPL-2 (or later)
+ */
+
+#include <linux/linkage.h>
+#include <asm/context.S>
+
+/* void *strncpy(char *dest, const char *src, size_t n);
+ * R0 = address (dest)
+ * R1 = address (src)
+ * R2 = size
+ * Returns a pointer (R0) to the destination string dest
+ * we do this by not changing R0
+ */
+
+#ifdef CONFIG_STRNCPY_L1
+.section .l1.text
+#else
+.text
+#endif
+
+.align 2
+
+ENTRY(_strncpy)
+ CC = R2 == 0;
+ if CC JUMP 6f;
+
+ P2 = R2 ; /* size */
+ P0 = R0 ; /* dst*/
+ P1 = R1 ; /* src*/
+
+ LSETUP (1f, 2f) LC0 = P2;
+1:
+ R1 = B [P1++] (Z);
+ B [P0++] = R1;
+ CC = R1 == 0;
+2:
+ if CC jump 3f;
+
+ RTS;
+
+ /* if src is shorter than n, we need to null pad bytes in dest
+ * but, we can get here when the last byte is zero, and we don't
+ * want to copy an extra byte at the end, so we need to check
+ */
+3:
+ R2 = LC0;
+ CC = R2
+ if ! CC jump 6f;
+
+ /* if the required null padded portion is small, do it here, rather than
+ * handling the overhead of memset (which is OK when things are big).
+ */
+ R3 = 0x20;
+ CC = R2 < R3;
+ IF CC jump 4f;
+
+ R2 += -1;
+
+ /* Set things up for memset
+ * R0 = address
+ * R1 = filler byte (this case it's zero, set above)
+ * R2 = count (set above)
+ */
+
+ I1 = R0;
+ R0 = RETS;
+ I0 = R0;
+ R0 = P0;
+ pseudo_long_call _memset, p0;
+ R0 = I0;
+ RETS = R0;
+ R0 = I1;
+ RTS;
+
+4:
+ LSETUP(5f, 5f) LC0;
+5:
+ B [P0++] = R1;
+6:
+ RTS;
+
+ENDPROC(_strncpy)
diff --git a/arch/blackfin/lib/udivsi3.S b/arch/blackfin/lib/udivsi3.S
new file mode 100644
index 000000000..748a6a2e8
--- /dev/null
+++ b/arch/blackfin/lib/udivsi3.S
@@ -0,0 +1,277 @@
+/*
+ * Copyright 2004-2009 Analog Devices Inc.
+ *
+ * Licensed under the Clear BSD license or the GPL-2 (or later)
+ */
+
+#include <linux/linkage.h>
+
+#define CARRY AC0
+
+#ifdef CONFIG_ARITHMETIC_OPS_L1
+.section .l1.text
+#else
+.text
+#endif
+
+
+ENTRY(___udivsi3)
+
+ CC = R0 < R1 (IU); /* If X < Y, always return 0 */
+ IF CC JUMP .Lreturn_ident;
+
+ R2 = R1 << 16;
+ CC = R2 <= R0 (IU);
+ IF CC JUMP .Lidents;
+
+ R2 = R0 >> 31; /* if X is a 31-bit number */
+ R3 = R1 >> 15; /* and Y is a 15-bit number */
+ R2 = R2 | R3; /* then it's okay to use the DIVQ builtins (fallthrough to fast)*/
+ CC = R2;
+ IF CC JUMP .Ly_16bit;
+
+/* METHOD 1: FAST DIVQ
+ We know we have a 31-bit dividend, and 15-bit divisor so we can use the
+ simple divq approach (first setting AQ to 0 - implying unsigned division,
+ then 16 DIVQ's).
+*/
+
+ AQ = CC; /* Clear AQ (CC==0) */
+
+/* ISR States: When dividing two integers (32.0/16.0) using divide primitives,
+ we need to shift the dividend one bit to the left.
+ We have already checked that we have a 31-bit number so we are safe to do
+ that.
+*/
+ R0 <<= 1;
+ DIVQ(R0, R1); // 1
+ DIVQ(R0, R1); // 2
+ DIVQ(R0, R1); // 3
+ DIVQ(R0, R1); // 4
+ DIVQ(R0, R1); // 5
+ DIVQ(R0, R1); // 6
+ DIVQ(R0, R1); // 7
+ DIVQ(R0, R1); // 8
+ DIVQ(R0, R1); // 9
+ DIVQ(R0, R1); // 10
+ DIVQ(R0, R1); // 11
+ DIVQ(R0, R1); // 12
+ DIVQ(R0, R1); // 13
+ DIVQ(R0, R1); // 14
+ DIVQ(R0, R1); // 15
+ DIVQ(R0, R1); // 16
+ R0 = R0.L (Z);
+ RTS;
+
+.Ly_16bit:
+ /* We know that the upper 17 bits of Y might have bits set,
+ ** or that the sign bit of X might have a bit. If Y is a
+ ** 16-bit number, but not bigger, then we can use the builtins
+ ** with a post-divide correction.
+ ** R3 currently holds Y>>15, which means R3's LSB is the
+ ** bit we're interested in.
+ */
+
+ /* According to the ISR, to use the Divide primitives for
+ ** unsigned integer divide, the useable range is 31 bits
+ */
+ CC = ! BITTST(R0, 31);
+
+ /* IF condition is true we can scale our inputs and use the divide primitives,
+ ** with some post-adjustment
+ */
+ R3 += -1; /* if so, Y is 0x00008nnn */
+ CC &= AZ;
+
+ /* If condition is true we can scale our inputs and use the divide primitives,
+ ** with some post-adjustment
+ */
+ R3 = R1 >> 1; /* Pre-scaled divisor for primitive case */
+ R2 = R0 >> 16;
+
+ R2 = R3 - R2; /* shifted divisor < upper 16 bits of dividend */
+ CC &= CARRY;
+ IF CC JUMP .Lshift_and_correct;
+
+ /* Fall through to the identities */
+
+/* METHOD 2: identities and manual calculation
+ We are not able to use the divide primites, but may still catch some special
+ cases.
+*/
+.Lidents:
+ /* Test for common identities. Value to be returned is placed in R2. */
+ CC = R0 == 0; /* 0/Y => 0 */
+ IF CC JUMP .Lreturn_r0;
+ CC = R0 == R1; /* X==Y => 1 */
+ IF CC JUMP .Lreturn_ident;
+ CC = R1 == 1; /* X/1 => X */
+ IF CC JUMP .Lreturn_ident;
+
+ R2.L = ONES R1;
+ R2 = R2.L (Z);
+ CC = R2 == 1;
+ IF CC JUMP .Lpower_of_two;
+
+ [--SP] = (R7:5); /* Push registers R5-R7 */
+
+ /* Idents don't match. Go for the full operation. */
+
+
+ R6 = 2; /* assume we'll shift two */
+ R3 = 1;
+
+ P2 = R1;
+ /* If either R0 or R1 have sign set, */
+ /* divide them by two, and note it's */
+ /* been done. */
+ CC = R1 < 0;
+ R2 = R1 >> 1;
+ IF CC R1 = R2; /* Possibly-shifted R1 */
+ IF !CC R6 = R3; /* R1 doesn't, so at most 1 shifted */
+
+ P0 = 0;
+ R3 = -R1;
+ [--SP] = R3;
+ R2 = R0 >> 1;
+ R2 = R0 >> 1;
+ CC = R0 < 0;
+ IF CC P0 = R6; /* Number of values divided */
+ IF !CC R2 = R0; /* Shifted R0 */
+
+ /* P0 is 0, 1 (NR/=2) or 2 (NR/=2, DR/=2) */
+
+ /* r2 holds Copy dividend */
+ R3 = 0; /* Clear partial remainder */
+ R7 = 0; /* Initialise quotient bit */
+
+ P1 = 32; /* Set loop counter */
+ LSETUP(.Lulst, .Lulend) LC0 = P1; /* Set loop counter */
+.Lulst: R6 = R2 >> 31; /* R6 = sign bit of R2, for carry */
+ R2 = R2 << 1; /* Shift 64 bit dividend up by 1 bit */
+ R3 = R3 << 1 || R5 = [SP];
+ R3 = R3 | R6; /* Include any carry */
+ CC = R7 < 0; /* Check quotient(AQ) */
+ /* If AQ==0, we'll sub divisor */
+ IF CC R5 = R1; /* and if AQ==1, we'll add it. */
+ R3 = R3 + R5; /* Add/sub divsor to partial remainder */
+ R7 = R3 ^ R1; /* Generate next quotient bit */
+
+ R5 = R7 >> 31; /* Get AQ */
+ BITTGL(R5, 0); /* Invert it, to get what we'll shift */
+.Lulend: R2 = R2 + R5; /* and "shift" it in. */
+
+ CC = P0 == 0; /* Check how many inputs we shifted */
+ IF CC JUMP .Lno_mult; /* if none... */
+ R6 = R2 << 1;
+ CC = P0 == 1;
+ IF CC R2 = R6; /* if 1, Q = Q*2 */
+ IF !CC R1 = P2; /* if 2, restore stored divisor */
+
+ R3 = R2; /* Copy of R2 */
+ R3 *= R1; /* Q * divisor */
+ R5 = R0 - R3; /* Z = (dividend - Q * divisor) */
+ CC = R1 <= R5 (IU); /* Check if divisor <= Z? */
+ R6 = CC; /* if yes, R6 = 1 */
+ R2 = R2 + R6; /* if yes, add one to quotient(Q) */
+.Lno_mult:
+ SP += 4;
+ (R7:5) = [SP++]; /* Pop registers R5-R7 */
+ R0 = R2; /* Store quotient */
+ RTS;
+
+.Lreturn_ident:
+ CC = R0 < R1 (IU); /* If X < Y, always return 0 */
+ R2 = 0;
+ IF CC JUMP .Ltrue_return_ident;
+ R2 = -1 (X); /* X/0 => 0xFFFFFFFF */
+ CC = R1 == 0;
+ IF CC JUMP .Ltrue_return_ident;
+ R2 = -R2; /* R2 now 1 */
+ CC = R0 == R1; /* X==Y => 1 */
+ IF CC JUMP .Ltrue_return_ident;
+ R2 = R0; /* X/1 => X */
+ /*FALLTHRU*/
+
+.Ltrue_return_ident:
+ R0 = R2;
+.Lreturn_r0:
+ RTS;
+
+.Lpower_of_two:
+ /* Y has a single bit set, which means it's a power of two.
+ ** That means we can perform the division just by shifting
+ ** X to the right the appropriate number of bits
+ */
+
+ /* signbits returns the number of sign bits, minus one.
+ ** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
+ ** to shift right n-signbits spaces. It also means 0x80000000
+ ** is a special case, because that *also* gives a signbits of 0
+ */
+
+ R2 = R0 >> 31;
+ CC = R1 < 0;
+ IF CC JUMP .Ltrue_return_ident;
+
+ R1.l = SIGNBITS R1;
+ R1 = R1.L (Z);
+ R1 += -30;
+ R0 = LSHIFT R0 by R1.L;
+ RTS;
+
+/* METHOD 3: PRESCALE AND USE THE DIVIDE PRIMITIVES WITH SOME POST-CORRECTION
+ Two scaling operations are required to use the divide primitives with a
+ divisor > 0x7FFFF.
+ Firstly (as in method 1) we need to shift the dividend 1 to the left for
+ integer division.
+ Secondly we need to shift both the divisor and dividend 1 to the right so
+ both are in range for the primitives.
+ The left/right shift of the dividend does nothing so we can skip it.
+*/
+.Lshift_and_correct:
+ R2 = R0;
+ // R3 is already R1 >> 1
+ CC=!CC;
+ AQ = CC; /* Clear AQ, got here with CC = 0 */
+ DIVQ(R2, R3); // 1
+ DIVQ(R2, R3); // 2
+ DIVQ(R2, R3); // 3
+ DIVQ(R2, R3); // 4
+ DIVQ(R2, R3); // 5
+ DIVQ(R2, R3); // 6
+ DIVQ(R2, R3); // 7
+ DIVQ(R2, R3); // 8
+ DIVQ(R2, R3); // 9
+ DIVQ(R2, R3); // 10
+ DIVQ(R2, R3); // 11
+ DIVQ(R2, R3); // 12
+ DIVQ(R2, R3); // 13
+ DIVQ(R2, R3); // 14
+ DIVQ(R2, R3); // 15
+ DIVQ(R2, R3); // 16
+
+ /* According to the Instruction Set Reference:
+ To divide by a divisor > 0x7FFF,
+ 1. prescale and perform divide to obtain quotient (Q) (done above),
+ 2. multiply quotient by unscaled divisor (result M)
+ 3. subtract the product from the divident to get an error (E = X - M)
+ 4. if E < divisor (Y) subtract 1, if E > divisor (Y) add 1, else return quotient (Q)
+ */
+ R3 = R2.L (Z); /* Q = X' / Y' */
+ R2 = R3; /* Preserve Q */
+ R2 *= R1; /* M = Q * Y */
+ R2 = R0 - R2; /* E = X - M */
+ R0 = R3; /* Copy Q into result reg */
+
+/* Correction: If result of the multiply is negative, we overflowed
+ and need to correct the result by subtracting 1 from the result.*/
+ R3 = 0xFFFF (Z);
+ R2 = R2 >> 16; /* E >> 16 */
+ CC = R2 == R3;
+ R3 = 1 ;
+ R1 = R0 - R3;
+ IF CC R0 = R1;
+ RTS;
+
+ENDPROC(___udivsi3)
diff --git a/arch/blackfin/lib/umodsi3.S b/arch/blackfin/lib/umodsi3.S
new file mode 100644
index 000000000..3794c00d8
--- /dev/null
+++ b/arch/blackfin/lib/umodsi3.S
@@ -0,0 +1,49 @@
+/*
+ * libgcc1 routines for Blackfin 5xx
+ *
+ * Copyright 2004-2009 Analog Devices Inc.
+ *
+ * Licensed under the Clear BSD license or the GPL-2 (or later)
+ */
+
+#ifdef CONFIG_ARITHMETIC_OPS_L1
+.section .l1.text
+#else
+.text
+#endif
+
+.extern ___udivsi3;
+.type ___udivsi3, STT_FUNC;
+.globl ___umodsi3
+.type ___umodsi3, STT_FUNC;
+___umodsi3:
+
+ CC=R0==0;
+ IF CC JUMP .LRETURN_R0; /* Return 0, if NR == 0 */
+ CC= R1==0;
+ IF CC JUMP .LRETURN_ZERO_VAL; /* Return 0, if DR == 0 */
+ CC=R0==R1;
+ IF CC JUMP .LRETURN_ZERO_VAL; /* Return 0, if NR == DR */
+ CC = R1 == 1;
+ IF CC JUMP .LRETURN_ZERO_VAL; /* Return 0, if DR == 1 */
+ CC = R0<R1 (IU);
+ IF CC JUMP .LRETURN_R0; /* Return dividend (R0),IF NR<DR */
+
+ [--SP] = (R7:6); /* Push registers and */
+ [--SP] = RETS; /* Return address */
+ R7 = R0; /* Copy of R0 */
+ R6 = R1;
+ SP += -12; /* Should always provide this space */
+ CALL ___udivsi3; /* Compute unsigned quotient using ___udiv32()*/
+ SP += 12;
+ R0 *= R6; /* Quotient * divisor */
+ R0 = R7 - R0; /* Dividend - (quotient * divisor) */
+ RETS = [SP++]; /* Pop return address */
+ ( R7:6) = [SP++]; /* And registers */
+ RTS; /* Return remainder */
+.LRETURN_ZERO_VAL:
+ R0 = 0;
+.LRETURN_R0:
+ RTS;
+
+.size ___umodsi3, .-___umodsi3
diff --git a/arch/blackfin/lib/umulsi3_highpart.S b/arch/blackfin/lib/umulsi3_highpart.S
new file mode 100644
index 000000000..0dcace96e
--- /dev/null
+++ b/arch/blackfin/lib/umulsi3_highpart.S
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2007 Analog Devices Inc.
+ *
+ * Licensed under the Clear BSD license or the GPL-2 (or later)
+ */
+
+.align 2
+.global ___umulsi3_highpart;
+.type ___umulsi3_highpart, STT_FUNC;
+
+#ifdef CONFIG_ARITHMETIC_OPS_L1
+.section .l1.text
+#else
+.text
+#endif
+
+___umulsi3_highpart:
+ R2 = R1.H * R0.H, R3 = R1.L * R0.H (FU);
+ R0 = R1.L * R0.L, R1 = R1.H * R0.L (FU);
+ R0 >>= 16;
+ /* Unsigned multiplication has the nice property that we can
+ ignore carry on this first addition. */
+ R0 = R0 + R3;
+ R0 = R0 + R1;
+ cc = ac0;
+ R1 = cc;
+ R1 = PACK(R1.l,R0.h);
+ R0 = R1 + R2;
+ RTS;
+
+.size ___umulsi3_highpart, .-___umulsi3_highpart