diff options
Diffstat (limited to 'lib/mpi')
-rw-r--r-- | lib/mpi/Makefile | 22 | ||||
-rw-r--r-- | lib/mpi/generic_mpih-add1.c | 61 | ||||
-rw-r--r-- | lib/mpi/generic_mpih-lshift.c | 63 | ||||
-rw-r--r-- | lib/mpi/generic_mpih-mul1.c | 57 | ||||
-rw-r--r-- | lib/mpi/generic_mpih-mul2.c | 60 | ||||
-rw-r--r-- | lib/mpi/generic_mpih-mul3.c | 61 | ||||
-rw-r--r-- | lib/mpi/generic_mpih-rshift.c | 63 | ||||
-rw-r--r-- | lib/mpi/generic_mpih-sub1.c | 60 | ||||
-rw-r--r-- | lib/mpi/longlong.h | 1379 | ||||
-rw-r--r-- | lib/mpi/mpi-bit.c | 56 | ||||
-rw-r--r-- | lib/mpi/mpi-cmp.c | 68 | ||||
-rw-r--r-- | lib/mpi/mpi-inline.h | 122 | ||||
-rw-r--r-- | lib/mpi/mpi-internal.h | 257 | ||||
-rw-r--r-- | lib/mpi/mpi-pow.c | 323 | ||||
-rw-r--r-- | lib/mpi/mpicoder.c | 260 | ||||
-rw-r--r-- | lib/mpi/mpih-cmp.c | 56 | ||||
-rw-r--r-- | lib/mpi/mpih-div.c | 236 | ||||
-rw-r--r-- | lib/mpi/mpih-mul.c | 497 | ||||
-rw-r--r-- | lib/mpi/mpiutil.c | 126 |
19 files changed, 3827 insertions, 0 deletions
diff --git a/lib/mpi/Makefile b/lib/mpi/Makefile new file mode 100644 index 000000000..019a68c90 --- /dev/null +++ b/lib/mpi/Makefile @@ -0,0 +1,22 @@ +# +# MPI multiprecision maths library (from gpg) +# + +obj-$(CONFIG_MPILIB) = mpi.o + +mpi-y = \ + generic_mpih-lshift.o \ + generic_mpih-mul1.o \ + generic_mpih-mul2.o \ + generic_mpih-mul3.o \ + generic_mpih-rshift.o \ + generic_mpih-sub1.o \ + generic_mpih-add1.o \ + mpicoder.o \ + mpi-bit.o \ + mpi-cmp.o \ + mpih-cmp.o \ + mpih-div.o \ + mpih-mul.o \ + mpi-pow.o \ + mpiutil.o diff --git a/lib/mpi/generic_mpih-add1.c b/lib/mpi/generic_mpih-add1.c new file mode 100644 index 000000000..c94c7dd34 --- /dev/null +++ b/lib/mpi/generic_mpih-add1.c @@ -0,0 +1,61 @@ +/* mpihelp-add_1.c - MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, + * 2000 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" +#include "longlong.h" + +mpi_limb_t +mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_ptr_t s2_ptr, mpi_size_t size) +{ + mpi_limb_t x, y, cy; + mpi_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to one addend */ + cy = y < cy; /* get out carry from that addition */ + y += x; /* add other addend */ + cy += y < x; /* get out carry from that add, combine */ + res_ptr[j] = y; + } while (++j); + + return cy; +} diff --git a/lib/mpi/generic_mpih-lshift.c b/lib/mpi/generic_mpih-lshift.c new file mode 100644 index 000000000..863189272 --- /dev/null +++ b/lib/mpi/generic_mpih-lshift.c @@ -0,0 +1,63 @@ +/* mpihelp-lshift.c - MPI helper functions + * Copyright (C) 1994, 1996, 1998, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" + +/* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left + * and store the USIZE least significant digits of the result at WP. + * Return the bits shifted out from the most significant digit. + * + * Argument constraints: + * 1. 0 < CNT < BITS_PER_MP_LIMB + * 2. If the result is to be written over the input, WP must be >= UP. + */ + +mpi_limb_t +mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned int cnt) +{ + mpi_limb_t high_limb, low_limb; + unsigned sh_1, sh_2; + mpi_size_t i; + mpi_limb_t retval; + + sh_1 = cnt; + wp += 1; + sh_2 = BITS_PER_MPI_LIMB - sh_1; + i = usize - 1; + low_limb = up[i]; + retval = low_limb >> sh_2; + high_limb = low_limb; + while (--i >= 0) { + low_limb = up[i]; + wp[i] = (high_limb << sh_1) | (low_limb >> sh_2); + high_limb = low_limb; + } + wp[i] = high_limb << sh_1; + + return retval; +} diff --git a/lib/mpi/generic_mpih-mul1.c b/lib/mpi/generic_mpih-mul1.c new file mode 100644 index 000000000..1668dfd90 --- /dev/null +++ b/lib/mpi/generic_mpih-mul1.c @@ -0,0 +1,57 @@ +/* mpihelp-mul_1.c - MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" +#include "longlong.h" + +mpi_limb_t +mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_limb_t s2_limb) +{ + mpi_limb_t cy_limb; + mpi_size_t j; + mpi_limb_t prod_high, prod_low; + + /* The loop counter and index J goes from -S1_SIZE to -1. This way + * the loop becomes faster. */ + j = -s1_size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + res_ptr -= j; + + cy_limb = 0; + do { + umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb); + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high; + res_ptr[j] = prod_low; + } while (++j); + + return cy_limb; +} diff --git a/lib/mpi/generic_mpih-mul2.c b/lib/mpi/generic_mpih-mul2.c new file mode 100644 index 000000000..8a7b29ee1 --- /dev/null +++ b/lib/mpi/generic_mpih-mul2.c @@ -0,0 +1,60 @@ +/* mpihelp-mul_2.c - MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" +#include "longlong.h" + +mpi_limb_t +mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb) +{ + mpi_limb_t cy_limb; + mpi_size_t j; + mpi_limb_t prod_high, prod_low; + mpi_limb_t x; + + /* The loop counter and index J goes from -SIZE to -1. This way + * the loop becomes faster. */ + j = -s1_size; + res_ptr -= j; + s1_ptr -= j; + + cy_limb = 0; + do { + umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high; + + x = res_ptr[j]; + prod_low = x + prod_low; + cy_limb += prod_low < x ? 1 : 0; + res_ptr[j] = prod_low; + } while (++j); + return cy_limb; +} diff --git a/lib/mpi/generic_mpih-mul3.c b/lib/mpi/generic_mpih-mul3.c new file mode 100644 index 000000000..f96df327b --- /dev/null +++ b/lib/mpi/generic_mpih-mul3.c @@ -0,0 +1,61 @@ +/* mpihelp-mul_3.c - MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" +#include "longlong.h" + +mpi_limb_t +mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb) +{ + mpi_limb_t cy_limb; + mpi_size_t j; + mpi_limb_t prod_high, prod_low; + mpi_limb_t x; + + /* The loop counter and index J goes from -SIZE to -1. This way + * the loop becomes faster. */ + j = -s1_size; + res_ptr -= j; + s1_ptr -= j; + + cy_limb = 0; + do { + umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high; + + x = res_ptr[j]; + prod_low = x - prod_low; + cy_limb += prod_low > x ? 1 : 0; + res_ptr[j] = prod_low; + } while (++j); + + return cy_limb; +} diff --git a/lib/mpi/generic_mpih-rshift.c b/lib/mpi/generic_mpih-rshift.c new file mode 100644 index 000000000..ffa328818 --- /dev/null +++ b/lib/mpi/generic_mpih-rshift.c @@ -0,0 +1,63 @@ +/* mpih-rshift.c - MPI helper functions + * Copyright (C) 1994, 1996, 1998, 1999, + * 2000, 2001 Free Software Foundation, Inc. + * + * This file is part of GNUPG + * + * GNUPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GNUPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" + +/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right + * and store the USIZE least significant limbs of the result at WP. + * The bits shifted out to the right are returned. + * + * Argument constraints: + * 1. 0 < CNT < BITS_PER_MP_LIMB + * 2. If the result is to be written over the input, WP must be <= UP. + */ + +mpi_limb_t +mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt) +{ + mpi_limb_t high_limb, low_limb; + unsigned sh_1, sh_2; + mpi_size_t i; + mpi_limb_t retval; + + sh_1 = cnt; + wp -= 1; + sh_2 = BITS_PER_MPI_LIMB - sh_1; + high_limb = up[0]; + retval = high_limb << sh_2; + low_limb = high_limb; + for (i = 1; i < usize; i++) { + high_limb = up[i]; + wp[i] = (low_limb >> sh_1) | (high_limb << sh_2); + low_limb = high_limb; + } + wp[i] = low_limb >> sh_1; + + return retval; +} diff --git a/lib/mpi/generic_mpih-sub1.c b/lib/mpi/generic_mpih-sub1.c new file mode 100644 index 000000000..5d98ab7d6 --- /dev/null +++ b/lib/mpi/generic_mpih-sub1.c @@ -0,0 +1,60 @@ +/* mpihelp-add_2.c - MPI helper functions + * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" +#include "longlong.h" + +mpi_limb_t +mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_ptr_t s2_ptr, mpi_size_t size) +{ + mpi_limb_t x, y, cy; + mpi_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to subtrahend */ + cy = y < cy; /* get out carry from that addition */ + y = x - y; /* main subtract */ + cy += y > x; /* get out carry from the subtract, combine */ + res_ptr[j] = y; + } while (++j); + + return cy; +} diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h new file mode 100644 index 000000000..a89d04159 --- /dev/null +++ b/lib/mpi/longlong.h @@ -0,0 +1,1379 @@ +/* longlong.h -- definitions for mixed size 32/64 bit arithmetic. + * Note: I added some stuff for use with gnupg + * + * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998, + * 2000, 2001, 2002, 2003 Free Software Foundation, Inc. + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + * License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this file; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. */ + +#include <asm-generic/bitops/count_zeros.h> + +/* You have to define the following before including this file: + * + * UWtype -- An unsigned type, default type for operations (typically a "word") + * UHWtype -- An unsigned type, at least half the size of UWtype. + * UDWtype -- An unsigned type, at least twice as large a UWtype + * W_TYPE_SIZE -- size in bits of UWtype + * + * SItype, USItype -- Signed and unsigned 32 bit types. + * DItype, UDItype -- Signed and unsigned 64 bit types. + * + * On a 32 bit machine UWtype should typically be USItype; + * on a 64 bit machine, UWtype should typically be UDItype. +*/ + +#define __BITS4 (W_TYPE_SIZE / 4) +#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) +#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) +#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) + +/* This is used to make sure no undesirable sharing between different libraries + that use this file takes place. */ +#ifndef __MPN +#define __MPN(x) __##x +#endif + +/* Define auxiliary asm macros. + * + * 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two + * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype + * word product in HIGH_PROD and LOW_PROD. + * + * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a + * UDWtype product. This is just a variant of umul_ppmm. + + * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + * denominator) divides a UDWtype, composed by the UWtype integers + * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient + * in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less + * than DENOMINATOR for correct operation. If, in addition, the most + * significant bit of DENOMINATOR must be 1, then the pre-processor symbol + * UDIV_NEEDS_NORMALIZATION is defined to 1. + * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + * denominator). Like udiv_qrnnd but the numbers are signed. The quotient + * is rounded towards 0. + * + * 5) count_leading_zeros(count, x) counts the number of zero-bits from the + * msb to the first non-zero bit in the UWtype X. This is the number of + * steps X needs to be shifted left to set the msb. Undefined for X == 0, + * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. + * + * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts + * from the least significant end. + * + * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, + * high_addend_2, low_addend_2) adds two UWtype integers, composed by + * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 + * respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow + * (i.e. carry out) is not stored anywhere, and is lost. + * + * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, + * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, + * composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and + * LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE + * and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, + * and is lost. + * + * If any of these macros are left undefined for a particular CPU, + * C macros are used. */ + +/* The CPUs come in alphabetical order below. + * + * Please add support for more CPUs here, or improve the current support + * for the CPUs below! */ + +#if defined(__GNUC__) && !defined(NO_ASM) + +/* We sometimes need to clobber "cc" with gcc2, but that would not be + understood by gcc1. Use cpp to avoid major code duplication. */ +#if __GNUC__ < 2 +#define __CLOBBER_CC +#define __AND_CLOBBER_CC +#else /* __GNUC__ >= 2 */ +#define __CLOBBER_CC : "cc" +#define __AND_CLOBBER_CC , "cc" +#endif /* __GNUC__ < 2 */ + +/*************************************** + ************** A29K ***************** + ***************************************/ +#if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add %1,%4,%5\n" \ + "addc %0,%2,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %1,%4,%5\n" \ + "subc %0,%2,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "r" ((USItype)(al)), \ + "rI" ((USItype)(bl))) +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("multiplu %0,%1,%2" \ + : "=r" ((USItype)(xl)) \ + : "r" (__m0), \ + "r" (__m1)); \ + __asm__ ("multmu %0,%1,%2" \ + : "=r" ((USItype)(xh)) \ + : "r" (__m0), \ + "r" (__m1)); \ +} while (0) +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("dividu %0,%3,%4" \ + : "=r" ((USItype)(q)), \ + "=q" ((USItype)(r)) \ + : "1" ((USItype)(n1)), \ + "r" ((USItype)(n0)), \ + "r" ((USItype)(d))) +#endif /* __a29k__ */ + +#if defined(__alpha) && W_TYPE_SIZE == 64 +#define umul_ppmm(ph, pl, m0, m1) \ +do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + (ph) = __builtin_alpha_umulh(__m0, __m1); \ + (pl) = __m0 * __m1; \ +} while (0) +#define UMUL_TIME 46 +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ +do { UDItype __r; \ + (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ + (r) = __r; \ +} while (0) +extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype); +#define UDIV_TIME 220 +#endif /* LONGLONG_STANDALONE */ +#endif /* __alpha */ + +/*************************************** + ************** ARM ****************** + ***************************************/ +#if defined(__arm__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("adds %1, %4, %5\n" \ + "adc %0, %2, %3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subs %1, %4, %5\n" \ + "sbc %0, %2, %3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "r" ((USItype)(al)), \ + "rI" ((USItype)(bl))) +#if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__ +#define umul_ppmm(xh, xl, a, b) \ + __asm__ ("%@ Inlined umul_ppmm\n" \ + "mov %|r0, %2, lsr #16 @ AAAA\n" \ + "mov %|r2, %3, lsr #16 @ BBBB\n" \ + "bic %|r1, %2, %|r0, lsl #16 @ aaaa\n" \ + "bic %0, %3, %|r2, lsl #16 @ bbbb\n" \ + "mul %1, %|r1, %|r2 @ aaaa * BBBB\n" \ + "mul %|r2, %|r0, %|r2 @ AAAA * BBBB\n" \ + "mul %|r1, %0, %|r1 @ aaaa * bbbb\n" \ + "mul %0, %|r0, %0 @ AAAA * bbbb\n" \ + "adds %|r0, %1, %0 @ central sum\n" \ + "addcs %|r2, %|r2, #65536\n" \ + "adds %1, %|r1, %|r0, lsl #16\n" \ + "adc %0, %|r2, %|r0, lsr #16" \ + : "=&r" ((USItype)(xh)), \ + "=r" ((USItype)(xl)) \ + : "r" ((USItype)(a)), \ + "r" ((USItype)(b)) \ + : "r0", "r1", "r2") +#else +#define umul_ppmm(xh, xl, a, b) \ + __asm__ ("%@ Inlined umul_ppmm\n" \ + "umull %r1, %r0, %r2, %r3" \ + : "=&r" ((USItype)(xh)), \ + "=r" ((USItype)(xl)) \ + : "r" ((USItype)(a)), \ + "r" ((USItype)(b)) \ + : "r0", "r1") +#endif +#define UMUL_TIME 20 +#define UDIV_TIME 100 +#endif /* __arm__ */ + +/*************************************** + ************** CLIPPER ************** + ***************************************/ +#if defined(__clipper__) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __xx; \ + __asm__ ("mulwux %2,%0" \ + : "=r" (__xx.__ll) \ + : "%0" ((USItype)(u)), \ + "r" ((USItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#define smul_ppmm(w1, w0, u, v) \ + ({union {DItype __ll; \ + struct {SItype __l, __h; } __i; \ + } __xx; \ + __asm__ ("mulwx %2,%0" \ + : "=r" (__xx.__ll) \ + : "%0" ((SItype)(u)), \ + "r" ((SItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("mulwux %2,%0" \ + : "=r" (__w) \ + : "%0" ((USItype)(u)), \ + "r" ((USItype)(v))); \ + __w; }) +#endif /* __clipper__ */ + +/*************************************** + ************** GMICRO *************** + ***************************************/ +#if defined(__gmicro__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add.w %5,%1\n" \ + "addx %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub.w %5,%1\n" \ + "subx %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define umul_ppmm(ph, pl, m0, m1) \ + __asm__ ("mulx %3,%0,%1" \ + : "=g" ((USItype)(ph)), \ + "=r" ((USItype)(pl)) \ + : "%0" ((USItype)(m0)), \ + "g" ((USItype)(m1))) +#define udiv_qrnnd(q, r, nh, nl, d) \ + __asm__ ("divx %4,%0,%1" \ + : "=g" ((USItype)(q)), \ + "=r" ((USItype)(r)) \ + : "1" ((USItype)(nh)), \ + "0" ((USItype)(nl)), \ + "g" ((USItype)(d))) +#endif + +/*************************************** + ************** HPPA ***************** + ***************************************/ +#if defined(__hppa) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add %4,%5,%1\n" \ + "addc %2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%rM" ((USItype)(ah)), \ + "rM" ((USItype)(bh)), \ + "%rM" ((USItype)(al)), \ + "rM" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %4,%5,%1\n" \ + "subb %2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "rM" ((USItype)(ah)), \ + "rM" ((USItype)(bh)), \ + "rM" ((USItype)(al)), \ + "rM" ((USItype)(bl))) +#if 0 && defined(_PA_RISC1_1) +/* xmpyu uses floating point register which is not allowed in Linux kernel. */ +#define umul_ppmm(wh, wl, u, v) \ +do { \ + union {UDItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __xx; \ + __asm__ ("xmpyu %1,%2,%0" \ + : "=*f" (__xx.__ll) \ + : "*f" ((USItype)(u)), \ + "*f" ((USItype)(v))); \ + (wh) = __xx.__i.__h; \ + (wl) = __xx.__i.__l; \ +} while (0) +#define UMUL_TIME 8 +#define UDIV_TIME 60 +#else +#define UMUL_TIME 40 +#define UDIV_TIME 80 +#endif +#if 0 /* #ifndef LONGLONG_STANDALONE */ +#define udiv_qrnnd(q, r, n1, n0, d) \ +do { USItype __r; \ + (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ + (r) = __r; \ +} while (0) +extern USItype __udiv_qrnnd(); +#endif /* LONGLONG_STANDALONE */ +#endif /* hppa */ + +/*************************************** + ************** I370 ***************** + ***************************************/ +#if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32 +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ + union {UDItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __xx; \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mr %0,%3" \ + : "=r" (__xx.__i.__h), \ + "=r" (__xx.__i.__l) \ + : "%1" (__m0), \ + "r" (__m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ + (xh) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ +} while (0) +#define smul_ppmm(xh, xl, m0, m1) \ +do { \ + union {DItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __xx; \ + __asm__ ("mr %0,%3" \ + : "=r" (__xx.__i.__h), \ + "=r" (__xx.__i.__l) \ + : "%1" (m0), \ + "r" (m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ +} while (0) +#define sdiv_qrnnd(q, r, n1, n0, d) \ +do { \ + union {DItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __xx; \ + __xx.__i.__h = n1; __xx.__i.__l = n0; \ + __asm__ ("dr %0,%2" \ + : "=r" (__xx.__ll) \ + : "0" (__xx.__ll), "r" (d)); \ + (q) = __xx.__i.__l; (r) = __xx.__i.__h; \ +} while (0) +#endif + +/*************************************** + ************** I386 ***************** + ***************************************/ +#undef __i386__ +#if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addl %5,%1\n" \ + "adcl %3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subl %5,%1\n" \ + "sbbl %3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mull %3" \ + : "=a" ((USItype)(w0)), \ + "=d" ((USItype)(w1)) \ + : "%0" ((USItype)(u)), \ + "rm" ((USItype)(v))) +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divl %4" \ + : "=a" ((USItype)(q)), \ + "=d" ((USItype)(r)) \ + : "0" ((USItype)(n0)), \ + "1" ((USItype)(n1)), \ + "rm" ((USItype)(d))) +#ifndef UMUL_TIME +#define UMUL_TIME 40 +#endif +#ifndef UDIV_TIME +#define UDIV_TIME 40 +#endif +#endif /* 80x86 */ + +/*************************************** + ************** I860 ***************** + ***************************************/ +#if defined(__i860__) && W_TYPE_SIZE == 32 +#define rshift_rhlc(r, h, l, c) \ + __asm__ ("shr %3,r0,r0\n" \ + "shrd %1,%2,%0" \ + "=r" (r) : "r" (h), "r" (l), "rn" (c)) +#endif /* i860 */ + +/*************************************** + ************** I960 ***************** + ***************************************/ +#if defined(__i960__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("cmpo 1,0\n" \ + "addc %5,%4,%1\n" \ + "addc %3,%2,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%dI" ((USItype)(ah)), \ + "dI" ((USItype)(bh)), \ + "%dI" ((USItype)(al)), \ + "dI" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("cmpo 0,0\n" \ + "subc %5,%4,%1\n" \ + "subc %3,%2,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "dI" ((USItype)(ah)), \ + "dI" ((USItype)(bh)), \ + "dI" ((USItype)(al)), \ + "dI" ((USItype)(bl))) +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __xx; \ + __asm__ ("emul %2,%1,%0" \ + : "=d" (__xx.__ll) \ + : "%dI" ((USItype)(u)), \ + "dI" ((USItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("emul %2,%1,%0" \ + : "=d" (__w) \ + : "%dI" ((USItype)(u)), \ + "dI" ((USItype)(v))); \ + __w; }) +#define udiv_qrnnd(q, r, nh, nl, d) \ +do { \ + union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __nn; \ + __nn.__i.__h = (nh); __nn.__i.__l = (nl); \ + __asm__ ("ediv %d,%n,%0" \ + : "=d" (__rq.__ll) \ + : "dI" (__nn.__ll), \ + "dI" ((USItype)(d))); \ + (r) = __rq.__i.__l; (q) = __rq.__i.__h; \ +} while (0) +#if defined(__i960mx) /* what is the proper symbol to test??? */ +#define rshift_rhlc(r, h, l, c) \ +do { \ + union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __nn; \ + __nn.__i.__h = (h); __nn.__i.__l = (l); \ + __asm__ ("shre %2,%1,%0" \ + : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \ +} +#endif /* i960mx */ +#endif /* i960 */ + +/*************************************** + ************** 68000 **************** + ***************************************/ +#if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add%.l %5,%1\n" \ + "addx%.l %3,%0" \ + : "=d" ((USItype)(sh)), \ + "=&d" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "d" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub%.l %5,%1\n" \ + "subx%.l %3,%0" \ + : "=d" ((USItype)(sh)), \ + "=&d" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "d" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mulu%.l %3,%1:%0" \ + : "=d" ((USItype)(w0)), \ + "=d" ((USItype)(w1)) \ + : "%0" ((USItype)(u)), \ + "dmi" ((USItype)(v))) +#define UMUL_TIME 45 +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divu%.l %4,%1:%0" \ + : "=d" ((USItype)(q)), \ + "=d" ((USItype)(r)) \ + : "0" ((USItype)(n0)), \ + "1" ((USItype)(n1)), \ + "dmi" ((USItype)(d))) +#define UDIV_TIME 90 +#define sdiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divs%.l %4,%1:%0" \ + : "=d" ((USItype)(q)), \ + "=d" ((USItype)(r)) \ + : "0" ((USItype)(n0)), \ + "1" ((USItype)(n1)), \ + "dmi" ((USItype)(d))) +#else /* not mc68020 */ +#define umul_ppmm(xh, xl, a, b) \ +do { USItype __umul_tmp1, __umul_tmp2; \ + __asm__ ("| Inlined umul_ppmm\n" \ + "move%.l %5,%3\n" \ + "move%.l %2,%0\n" \ + "move%.w %3,%1\n" \ + "swap %3\n" \ + "swap %0\n" \ + "mulu %2,%1\n" \ + "mulu %3,%0\n" \ + "mulu %2,%3\n" \ + "swap %2\n" \ + "mulu %5,%2\n" \ + "add%.l %3,%2\n" \ + "jcc 1f\n" \ + "add%.l %#0x10000,%0\n" \ + "1: move%.l %2,%3\n" \ + "clr%.w %2\n" \ + "swap %2\n" \ + "swap %3\n" \ + "clr%.w %3\n" \ + "add%.l %3,%1\n" \ + "addx%.l %2,%0\n" \ + "| End inlined umul_ppmm" \ + : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \ + "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \ + : "%2" ((USItype)(a)), "d" ((USItype)(b))); \ +} while (0) +#define UMUL_TIME 100 +#define UDIV_TIME 400 +#endif /* not mc68020 */ +#endif /* mc68000 */ + +/*************************************** + ************** 88000 **************** + ***************************************/ +#if defined(__m88000__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addu.co %1,%r4,%r5\n" \ + "addu.ci %0,%r2,%r3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%rJ" ((USItype)(ah)), \ + "rJ" ((USItype)(bh)), \ + "%rJ" ((USItype)(al)), \ + "rJ" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subu.co %1,%r4,%r5\n" \ + "subu.ci %0,%r2,%r3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "rJ" ((USItype)(ah)), \ + "rJ" ((USItype)(bh)), \ + "rJ" ((USItype)(al)), \ + "rJ" ((USItype)(bl))) +#if defined(__m88110__) +#define umul_ppmm(wh, wl, u, v) \ +do { \ + union {UDItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __x; \ + __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \ + (wh) = __x.__i.__h; \ + (wl) = __x.__i.__l; \ +} while (0) +#define udiv_qrnnd(q, r, n1, n0, d) \ + ({union {UDItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __x, __q; \ + __x.__i.__h = (n1); __x.__i.__l = (n0); \ + __asm__ ("divu.d %0,%1,%2" \ + : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \ + (r) = (n0) - __q.__l * (d); (q) = __q.__l; }) +#define UMUL_TIME 5 +#define UDIV_TIME 25 +#else +#define UMUL_TIME 17 +#define UDIV_TIME 150 +#endif /* __m88110__ */ +#endif /* __m88000__ */ + +/*************************************** + ************** MIPS ***************** + ***************************************/ +#if defined(__mips__) && W_TYPE_SIZE == 32 +#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) +#define umul_ppmm(w1, w0, u, v) \ +do { \ + UDItype __ll = (UDItype)(u) * (v); \ + w1 = __ll >> 32; \ + w0 = __ll; \ +} while (0) +#elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("multu %2,%3" \ + : "=l" ((USItype)(w0)), \ + "=h" ((USItype)(w1)) \ + : "d" ((USItype)(u)), \ + "d" ((USItype)(v))) +#else +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("multu %2,%3\n" \ + "mflo %0\n" \ + "mfhi %1" \ + : "=d" ((USItype)(w0)), \ + "=d" ((USItype)(w1)) \ + : "d" ((USItype)(u)), \ + "d" ((USItype)(v))) +#endif +#define UMUL_TIME 10 +#define UDIV_TIME 100 +#endif /* __mips__ */ + +/*************************************** + ************** MIPS/64 ************** + ***************************************/ +#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64 +#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) +#define umul_ppmm(w1, w0, u, v) \ +do { \ + typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \ + __ll_UTItype __ll = (__ll_UTItype)(u) * (v); \ + w1 = __ll >> 64; \ + w0 = __ll; \ +} while (0) +#elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("dmultu %2,%3" \ + : "=l" ((UDItype)(w0)), \ + "=h" ((UDItype)(w1)) \ + : "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))) +#else +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("dmultu %2,%3\n" \ + "mflo %0\n" \ + "mfhi %1" \ + : "=d" ((UDItype)(w0)), \ + "=d" ((UDItype)(w1)) \ + : "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))) +#endif +#define UMUL_TIME 20 +#define UDIV_TIME 140 +#endif /* __mips__ */ + +/*************************************** + ************** 32000 **************** + ***************************************/ +#if defined(__ns32000__) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __xx; \ + __asm__ ("meid %2,%0" \ + : "=g" (__xx.__ll) \ + : "%0" ((USItype)(u)), \ + "g" ((USItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("meid %2,%0" \ + : "=g" (__w) \ + : "%0" ((USItype)(u)), \ + "g" ((USItype)(v))); \ + __w; }) +#define udiv_qrnnd(q, r, n1, n0, d) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __xx; \ + __xx.__i.__h = (n1); __xx.__i.__l = (n0); \ + __asm__ ("deid %2,%0" \ + : "=g" (__xx.__ll) \ + : "0" (__xx.__ll), \ + "g" ((USItype)(d))); \ + (r) = __xx.__i.__l; (q) = __xx.__i.__h; }) +#endif /* __ns32000__ */ + +/*************************************** + ************** PPC ****************** + ***************************************/ +#if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +do { \ + if (__builtin_constant_p(bh) && (bh) == 0) \ + __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))); \ + else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ + __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))); \ + else \ + __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))); \ +} while (0) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +do { \ + if (__builtin_constant_p(ah) && (ah) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(bh)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(bh)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else if (__builtin_constant_p(bh) && (bh) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else \ + __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ +} while (0) +#if defined(_ARCH_PPC) +#define umul_ppmm(ph, pl, m0, m1) \ +do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhwu %0,%1,%2" \ + : "=r" ((USItype) ph) \ + : "%r" (__m0), \ + "r" (__m1)); \ + (pl) = __m0 * __m1; \ +} while (0) +#define UMUL_TIME 15 +#define smul_ppmm(ph, pl, m0, m1) \ +do { \ + SItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhw %0,%1,%2" \ + : "=r" ((SItype) ph) \ + : "%r" (__m0), \ + "r" (__m1)); \ + (pl) = __m0 * __m1; \ +} while (0) +#define SMUL_TIME 14 +#define UDIV_TIME 120 +#else +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mul %0,%2,%3" \ + : "=r" ((USItype)(xh)), \ + "=q" ((USItype)(xl)) \ + : "r" (__m0), \ + "r" (__m1)); \ + (xh) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ +} while (0) +#define UMUL_TIME 8 +#define smul_ppmm(xh, xl, m0, m1) \ + __asm__ ("mul %0,%2,%3" \ + : "=r" ((SItype)(xh)), \ + "=q" ((SItype)(xl)) \ + : "r" (m0), \ + "r" (m1)) +#define SMUL_TIME 4 +#define sdiv_qrnnd(q, r, nh, nl, d) \ + __asm__ ("div %0,%2,%4" \ + : "=r" ((SItype)(q)), "=q" ((SItype)(r)) \ + : "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d))) +#define UDIV_TIME 100 +#endif +#endif /* Power architecture variants. */ + +/*************************************** + ************** PYR ****************** + ***************************************/ +#if defined(__pyr__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addw %5,%1\n" \ + "addwc %3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subw %5,%1\n" \ + "subwb %3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) + /* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */ +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __h, __l; } __i; \ + } __xx; \ + __asm__ ("movw %1,%R0\n" \ + "uemul %2,%0" \ + : "=&r" (__xx.__ll) \ + : "g" ((USItype) (u)), \ + "g" ((USItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; }) +#endif /* __pyr__ */ + +/*************************************** + ************** RT/ROMP ************** + ***************************************/ +#if defined(__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("a %1,%5\n" \ + "ae %0,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "r" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("s %1,%5\n" \ + "se %0,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "r" ((USItype)(bl))) +#define umul_ppmm(ph, pl, m0, m1) \ +do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ( \ + "s r2,r2\n" \ + "mts r10,%2\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "m r2,%3\n" \ + "cas %0,r2,r0\n" \ + "mfs r10,%1" \ + : "=r" ((USItype)(ph)), \ + "=r" ((USItype)(pl)) \ + : "%r" (__m0), \ + "r" (__m1) \ + : "r2"); \ + (ph) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ +} while (0) +#define UMUL_TIME 20 +#define UDIV_TIME 200 +#endif /* RT/ROMP */ + +/*************************************** + ************** SH2 ****************** + ***************************************/ +#if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \ + && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ( \ + "dmulu.l %2,%3\n" \ + "sts macl,%1\n" \ + "sts mach,%0" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "r" ((USItype)(u)), \ + "r" ((USItype)(v)) \ + : "macl", "mach") +#define UMUL_TIME 5 +#endif + +/*************************************** + ************** SPARC **************** + ***************************************/ +#if defined(__sparc__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addcc %r4,%5,%1\n" \ + "addx %r2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%rJ" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "%rJ" ((USItype)(al)), \ + "rI" ((USItype)(bl)) \ + __CLOBBER_CC) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subcc %r4,%5,%1\n" \ + "subx %r2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "rJ" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "rJ" ((USItype)(al)), \ + "rI" ((USItype)(bl)) \ + __CLOBBER_CC) +#if defined(__sparc_v8__) +/* Don't match immediate range because, 1) it is not often useful, + 2) the 'I' flag thinks of the range as a 13 bit signed interval, + while we want to match a 13 bit interval, sign extended to 32 bits, + but INTERPRETED AS UNSIGNED. */ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("umul %2,%3,%1;rd %%y,%0" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "r" ((USItype)(u)), \ + "r" ((USItype)(v))) +#define UMUL_TIME 5 +#ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */ +#define udiv_qrnnd(q, r, n1, n0, d) \ +do { \ + USItype __q; \ + __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ + : "=r" ((USItype)(__q)) \ + : "r" ((USItype)(n1)), \ + "r" ((USItype)(n0)), \ + "r" ((USItype)(d))); \ + (r) = (n0) - __q * (d); \ + (q) = __q; \ +} while (0) +#define UDIV_TIME 25 +#endif /* SUPERSPARC */ +#else /* ! __sparc_v8__ */ +#if defined(__sparclite__) +/* This has hardware multiply but not divide. It also has two additional + instructions scan (ffs from high bit) and divscc. */ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("umul %2,%3,%1;rd %%y,%0" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "r" ((USItype)(u)), \ + "r" ((USItype)(v))) +#define UMUL_TIME 5 +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("! Inlined udiv_qrnnd\n" \ + "wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \ + "tst %%g0\n" \ + "divscc %3,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%%g1\n" \ + "divscc %%g1,%4,%0\n" \ + "rd %%y,%1\n" \ + "bl,a 1f\n" \ + "add %1,%4,%1\n" \ + "1: ! End of inline udiv_qrnnd" \ + : "=r" ((USItype)(q)), \ + "=r" ((USItype)(r)) \ + : "r" ((USItype)(n1)), \ + "r" ((USItype)(n0)), \ + "rI" ((USItype)(d)) \ + : "%g1" __AND_CLOBBER_CC) +#define UDIV_TIME 37 +#endif /* __sparclite__ */ +#endif /* __sparc_v8__ */ + /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */ +#ifndef umul_ppmm +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("! Inlined umul_ppmm\n" \ + "wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \ + "sra %3,31,%%g2 ! Don't move this insn\n" \ + "and %2,%%g2,%%g2 ! Don't move this insn\n" \ + "andcc %%g0,0,%%g1 ! Don't move this insn\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,%3,%%g1\n" \ + "mulscc %%g1,0,%%g1\n" \ + "add %%g1,%%g2,%0\n" \ + "rd %%y,%1" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "%rI" ((USItype)(u)), \ + "r" ((USItype)(v)) \ + : "%g1", "%g2" __AND_CLOBBER_CC) +#define UMUL_TIME 39 /* 39 instructions */ +/* It's quite necessary to add this much assembler for the sparc. + The default udiv_qrnnd (in C) is more than 10 times slower! */ +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("! Inlined udiv_qrnnd\n\t" \ + "mov 32,%%g1\n\t" \ + "subcc %1,%2,%%g0\n\t" \ + "1: bcs 5f\n\t" \ + "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \ + "sub %1,%2,%1 ! this kills msb of n\n\t" \ + "addx %1,%1,%1 ! so this can't give carry\n\t" \ + "subcc %%g1,1,%%g1\n\t" \ + "2: bne 1b\n\t" \ + "subcc %1,%2,%%g0\n\t" \ + "bcs 3f\n\t" \ + "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \ + "b 3f\n\t" \ + "sub %1,%2,%1 ! this kills msb of n\n\t" \ + "4: sub %1,%2,%1\n\t" \ + "5: addxcc %1,%1,%1\n\t" \ + "bcc 2b\n\t" \ + "subcc %%g1,1,%%g1\n\t" \ + "! Got carry from n. Subtract next step to cancel this carry.\n\t" \ + "bne 4b\n\t" \ + "addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \ + "sub %1,%2,%1\n\t" \ + "3: xnor %0,0,%0\n\t" \ + "! End of inline udiv_qrnnd\n" \ + : "=&r" ((USItype)(q)), \ + "=&r" ((USItype)(r)) \ + : "r" ((USItype)(d)), \ + "1" ((USItype)(n1)), \ + "0" ((USItype)(n0)) : "%g1", "cc") +#define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */ +#endif +#endif /* __sparc__ */ + +/*************************************** + ************** VAX ****************** + ***************************************/ +#if defined(__vax__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addl2 %5,%1\n" \ + "adwc %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subl2 %5,%1\n" \ + "sbwc %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ + union {UDItype __ll; \ + struct {USItype __l, __h; } __i; \ + } __xx; \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("emul %1,%2,$0,%0" \ + : "=g" (__xx.__ll) \ + : "g" (__m0), \ + "g" (__m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ + (xh) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ +} while (0) +#define sdiv_qrnnd(q, r, n1, n0, d) \ +do { \ + union {DItype __ll; \ + struct {SItype __l, __h; } __i; \ + } __xx; \ + __xx.__i.__h = n1; __xx.__i.__l = n0; \ + __asm__ ("ediv %3,%2,%0,%1" \ + : "=g" (q), "=g" (r) \ + : "g" (__xx.__ll), "g" (d)); \ +} while (0) +#endif /* __vax__ */ + +/*************************************** + ************** Z8000 **************** + ***************************************/ +#if defined(__z8000__) && W_TYPE_SIZE == 16 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ + : "=r" ((unsigned int)(sh)), \ + "=&r" ((unsigned int)(sl)) \ + : "%0" ((unsigned int)(ah)), \ + "r" ((unsigned int)(bh)), \ + "%1" ((unsigned int)(al)), \ + "rQR" ((unsigned int)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ + : "=r" ((unsigned int)(sh)), \ + "=&r" ((unsigned int)(sl)) \ + : "0" ((unsigned int)(ah)), \ + "r" ((unsigned int)(bh)), \ + "1" ((unsigned int)(al)), \ + "rQR" ((unsigned int)(bl))) +#define umul_ppmm(xh, xl, m0, m1) \ +do { \ + union {long int __ll; \ + struct {unsigned int __h, __l; } __i; \ + } __xx; \ + unsigned int __m0 = (m0), __m1 = (m1); \ + __asm__ ("mult %S0,%H3" \ + : "=r" (__xx.__i.__h), \ + "=r" (__xx.__i.__l) \ + : "%1" (__m0), \ + "rQR" (__m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ + (xh) += ((((signed int) __m0 >> 15) & __m1) \ + + (((signed int) __m1 >> 15) & __m0)); \ +} while (0) +#endif /* __z8000__ */ + +#endif /* __GNUC__ */ + +/*************************************** + *********** Generic Versions ******** + ***************************************/ +#if !defined(umul_ppmm) && defined(__umulsidi3) +#define umul_ppmm(ph, pl, m0, m1) \ +{ \ + UDWtype __ll = __umulsidi3(m0, m1); \ + ph = (UWtype) (__ll >> W_TYPE_SIZE); \ + pl = (UWtype) __ll; \ +} +#endif + +#if !defined(__umulsidi3) +#define __umulsidi3(u, v) \ + ({UWtype __hi, __lo; \ + umul_ppmm(__hi, __lo, u, v); \ + ((UDWtype) __hi << W_TYPE_SIZE) | __lo; }) +#endif + + /* If this machine has no inline assembler, use C macros. */ + +#if !defined(add_ssaaaa) +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +do { \ + UWtype __x; \ + __x = (al) + (bl); \ + (sh) = (ah) + (bh) + (__x < (al)); \ + (sl) = __x; \ +} while (0) +#endif + +#if !defined(sub_ddmmss) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +do { \ + UWtype __x; \ + __x = (al) - (bl); \ + (sh) = (ah) - (bh) - (__x > (al)); \ + (sl) = __x; \ +} while (0) +#endif + +#if !defined(umul_ppmm) +#define umul_ppmm(w1, w0, u, v) \ +do { \ + UWtype __x0, __x1, __x2, __x3; \ + UHWtype __ul, __vl, __uh, __vh; \ + UWtype __u = (u), __v = (v); \ + \ + __ul = __ll_lowpart(__u); \ + __uh = __ll_highpart(__u); \ + __vl = __ll_lowpart(__v); \ + __vh = __ll_highpart(__v); \ + \ + __x0 = (UWtype) __ul * __vl; \ + __x1 = (UWtype) __ul * __vh; \ + __x2 = (UWtype) __uh * __vl; \ + __x3 = (UWtype) __uh * __vh; \ + \ + __x1 += __ll_highpart(__x0);/* this can't give carry */ \ + __x1 += __x2; /* but this indeed can */ \ + if (__x1 < __x2) /* did we get it? */ \ + __x3 += __ll_B; /* yes, add it in the proper pos. */ \ + \ + (w1) = __x3 + __ll_highpart(__x1); \ + (w0) = (__ll_lowpart(__x1) << W_TYPE_SIZE/2) + __ll_lowpart(__x0); \ +} while (0) +#endif + +#if !defined(umul_ppmm) +#define smul_ppmm(w1, w0, u, v) \ +do { \ + UWtype __w1; \ + UWtype __m0 = (u), __m1 = (v); \ + umul_ppmm(__w1, w0, __m0, __m1); \ + (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \ + - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \ +} while (0) +#endif + + /* Define this unconditionally, so it can be used for debugging. */ +#define __udiv_qrnnd_c(q, r, n1, n0, d) \ +do { \ + UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \ + __d1 = __ll_highpart(d); \ + __d0 = __ll_lowpart(d); \ + \ + __r1 = (n1) % __d1; \ + __q1 = (n1) / __d1; \ + __m = (UWtype) __q1 * __d0; \ + __r1 = __r1 * __ll_B | __ll_highpart(n0); \ + if (__r1 < __m) { \ + __q1--, __r1 += (d); \ + if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \ + if (__r1 < __m) \ + __q1--, __r1 += (d); \ + } \ + __r1 -= __m; \ + \ + __r0 = __r1 % __d1; \ + __q0 = __r1 / __d1; \ + __m = (UWtype) __q0 * __d0; \ + __r0 = __r0 * __ll_B | __ll_lowpart(n0); \ + if (__r0 < __m) { \ + __q0--, __r0 += (d); \ + if (__r0 >= (d)) \ + if (__r0 < __m) \ + __q0--, __r0 += (d); \ + } \ + __r0 -= __m; \ + \ + (q) = (UWtype) __q1 * __ll_B | __q0; \ + (r) = __r0; \ +} while (0) + +/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through + __udiv_w_sdiv (defined in libgcc or elsewhere). */ +#if !defined(udiv_qrnnd) && defined(sdiv_qrnnd) +#define udiv_qrnnd(q, r, nh, nl, d) \ +do { \ + UWtype __r; \ + (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \ + (r) = __r; \ +} while (0) +#endif + + /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */ +#if !defined(udiv_qrnnd) +#define UDIV_NEEDS_NORMALIZATION 1 +#define udiv_qrnnd __udiv_qrnnd_c +#endif + +#ifndef UDIV_NEEDS_NORMALIZATION +#define UDIV_NEEDS_NORMALIZATION 0 +#endif diff --git a/lib/mpi/mpi-bit.c b/lib/mpi/mpi-bit.c new file mode 100644 index 000000000..503537e08 --- /dev/null +++ b/lib/mpi/mpi-bit.c @@ -0,0 +1,56 @@ +/* mpi-bit.c - MPI bit level fucntions + * Copyright (C) 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "mpi-internal.h" +#include "longlong.h" + +#define A_LIMB_1 ((mpi_limb_t) 1) + +/**************** + * Sometimes we have MSL (most significant limbs) which are 0; + * this is for some reasons not good, so this function removes them. + */ +void mpi_normalize(MPI a) +{ + for (; a->nlimbs && !a->d[a->nlimbs - 1]; a->nlimbs--) + ; +} + +/**************** + * Return the number of bits in A. + */ +unsigned mpi_get_nbits(MPI a) +{ + unsigned n; + + mpi_normalize(a); + + if (a->nlimbs) { + mpi_limb_t alimb = a->d[a->nlimbs - 1]; + if (alimb) + n = count_leading_zeros(alimb); + else + n = BITS_PER_MPI_LIMB; + n = BITS_PER_MPI_LIMB - n + (a->nlimbs - 1) * BITS_PER_MPI_LIMB; + } else + n = 0; + return n; +} +EXPORT_SYMBOL_GPL(mpi_get_nbits); diff --git a/lib/mpi/mpi-cmp.c b/lib/mpi/mpi-cmp.c new file mode 100644 index 000000000..d25e9e96c --- /dev/null +++ b/lib/mpi/mpi-cmp.c @@ -0,0 +1,68 @@ +/* mpi-cmp.c - MPI functions + * Copyright (C) 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "mpi-internal.h" + +int mpi_cmp_ui(MPI u, unsigned long v) +{ + mpi_limb_t limb = v; + + mpi_normalize(u); + if (!u->nlimbs && !limb) + return 0; + if (u->sign) + return -1; + if (u->nlimbs > 1) + return 1; + + if (u->d[0] == limb) + return 0; + else if (u->d[0] > limb) + return 1; + else + return -1; +} +EXPORT_SYMBOL_GPL(mpi_cmp_ui); + +int mpi_cmp(MPI u, MPI v) +{ + mpi_size_t usize, vsize; + int cmp; + + mpi_normalize(u); + mpi_normalize(v); + usize = u->nlimbs; + vsize = v->nlimbs; + if (!u->sign && v->sign) + return 1; + if (u->sign && !v->sign) + return -1; + if (usize != vsize && !u->sign && !v->sign) + return usize - vsize; + if (usize != vsize && u->sign && v->sign) + return vsize - usize; + if (!usize) + return 0; + cmp = mpihelp_cmp(u->d, v->d, usize); + if (u->sign) + return -cmp; + return cmp; +} +EXPORT_SYMBOL_GPL(mpi_cmp); diff --git a/lib/mpi/mpi-inline.h b/lib/mpi/mpi-inline.h new file mode 100644 index 000000000..e2b39852b --- /dev/null +++ b/lib/mpi/mpi-inline.h @@ -0,0 +1,122 @@ +/* mpi-inline.h - Internal to the Multi Precision Integers + * Copyright (C) 1994, 1996, 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#ifndef G10_MPI_INLINE_H +#define G10_MPI_INLINE_H + +#ifndef G10_MPI_INLINE_DECL +#define G10_MPI_INLINE_DECL extern inline +#endif + +G10_MPI_INLINE_DECL mpi_limb_t +mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb) +{ + mpi_limb_t x; + + x = *s1_ptr++; + s2_limb += x; + *res_ptr++ = s2_limb; + if (s2_limb < x) { /* sum is less than the left operand: handle carry */ + while (--s1_size) { + x = *s1_ptr++ + 1; /* add carry */ + *res_ptr++ = x; /* and store */ + if (x) /* not 0 (no overflow): we can stop */ + goto leave; + } + return 1; /* return carry (size of s1 to small) */ + } + +leave: + if (res_ptr != s1_ptr) { /* not the same variable */ + mpi_size_t i; /* copy the rest */ + for (i = 0; i < s1_size - 1; i++) + res_ptr[i] = s1_ptr[i]; + } + return 0; /* no carry */ +} + +G10_MPI_INLINE_DECL mpi_limb_t +mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_ptr_t s2_ptr, mpi_size_t s2_size) +{ + mpi_limb_t cy = 0; + + if (s2_size) + cy = mpihelp_add_n(res_ptr, s1_ptr, s2_ptr, s2_size); + + if (s1_size - s2_size) + cy = mpihelp_add_1(res_ptr + s2_size, s1_ptr + s2_size, + s1_size - s2_size, cy); + return cy; +} + +G10_MPI_INLINE_DECL mpi_limb_t +mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb) +{ + mpi_limb_t x; + + x = *s1_ptr++; + s2_limb = x - s2_limb; + *res_ptr++ = s2_limb; + if (s2_limb > x) { + while (--s1_size) { + x = *s1_ptr++; + *res_ptr++ = x - 1; + if (x) + goto leave; + } + return 1; + } + +leave: + if (res_ptr != s1_ptr) { + mpi_size_t i; + for (i = 0; i < s1_size - 1; i++) + res_ptr[i] = s1_ptr[i]; + } + return 0; +} + +G10_MPI_INLINE_DECL mpi_limb_t +mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_ptr_t s2_ptr, mpi_size_t s2_size) +{ + mpi_limb_t cy = 0; + + if (s2_size) + cy = mpihelp_sub_n(res_ptr, s1_ptr, s2_ptr, s2_size); + + if (s1_size - s2_size) + cy = mpihelp_sub_1(res_ptr + s2_size, s1_ptr + s2_size, + s1_size - s2_size, cy); + return cy; +} + +#endif /*G10_MPI_INLINE_H */ diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h new file mode 100644 index 000000000..c65dd1bff --- /dev/null +++ b/lib/mpi/mpi-internal.h @@ -0,0 +1,257 @@ +/* mpi-internal.h - Internal to the Multi Precision Integers + * Copyright (C) 1994, 1996 Free Software Foundation, Inc. + * Copyright (C) 1998, 2000 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#ifndef G10_MPI_INTERNAL_H +#define G10_MPI_INTERNAL_H + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/mpi.h> +#include <linux/errno.h> + +#define log_debug printk +#define log_bug printk + +#define assert(x) \ + do { \ + if (!x) \ + log_bug("failed assertion\n"); \ + } while (0); + +/* If KARATSUBA_THRESHOLD is not already defined, define it to a + * value which is good on most machines. */ + +/* tested 4, 16, 32 and 64, where 16 gave the best performance when + * checking a 768 and a 1024 bit ElGamal signature. + * (wk 22.12.97) */ +#ifndef KARATSUBA_THRESHOLD +#define KARATSUBA_THRESHOLD 16 +#endif + +/* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */ +#if KARATSUBA_THRESHOLD < 2 +#undef KARATSUBA_THRESHOLD +#define KARATSUBA_THRESHOLD 2 +#endif + +typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */ +typedef int mpi_size_t; /* (must be a signed type) */ + +static inline int RESIZE_IF_NEEDED(MPI a, unsigned b) +{ + if (a->alloced < b) + return mpi_resize(a, b); + return 0; +} + +/* Copy N limbs from S to D. */ +#define MPN_COPY(d, s, n) \ + do { \ + mpi_size_t _i; \ + for (_i = 0; _i < (n); _i++) \ + (d)[_i] = (s)[_i]; \ + } while (0) + +#define MPN_COPY_INCR(d, s, n) \ + do { \ + mpi_size_t _i; \ + for (_i = 0; _i < (n); _i++) \ + (d)[_i] = (s)[_i]; \ + } while (0) + +#define MPN_COPY_DECR(d, s, n) \ + do { \ + mpi_size_t _i; \ + for (_i = (n)-1; _i >= 0; _i--) \ + (d)[_i] = (s)[_i]; \ + } while (0) + +/* Zero N limbs at D */ +#define MPN_ZERO(d, n) \ + do { \ + int _i; \ + for (_i = 0; _i < (n); _i++) \ + (d)[_i] = 0; \ + } while (0) + +#define MPN_NORMALIZE(d, n) \ + do { \ + while ((n) > 0) { \ + if ((d)[(n)-1]) \ + break; \ + (n)--; \ + } \ + } while (0) + +#define MPN_NORMALIZE_NOT_ZERO(d, n) \ + do { \ + for (;;) { \ + if ((d)[(n)-1]) \ + break; \ + (n)--; \ + } \ + } while (0) + +#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ + do { \ + if ((size) < KARATSUBA_THRESHOLD) \ + mul_n_basecase(prodp, up, vp, size); \ + else \ + mul_n(prodp, up, vp, size, tspace); \ + } while (0); + +/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest + * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB). + * If this would yield overflow, DI should be the largest possible number + * (i.e., only ones). For correct operation, the most significant bit of D + * has to be set. Put the quotient in Q and the remainder in R. + */ +#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \ + do { \ + mpi_limb_t _q, _ql, _r; \ + mpi_limb_t _xh, _xl; \ + umul_ppmm(_q, _ql, (nh), (di)); \ + _q += (nh); /* DI is 2**BITS_PER_MPI_LIMB too small */ \ + umul_ppmm(_xh, _xl, _q, (d)); \ + sub_ddmmss(_xh, _r, (nh), (nl), _xh, _xl); \ + if (_xh) { \ + sub_ddmmss(_xh, _r, _xh, _r, 0, (d)); \ + _q++; \ + if (_xh) { \ + sub_ddmmss(_xh, _r, _xh, _r, 0, (d)); \ + _q++; \ + } \ + } \ + if (_r >= (d)) { \ + _r -= (d); \ + _q++; \ + } \ + (r) = _r; \ + (q) = _q; \ + } while (0) + +/*-- mpiutil.c --*/ +mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs); +void mpi_free_limb_space(mpi_ptr_t a); +void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs); + +/*-- mpi-bit.c --*/ +void mpi_rshift_limbs(MPI a, unsigned int count); +int mpi_lshift_limbs(MPI a, unsigned int count); + +/*-- mpihelp-add.c --*/ +mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb); +mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_ptr_t s2_ptr, mpi_size_t size); +mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_ptr_t s2_ptr, mpi_size_t s2_size); + +/*-- mpihelp-sub.c --*/ +mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb); +mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_ptr_t s2_ptr, mpi_size_t size); +mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_ptr_t s2_ptr, mpi_size_t s2_size); + +/*-- mpihelp-cmp.c --*/ +int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size); + +/*-- mpihelp-mul.c --*/ + +struct karatsuba_ctx { + struct karatsuba_ctx *next; + mpi_ptr_t tspace; + mpi_size_t tspace_size; + mpi_ptr_t tp; + mpi_size_t tp_size; +}; + +void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx); + +mpi_limb_t mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb); +mpi_limb_t mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb); +int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size); +int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, + mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result); +void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size); +void mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, + mpi_ptr_t tspace); + +int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp, + mpi_ptr_t up, mpi_size_t usize, + mpi_ptr_t vp, mpi_size_t vsize, + struct karatsuba_ctx *ctx); + +/*-- mpihelp-mul_1.c (or xxx/cpu/ *.S) --*/ +mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb); + +/*-- mpihelp-div.c --*/ +mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, + mpi_limb_t divisor_limb); +mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs, + mpi_ptr_t np, mpi_size_t nsize, + mpi_ptr_t dp, mpi_size_t dsize); +mpi_limb_t mpihelp_divmod_1(mpi_ptr_t quot_ptr, + mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, + mpi_limb_t divisor_limb); + +/*-- mpihelp-shift.c --*/ +mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, + unsigned cnt); +mpi_limb_t mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, + unsigned cnt); + +/* Define stuff for longlong.h. */ +#define W_TYPE_SIZE BITS_PER_MPI_LIMB +typedef mpi_limb_t UWtype; +typedef unsigned int UHWtype; +#if defined(__GNUC__) +typedef unsigned int UQItype __attribute__ ((mode(QI))); +typedef int SItype __attribute__ ((mode(SI))); +typedef unsigned int USItype __attribute__ ((mode(SI))); +typedef int DItype __attribute__ ((mode(DI))); +typedef unsigned int UDItype __attribute__ ((mode(DI))); +#else +typedef unsigned char UQItype; +typedef long SItype; +typedef unsigned long USItype; +#endif + +#ifdef __GNUC__ +#include "mpi-inline.h" +#endif + +#endif /*G10_MPI_INTERNAL_H */ diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c new file mode 100644 index 000000000..5464c8744 --- /dev/null +++ b/lib/mpi/mpi-pow.c @@ -0,0 +1,323 @@ +/* mpi-pow.c - MPI functions + * Copyright (C) 1994, 1996, 1998, 2000 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include <linux/string.h> +#include "mpi-internal.h" +#include "longlong.h" + +/**************** + * RES = BASE ^ EXP mod MOD + */ +int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) +{ + mpi_ptr_t mp_marker = NULL, bp_marker = NULL, ep_marker = NULL; + mpi_ptr_t xp_marker = NULL; + mpi_ptr_t tspace = NULL; + mpi_ptr_t rp, ep, mp, bp; + mpi_size_t esize, msize, bsize, rsize; + int esign, msign, bsign, rsign; + mpi_size_t size; + int mod_shift_cnt; + int negative_result; + int assign_rp = 0; + mpi_size_t tsize = 0; /* to avoid compiler warning */ + /* fixme: we should check that the warning is void */ + int rc = -ENOMEM; + + esize = exp->nlimbs; + msize = mod->nlimbs; + size = 2 * msize; + esign = exp->sign; + msign = mod->sign; + + rp = res->d; + ep = exp->d; + + if (!msize) + return -EINVAL; + + if (!esize) { + /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 + * depending on if MOD equals 1. */ + rp[0] = 1; + res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1; + res->sign = 0; + goto leave; + } + + /* Normalize MOD (i.e. make its most significant bit set) as required by + * mpn_divrem. This will make the intermediate values in the calculation + * slightly larger, but the correct result is obtained after a final + * reduction using the original MOD value. */ + mp = mp_marker = mpi_alloc_limb_space(msize); + if (!mp) + goto enomem; + mod_shift_cnt = count_leading_zeros(mod->d[msize - 1]); + if (mod_shift_cnt) + mpihelp_lshift(mp, mod->d, msize, mod_shift_cnt); + else + MPN_COPY(mp, mod->d, msize); + + bsize = base->nlimbs; + bsign = base->sign; + if (bsize > msize) { /* The base is larger than the module. Reduce it. */ + /* Allocate (BSIZE + 1) with space for remainder and quotient. + * (The quotient is (bsize - msize + 1) limbs.) */ + bp = bp_marker = mpi_alloc_limb_space(bsize + 1); + if (!bp) + goto enomem; + MPN_COPY(bp, base->d, bsize); + /* We don't care about the quotient, store it above the remainder, + * at BP + MSIZE. */ + mpihelp_divrem(bp + msize, 0, bp, bsize, mp, msize); + bsize = msize; + /* Canonicalize the base, since we are going to multiply with it + * quite a few times. */ + MPN_NORMALIZE(bp, bsize); + } else + bp = base->d; + + if (!bsize) { + res->nlimbs = 0; + res->sign = 0; + goto leave; + } + + if (res->alloced < size) { + /* We have to allocate more space for RES. If any of the input + * parameters are identical to RES, defer deallocation of the old + * space. */ + if (rp == ep || rp == mp || rp == bp) { + rp = mpi_alloc_limb_space(size); + if (!rp) + goto enomem; + assign_rp = 1; + } else { + if (mpi_resize(res, size) < 0) + goto enomem; + rp = res->d; + } + } else { /* Make BASE, EXP and MOD not overlap with RES. */ + if (rp == bp) { + /* RES and BASE are identical. Allocate temp. space for BASE. */ + BUG_ON(bp_marker); + bp = bp_marker = mpi_alloc_limb_space(bsize); + if (!bp) + goto enomem; + MPN_COPY(bp, rp, bsize); + } + if (rp == ep) { + /* RES and EXP are identical. Allocate temp. space for EXP. */ + ep = ep_marker = mpi_alloc_limb_space(esize); + if (!ep) + goto enomem; + MPN_COPY(ep, rp, esize); + } + if (rp == mp) { + /* RES and MOD are identical. Allocate temporary space for MOD. */ + BUG_ON(mp_marker); + mp = mp_marker = mpi_alloc_limb_space(msize); + if (!mp) + goto enomem; + MPN_COPY(mp, rp, msize); + } + } + + MPN_COPY(rp, bp, bsize); + rsize = bsize; + rsign = bsign; + + { + mpi_size_t i; + mpi_ptr_t xp; + int c; + mpi_limb_t e; + mpi_limb_t carry_limb; + struct karatsuba_ctx karactx; + + xp = xp_marker = mpi_alloc_limb_space(2 * (msize + 1)); + if (!xp) + goto enomem; + + memset(&karactx, 0, sizeof karactx); + negative_result = (ep[0] & 1) && base->sign; + + i = esize - 1; + e = ep[i]; + c = count_leading_zeros(e); + e = (e << c) << 1; /* shift the exp bits to the left, lose msb */ + c = BITS_PER_MPI_LIMB - 1 - c; + + /* Main loop. + * + * Make the result be pointed to alternately by XP and RP. This + * helps us avoid block copying, which would otherwise be necessary + * with the overlap restrictions of mpihelp_divmod. With 50% probability + * the result after this loop will be in the area originally pointed + * by RP (==RES->d), and with 50% probability in the area originally + * pointed to by XP. + */ + + for (;;) { + while (c) { + mpi_ptr_t tp; + mpi_size_t xsize; + + /*if (mpihelp_mul_n(xp, rp, rp, rsize) < 0) goto enomem */ + if (rsize < KARATSUBA_THRESHOLD) + mpih_sqr_n_basecase(xp, rp, rsize); + else { + if (!tspace) { + tsize = 2 * rsize; + tspace = + mpi_alloc_limb_space(tsize); + if (!tspace) + goto enomem; + } else if (tsize < (2 * rsize)) { + mpi_free_limb_space(tspace); + tsize = 2 * rsize; + tspace = + mpi_alloc_limb_space(tsize); + if (!tspace) + goto enomem; + } + mpih_sqr_n(xp, rp, rsize, tspace); + } + + xsize = 2 * rsize; + if (xsize > msize) { + mpihelp_divrem(xp + msize, 0, xp, xsize, + mp, msize); + xsize = msize; + } + + tp = rp; + rp = xp; + xp = tp; + rsize = xsize; + + if ((mpi_limb_signed_t) e < 0) { + /*mpihelp_mul( xp, rp, rsize, bp, bsize ); */ + if (bsize < KARATSUBA_THRESHOLD) { + mpi_limb_t tmp; + if (mpihelp_mul + (xp, rp, rsize, bp, bsize, + &tmp) < 0) + goto enomem; + } else { + if (mpihelp_mul_karatsuba_case + (xp, rp, rsize, bp, bsize, + &karactx) < 0) + goto enomem; + } + + xsize = rsize + bsize; + if (xsize > msize) { + mpihelp_divrem(xp + msize, 0, + xp, xsize, mp, + msize); + xsize = msize; + } + + tp = rp; + rp = xp; + xp = tp; + rsize = xsize; + } + e <<= 1; + c--; + } + + i--; + if (i < 0) + break; + e = ep[i]; + c = BITS_PER_MPI_LIMB; + } + + /* We shifted MOD, the modulo reduction argument, left MOD_SHIFT_CNT + * steps. Adjust the result by reducing it with the original MOD. + * + * Also make sure the result is put in RES->d (where it already + * might be, see above). + */ + if (mod_shift_cnt) { + carry_limb = + mpihelp_lshift(res->d, rp, rsize, mod_shift_cnt); + rp = res->d; + if (carry_limb) { + rp[rsize] = carry_limb; + rsize++; + } + } else { + MPN_COPY(res->d, rp, rsize); + rp = res->d; + } + + if (rsize >= msize) { + mpihelp_divrem(rp + msize, 0, rp, rsize, mp, msize); + rsize = msize; + } + + /* Remove any leading zero words from the result. */ + if (mod_shift_cnt) + mpihelp_rshift(rp, rp, rsize, mod_shift_cnt); + MPN_NORMALIZE(rp, rsize); + + mpihelp_release_karatsuba_ctx(&karactx); + } + + if (negative_result && rsize) { + if (mod_shift_cnt) + mpihelp_rshift(mp, mp, msize, mod_shift_cnt); + mpihelp_sub(rp, mp, msize, rp, rsize); + rsize = msize; + rsign = msign; + MPN_NORMALIZE(rp, rsize); + } + res->nlimbs = rsize; + res->sign = rsign; + +leave: + rc = 0; +enomem: + if (assign_rp) + mpi_assign_limb_space(res, rp, size); + if (mp_marker) + mpi_free_limb_space(mp_marker); + if (bp_marker) + mpi_free_limb_space(bp_marker); + if (ep_marker) + mpi_free_limb_space(ep_marker); + if (xp_marker) + mpi_free_limb_space(xp_marker); + if (tspace) + mpi_free_limb_space(tspace); + return rc; +} +EXPORT_SYMBOL_GPL(mpi_powm); diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c new file mode 100644 index 000000000..4cc644273 --- /dev/null +++ b/lib/mpi/mpicoder.c @@ -0,0 +1,260 @@ +/* mpicoder.c - Coder for the external representation of MPIs + * Copyright (C) 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <linux/bitops.h> +#include <asm-generic/bitops/count_zeros.h> +#include "mpi-internal.h" + +#define MAX_EXTERN_MPI_BITS 16384 + +/** + * mpi_read_raw_data - Read a raw byte stream as a positive integer + * @xbuffer: The data to read + * @nbytes: The amount of data to read + */ +MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes) +{ + const uint8_t *buffer = xbuffer; + int i, j; + unsigned nbits, nlimbs; + mpi_limb_t a; + MPI val = NULL; + + while (nbytes > 0 && buffer[0] == 0) { + buffer++; + nbytes--; + } + + nbits = nbytes * 8; + if (nbits > MAX_EXTERN_MPI_BITS) { + pr_info("MPI: mpi too large (%u bits)\n", nbits); + return NULL; + } + if (nbytes > 0) + nbits -= count_leading_zeros(buffer[0]); + else + nbits = 0; + + nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); + val = mpi_alloc(nlimbs); + if (!val) + return NULL; + val->nbits = nbits; + val->sign = 0; + val->nlimbs = nlimbs; + + if (nbytes > 0) { + i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; + i %= BYTES_PER_MPI_LIMB; + for (j = nlimbs; j > 0; j--) { + a = 0; + for (; i < BYTES_PER_MPI_LIMB; i++) { + a <<= 8; + a |= *buffer++; + } + i = 0; + val->d[j - 1] = a; + } + } + return val; +} +EXPORT_SYMBOL_GPL(mpi_read_raw_data); + +MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) +{ + const uint8_t *buffer = xbuffer; + int i, j; + unsigned nbits, nbytes, nlimbs, nread = 0; + mpi_limb_t a; + MPI val = NULL; + + if (*ret_nread < 2) + goto leave; + nbits = buffer[0] << 8 | buffer[1]; + + if (nbits > MAX_EXTERN_MPI_BITS) { + pr_info("MPI: mpi too large (%u bits)\n", nbits); + goto leave; + } + buffer += 2; + nread = 2; + + nbytes = DIV_ROUND_UP(nbits, 8); + nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); + val = mpi_alloc(nlimbs); + if (!val) + return NULL; + i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; + i %= BYTES_PER_MPI_LIMB; + val->nbits = nbits; + j = val->nlimbs = nlimbs; + val->sign = 0; + for (; j > 0; j--) { + a = 0; + for (; i < BYTES_PER_MPI_LIMB; i++) { + if (++nread > *ret_nread) { + printk + ("MPI: mpi larger than buffer nread=%d ret_nread=%d\n", + nread, *ret_nread); + goto leave; + } + a <<= 8; + a |= *buffer++; + } + i = 0; + val->d[j - 1] = a; + } + +leave: + *ret_nread = nread; + return val; +} +EXPORT_SYMBOL_GPL(mpi_read_from_buffer); + +/**************** + * Return an allocated buffer with the MPI (msb first). + * NBYTES receives the length of this buffer. Caller must free the + * return string (This function does return a 0 byte buffer with NBYTES + * set to zero if the value of A is zero. If sign is not NULL, it will + * be set to the sign of the A. + */ +void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign) +{ + uint8_t *p, *buffer; + mpi_limb_t alimb; + int i; + unsigned int n; + + if (sign) + *sign = a->sign; + *nbytes = n = a->nlimbs * BYTES_PER_MPI_LIMB; + if (!n) + n++; /* avoid zero length allocation */ + p = buffer = kmalloc(n, GFP_KERNEL); + if (!p) + return NULL; + + for (i = a->nlimbs - 1; i >= 0; i--) { + alimb = a->d[i]; +#if BYTES_PER_MPI_LIMB == 4 + *p++ = alimb >> 24; + *p++ = alimb >> 16; + *p++ = alimb >> 8; + *p++ = alimb; +#elif BYTES_PER_MPI_LIMB == 8 + *p++ = alimb >> 56; + *p++ = alimb >> 48; + *p++ = alimb >> 40; + *p++ = alimb >> 32; + *p++ = alimb >> 24; + *p++ = alimb >> 16; + *p++ = alimb >> 8; + *p++ = alimb; +#else +#error please implement for this limb size. +#endif + } + + /* this is sub-optimal but we need to do the shift operation + * because the caller has to free the returned buffer */ + for (p = buffer; !*p && *nbytes; p++, --*nbytes) + ; + if (p != buffer) + memmove(buffer, p, *nbytes); + + return buffer; +} +EXPORT_SYMBOL_GPL(mpi_get_buffer); + +/**************** + * Use BUFFER to update MPI. + */ +int mpi_set_buffer(MPI a, const void *xbuffer, unsigned nbytes, int sign) +{ + const uint8_t *buffer = xbuffer, *p; + mpi_limb_t alimb; + int nlimbs; + int i; + + nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); + if (RESIZE_IF_NEEDED(a, nlimbs) < 0) + return -ENOMEM; + a->sign = sign; + + for (i = 0, p = buffer + nbytes - 1; p >= buffer + BYTES_PER_MPI_LIMB;) { +#if BYTES_PER_MPI_LIMB == 4 + alimb = (mpi_limb_t) *p--; + alimb |= (mpi_limb_t) *p-- << 8; + alimb |= (mpi_limb_t) *p-- << 16; + alimb |= (mpi_limb_t) *p-- << 24; +#elif BYTES_PER_MPI_LIMB == 8 + alimb = (mpi_limb_t) *p--; + alimb |= (mpi_limb_t) *p-- << 8; + alimb |= (mpi_limb_t) *p-- << 16; + alimb |= (mpi_limb_t) *p-- << 24; + alimb |= (mpi_limb_t) *p-- << 32; + alimb |= (mpi_limb_t) *p-- << 40; + alimb |= (mpi_limb_t) *p-- << 48; + alimb |= (mpi_limb_t) *p-- << 56; +#else +#error please implement for this limb size. +#endif + a->d[i++] = alimb; + } + if (p >= buffer) { +#if BYTES_PER_MPI_LIMB == 4 + alimb = *p--; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 8; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 16; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 24; +#elif BYTES_PER_MPI_LIMB == 8 + alimb = (mpi_limb_t) *p--; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 8; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 16; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 24; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 32; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 40; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 48; + if (p >= buffer) + alimb |= (mpi_limb_t) *p-- << 56; +#else +#error please implement for this limb size. +#endif + a->d[i++] = alimb; + } + a->nlimbs = i; + + if (i != nlimbs) { + pr_emerg("MPI: mpi_set_buffer: Assertion failed (%d != %d)", i, + nlimbs); + BUG(); + } + return 0; +} +EXPORT_SYMBOL_GPL(mpi_set_buffer); diff --git a/lib/mpi/mpih-cmp.c b/lib/mpi/mpih-cmp.c new file mode 100644 index 000000000..b2fd39677 --- /dev/null +++ b/lib/mpi/mpih-cmp.c @@ -0,0 +1,56 @@ +/* mpihelp-sub.c - MPI helper functions + * Copyright (C) 1994, 1996 Free Software Foundation, Inc. + * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" + +/**************** + * Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE. + * There are no restrictions on the relative sizes of + * the two arguments. + * Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2. + */ +int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size) +{ + mpi_size_t i; + mpi_limb_t op1_word, op2_word; + + for (i = size - 1; i >= 0; i--) { + op1_word = op1_ptr[i]; + op2_word = op2_ptr[i]; + if (op1_word != op2_word) + goto diff; + } + return 0; + +diff: + /* This can *not* be simplified to + * op2_word - op2_word + * since that expression might give signed overflow. */ + return (op1_word > op2_word) ? 1 : -1; +} diff --git a/lib/mpi/mpih-div.c b/lib/mpi/mpih-div.c new file mode 100644 index 000000000..c57d1d462 --- /dev/null +++ b/lib/mpi/mpih-div.c @@ -0,0 +1,236 @@ +/* mpihelp-div.c - MPI helper functions + * Copyright (C) 1994, 1996 Free Software Foundation, Inc. + * Copyright (C) 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include "mpi-internal.h" +#include "longlong.h" + +#ifndef UMUL_TIME +#define UMUL_TIME 1 +#endif +#ifndef UDIV_TIME +#define UDIV_TIME UMUL_TIME +#endif + +/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write + * the NSIZE-DSIZE least significant quotient limbs at QP + * and the DSIZE long remainder at NP. If QEXTRA_LIMBS is + * non-zero, generate that many fraction bits and append them after the + * other quotient limbs. + * Return the most significant limb of the quotient, this is always 0 or 1. + * + * Preconditions: + * 0. NSIZE >= DSIZE. + * 1. The most significant bit of the divisor must be set. + * 2. QP must either not overlap with the input operands at all, or + * QP + DSIZE >= NP must hold true. (This means that it's + * possible to put the quotient in the high part of NUM, right after the + * remainder in NUM. + * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero. + */ + +mpi_limb_t +mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs, + mpi_ptr_t np, mpi_size_t nsize, mpi_ptr_t dp, mpi_size_t dsize) +{ + mpi_limb_t most_significant_q_limb = 0; + + switch (dsize) { + case 0: + /* We are asked to divide by zero, so go ahead and do it! (To make + the compiler not remove this statement, return the value.) */ + /* + * existing clients of this function have been modified + * not to call it with dsize == 0, so this should not happen + */ + return 1 / dsize; + + case 1: + { + mpi_size_t i; + mpi_limb_t n1; + mpi_limb_t d; + + d = dp[0]; + n1 = np[nsize - 1]; + + if (n1 >= d) { + n1 -= d; + most_significant_q_limb = 1; + } + + qp += qextra_limbs; + for (i = nsize - 2; i >= 0; i--) + udiv_qrnnd(qp[i], n1, n1, np[i], d); + qp -= qextra_limbs; + + for (i = qextra_limbs - 1; i >= 0; i--) + udiv_qrnnd(qp[i], n1, n1, 0, d); + + np[0] = n1; + } + break; + + case 2: + { + mpi_size_t i; + mpi_limb_t n1, n0, n2; + mpi_limb_t d1, d0; + + np += nsize - 2; + d1 = dp[1]; + d0 = dp[0]; + n1 = np[1]; + n0 = np[0]; + + if (n1 >= d1 && (n1 > d1 || n0 >= d0)) { + sub_ddmmss(n1, n0, n1, n0, d1, d0); + most_significant_q_limb = 1; + } + + for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--) { + mpi_limb_t q; + mpi_limb_t r; + + if (i >= qextra_limbs) + np--; + else + np[0] = 0; + + if (n1 == d1) { + /* Q should be either 111..111 or 111..110. Need special + * treatment of this rare case as normal division would + * give overflow. */ + q = ~(mpi_limb_t) 0; + + r = n0 + d1; + if (r < d1) { /* Carry in the addition? */ + add_ssaaaa(n1, n0, r - d0, + np[0], 0, d0); + qp[i] = q; + continue; + } + n1 = d0 - (d0 != 0 ? 1 : 0); + n0 = -d0; + } else { + udiv_qrnnd(q, r, n1, n0, d1); + umul_ppmm(n1, n0, d0, q); + } + + n2 = np[0]; +q_test: + if (n1 > r || (n1 == r && n0 > n2)) { + /* The estimated Q was too large. */ + q--; + sub_ddmmss(n1, n0, n1, n0, 0, d0); + r += d1; + if (r >= d1) /* If not carry, test Q again. */ + goto q_test; + } + + qp[i] = q; + sub_ddmmss(n1, n0, r, n2, n1, n0); + } + np[1] = n1; + np[0] = n0; + } + break; + + default: + { + mpi_size_t i; + mpi_limb_t dX, d1, n0; + + np += nsize - dsize; + dX = dp[dsize - 1]; + d1 = dp[dsize - 2]; + n0 = np[dsize - 1]; + + if (n0 >= dX) { + if (n0 > dX + || mpihelp_cmp(np, dp, dsize - 1) >= 0) { + mpihelp_sub_n(np, np, dp, dsize); + n0 = np[dsize - 1]; + most_significant_q_limb = 1; + } + } + + for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) { + mpi_limb_t q; + mpi_limb_t n1, n2; + mpi_limb_t cy_limb; + + if (i >= qextra_limbs) { + np--; + n2 = np[dsize]; + } else { + n2 = np[dsize - 1]; + MPN_COPY_DECR(np + 1, np, dsize - 1); + np[0] = 0; + } + + if (n0 == dX) { + /* This might over-estimate q, but it's probably not worth + * the extra code here to find out. */ + q = ~(mpi_limb_t) 0; + } else { + mpi_limb_t r; + + udiv_qrnnd(q, r, n0, np[dsize - 1], dX); + umul_ppmm(n1, n0, d1, q); + + while (n1 > r + || (n1 == r + && n0 > np[dsize - 2])) { + q--; + r += dX; + if (r < dX) /* I.e. "carry in previous addition?" */ + break; + n1 -= n0 < d1; + n0 -= d1; + } + } + + /* Possible optimization: We already have (q * n0) and (1 * n1) + * after the calculation of q. Taking advantage of that, we + * could make this loop make two iterations less. */ + cy_limb = mpihelp_submul_1(np, dp, dsize, q); + + if (n2 != cy_limb) { + mpihelp_add_n(np, np, dp, dsize); + q--; + } + + qp[i] = q; + n0 = np[dsize - 1]; + } + } + } + + return most_significant_q_limb; +} diff --git a/lib/mpi/mpih-mul.c b/lib/mpi/mpih-mul.c new file mode 100644 index 000000000..7c841719f --- /dev/null +++ b/lib/mpi/mpih-mul.c @@ -0,0 +1,497 @@ +/* mpihelp-mul.c - MPI helper functions + * Copyright (C) 1994, 1996, 1998, 1999, + * 2000 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + +#include <linux/string.h> +#include "mpi-internal.h" +#include "longlong.h" + +#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ + do { \ + if ((size) < KARATSUBA_THRESHOLD) \ + mul_n_basecase(prodp, up, vp, size); \ + else \ + mul_n(prodp, up, vp, size, tspace); \ + } while (0); + +#define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \ + do { \ + if ((size) < KARATSUBA_THRESHOLD) \ + mpih_sqr_n_basecase(prodp, up, size); \ + else \ + mpih_sqr_n(prodp, up, size, tspace); \ + } while (0); + +/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP), + * both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are + * always stored. Return the most significant limb. + * + * Argument constraints: + * 1. PRODP != UP and PRODP != VP, i.e. the destination + * must be distinct from the multiplier and the multiplicand. + * + * + * Handle simple cases with traditional multiplication. + * + * This is the most critical code of multiplication. All multiplies rely + * on this, both small and huge. Small ones arrive here immediately. Huge + * ones arrive here as this is the base case for Karatsuba's recursive + * algorithm below. + */ + +static mpi_limb_t +mul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) +{ + mpi_size_t i; + mpi_limb_t cy; + mpi_limb_t v_limb; + + /* Multiply by the first limb in V separately, as the result can be + * stored (not added) to PROD. We also avoid a loop for zeroing. */ + v_limb = vp[0]; + if (v_limb <= 1) { + if (v_limb == 1) + MPN_COPY(prodp, up, size); + else + MPN_ZERO(prodp, size); + cy = 0; + } else + cy = mpihelp_mul_1(prodp, up, size, v_limb); + + prodp[size] = cy; + prodp++; + + /* For each iteration in the outer loop, multiply one limb from + * U with one limb from V, and add it to PROD. */ + for (i = 1; i < size; i++) { + v_limb = vp[i]; + if (v_limb <= 1) { + cy = 0; + if (v_limb == 1) + cy = mpihelp_add_n(prodp, prodp, up, size); + } else + cy = mpihelp_addmul_1(prodp, up, size, v_limb); + + prodp[size] = cy; + prodp++; + } + + return cy; +} + +static void +mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, + mpi_size_t size, mpi_ptr_t tspace) +{ + if (size & 1) { + /* The size is odd, and the code below doesn't handle that. + * Multiply the least significant (size - 1) limbs with a recursive + * call, and handle the most significant limb of S1 and S2 + * separately. + * A slightly faster way to do this would be to make the Karatsuba + * code below behave as if the size were even, and let it check for + * odd size in the end. I.e., in essence move this code to the end. + * Doing so would save us a recursive call, and potentially make the + * stack grow a lot less. + */ + mpi_size_t esize = size - 1; /* even size */ + mpi_limb_t cy_limb; + + MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace); + cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]); + prodp[esize + esize] = cy_limb; + cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]); + prodp[esize + size] = cy_limb; + } else { + /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm. + * + * Split U in two pieces, U1 and U0, such that + * U = U0 + U1*(B**n), + * and V in V1 and V0, such that + * V = V0 + V1*(B**n). + * + * UV is then computed recursively using the identity + * + * 2n n n n + * UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V + * 1 1 1 0 0 1 0 0 + * + * Where B = 2**BITS_PER_MP_LIMB. + */ + mpi_size_t hsize = size >> 1; + mpi_limb_t cy; + int negflg; + + /* Product H. ________________ ________________ + * |_____U1 x V1____||____U0 x V0_____| + * Put result in upper part of PROD and pass low part of TSPACE + * as new TSPACE. + */ + MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize, + tspace); + + /* Product M. ________________ + * |_(U1-U0)(V0-V1)_| + */ + if (mpihelp_cmp(up + hsize, up, hsize) >= 0) { + mpihelp_sub_n(prodp, up + hsize, up, hsize); + negflg = 0; + } else { + mpihelp_sub_n(prodp, up, up + hsize, hsize); + negflg = 1; + } + if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) { + mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize); + negflg ^= 1; + } else { + mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize); + /* No change of NEGFLG. */ + } + /* Read temporary operands from low part of PROD. + * Put result in low part of TSPACE using upper part of TSPACE + * as new TSPACE. + */ + MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize, + tspace + size); + + /* Add/copy product H. */ + MPN_COPY(prodp + hsize, prodp + size, hsize); + cy = mpihelp_add_n(prodp + size, prodp + size, + prodp + size + hsize, hsize); + + /* Add product M (if NEGFLG M is a negative number) */ + if (negflg) + cy -= + mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, + size); + else + cy += + mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, + size); + + /* Product L. ________________ ________________ + * |________________||____U0 x V0_____| + * Read temporary operands from low part of PROD. + * Put result in low part of TSPACE using upper part of TSPACE + * as new TSPACE. + */ + MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size); + + /* Add/copy Product L (twice) */ + + cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size); + if (cy) + mpihelp_add_1(prodp + hsize + size, + prodp + hsize + size, hsize, cy); + + MPN_COPY(prodp, tspace, hsize); + cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, + hsize); + if (cy) + mpihelp_add_1(prodp + size, prodp + size, size, 1); + } +} + +void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size) +{ + mpi_size_t i; + mpi_limb_t cy_limb; + mpi_limb_t v_limb; + + /* Multiply by the first limb in V separately, as the result can be + * stored (not added) to PROD. We also avoid a loop for zeroing. */ + v_limb = up[0]; + if (v_limb <= 1) { + if (v_limb == 1) + MPN_COPY(prodp, up, size); + else + MPN_ZERO(prodp, size); + cy_limb = 0; + } else + cy_limb = mpihelp_mul_1(prodp, up, size, v_limb); + + prodp[size] = cy_limb; + prodp++; + + /* For each iteration in the outer loop, multiply one limb from + * U with one limb from V, and add it to PROD. */ + for (i = 1; i < size; i++) { + v_limb = up[i]; + if (v_limb <= 1) { + cy_limb = 0; + if (v_limb == 1) + cy_limb = mpihelp_add_n(prodp, prodp, up, size); + } else + cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb); + + prodp[size] = cy_limb; + prodp++; + } +} + +void +mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace) +{ + if (size & 1) { + /* The size is odd, and the code below doesn't handle that. + * Multiply the least significant (size - 1) limbs with a recursive + * call, and handle the most significant limb of S1 and S2 + * separately. + * A slightly faster way to do this would be to make the Karatsuba + * code below behave as if the size were even, and let it check for + * odd size in the end. I.e., in essence move this code to the end. + * Doing so would save us a recursive call, and potentially make the + * stack grow a lot less. + */ + mpi_size_t esize = size - 1; /* even size */ + mpi_limb_t cy_limb; + + MPN_SQR_N_RECURSE(prodp, up, esize, tspace); + cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]); + prodp[esize + esize] = cy_limb; + cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]); + + prodp[esize + size] = cy_limb; + } else { + mpi_size_t hsize = size >> 1; + mpi_limb_t cy; + + /* Product H. ________________ ________________ + * |_____U1 x U1____||____U0 x U0_____| + * Put result in upper part of PROD and pass low part of TSPACE + * as new TSPACE. + */ + MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace); + + /* Product M. ________________ + * |_(U1-U0)(U0-U1)_| + */ + if (mpihelp_cmp(up + hsize, up, hsize) >= 0) + mpihelp_sub_n(prodp, up + hsize, up, hsize); + else + mpihelp_sub_n(prodp, up, up + hsize, hsize); + + /* Read temporary operands from low part of PROD. + * Put result in low part of TSPACE using upper part of TSPACE + * as new TSPACE. */ + MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size); + + /* Add/copy product H */ + MPN_COPY(prodp + hsize, prodp + size, hsize); + cy = mpihelp_add_n(prodp + size, prodp + size, + prodp + size + hsize, hsize); + + /* Add product M (if NEGFLG M is a negative number). */ + cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size); + + /* Product L. ________________ ________________ + * |________________||____U0 x U0_____| + * Read temporary operands from low part of PROD. + * Put result in low part of TSPACE using upper part of TSPACE + * as new TSPACE. */ + MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size); + + /* Add/copy Product L (twice). */ + cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size); + if (cy) + mpihelp_add_1(prodp + hsize + size, + prodp + hsize + size, hsize, cy); + + MPN_COPY(prodp, tspace, hsize); + cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, + hsize); + if (cy) + mpihelp_add_1(prodp + size, prodp + size, size, 1); + } +} + +int +mpihelp_mul_karatsuba_case(mpi_ptr_t prodp, + mpi_ptr_t up, mpi_size_t usize, + mpi_ptr_t vp, mpi_size_t vsize, + struct karatsuba_ctx *ctx) +{ + mpi_limb_t cy; + + if (!ctx->tspace || ctx->tspace_size < vsize) { + if (ctx->tspace) + mpi_free_limb_space(ctx->tspace); + ctx->tspace = mpi_alloc_limb_space(2 * vsize); + if (!ctx->tspace) + return -ENOMEM; + ctx->tspace_size = vsize; + } + + MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace); + + prodp += vsize; + up += vsize; + usize -= vsize; + if (usize >= vsize) { + if (!ctx->tp || ctx->tp_size < vsize) { + if (ctx->tp) + mpi_free_limb_space(ctx->tp); + ctx->tp = mpi_alloc_limb_space(2 * vsize); + if (!ctx->tp) { + if (ctx->tspace) + mpi_free_limb_space(ctx->tspace); + ctx->tspace = NULL; + return -ENOMEM; + } + ctx->tp_size = vsize; + } + + do { + MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace); + cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize); + mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize, + cy); + prodp += vsize; + up += vsize; + usize -= vsize; + } while (usize >= vsize); + } + + if (usize) { + if (usize < KARATSUBA_THRESHOLD) { + mpi_limb_t tmp; + if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp) + < 0) + return -ENOMEM; + } else { + if (!ctx->next) { + ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL); + if (!ctx->next) + return -ENOMEM; + } + if (mpihelp_mul_karatsuba_case(ctx->tspace, + vp, vsize, + up, usize, + ctx->next) < 0) + return -ENOMEM; + } + + cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize); + mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy); + } + + return 0; +} + +void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx) +{ + struct karatsuba_ctx *ctx2; + + if (ctx->tp) + mpi_free_limb_space(ctx->tp); + if (ctx->tspace) + mpi_free_limb_space(ctx->tspace); + for (ctx = ctx->next; ctx; ctx = ctx2) { + ctx2 = ctx->next; + if (ctx->tp) + mpi_free_limb_space(ctx->tp); + if (ctx->tspace) + mpi_free_limb_space(ctx->tspace); + kfree(ctx); + } +} + +/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) + * and v (pointed to by VP, with VSIZE limbs), and store the result at + * PRODP. USIZE + VSIZE limbs are always stored, but if the input + * operands are normalized. Return the most significant limb of the + * result. + * + * NOTE: The space pointed to by PRODP is overwritten before finished + * with U and V, so overlap is an error. + * + * Argument constraints: + * 1. USIZE >= VSIZE. + * 2. PRODP != UP and PRODP != VP, i.e. the destination + * must be distinct from the multiplier and the multiplicand. + */ + +int +mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, + mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result) +{ + mpi_ptr_t prod_endp = prodp + usize + vsize - 1; + mpi_limb_t cy; + struct karatsuba_ctx ctx; + + if (vsize < KARATSUBA_THRESHOLD) { + mpi_size_t i; + mpi_limb_t v_limb; + + if (!vsize) { + *_result = 0; + return 0; + } + + /* Multiply by the first limb in V separately, as the result can be + * stored (not added) to PROD. We also avoid a loop for zeroing. */ + v_limb = vp[0]; + if (v_limb <= 1) { + if (v_limb == 1) + MPN_COPY(prodp, up, usize); + else + MPN_ZERO(prodp, usize); + cy = 0; + } else + cy = mpihelp_mul_1(prodp, up, usize, v_limb); + + prodp[usize] = cy; + prodp++; + + /* For each iteration in the outer loop, multiply one limb from + * U with one limb from V, and add it to PROD. */ + for (i = 1; i < vsize; i++) { + v_limb = vp[i]; + if (v_limb <= 1) { + cy = 0; + if (v_limb == 1) + cy = mpihelp_add_n(prodp, prodp, up, + usize); + } else + cy = mpihelp_addmul_1(prodp, up, usize, v_limb); + + prodp[usize] = cy; + prodp++; + } + + *_result = cy; + return 0; + } + + memset(&ctx, 0, sizeof ctx); + if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0) + return -ENOMEM; + mpihelp_release_karatsuba_ctx(&ctx); + *_result = *prod_endp; + return 0; +} diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c new file mode 100644 index 000000000..bf076d281 --- /dev/null +++ b/lib/mpi/mpiutil.c @@ -0,0 +1,126 @@ +/* mpiutil.ac - Utility functions for MPI + * Copyright (C) 1998, 1999 Free Software Foundation, Inc. + * + * This file is part of GnuPG. + * + * GnuPG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GnuPG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "mpi-internal.h" + +/**************** + * Note: It was a bad idea to use the number of limbs to allocate + * because on a alpha the limbs are large but we normally need + * integers of n bits - So we should chnage this to bits (or bytes). + * + * But mpi_alloc is used in a lot of places :-) + */ +MPI mpi_alloc(unsigned nlimbs) +{ + MPI a; + + a = kmalloc(sizeof *a, GFP_KERNEL); + if (!a) + return a; + + if (nlimbs) { + a->d = mpi_alloc_limb_space(nlimbs); + if (!a->d) { + kfree(a); + return NULL; + } + } else { + a->d = NULL; + } + + a->alloced = nlimbs; + a->nlimbs = 0; + a->sign = 0; + a->flags = 0; + a->nbits = 0; + return a; +} +EXPORT_SYMBOL_GPL(mpi_alloc); + +mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs) +{ + size_t len = nlimbs * sizeof(mpi_limb_t); + + if (!len) + return NULL; + + return kmalloc(len, GFP_KERNEL); +} + +void mpi_free_limb_space(mpi_ptr_t a) +{ + if (!a) + return; + + kfree(a); +} + +void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs) +{ + mpi_free_limb_space(a->d); + a->d = ap; + a->alloced = nlimbs; +} + +/**************** + * Resize the array of A to NLIMBS. the additional space is cleared + * (set to 0) [done by m_realloc()] + */ +int mpi_resize(MPI a, unsigned nlimbs) +{ + void *p; + + if (nlimbs <= a->alloced) + return 0; /* no need to do it */ + + if (a->d) { + p = kmalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL); + if (!p) + return -ENOMEM; + memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t)); + kfree(a->d); + a->d = p; + } else { + a->d = kzalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL); + if (!a->d) + return -ENOMEM; + } + a->alloced = nlimbs; + return 0; +} + +void mpi_free(MPI a) +{ + if (!a) + return; + + if (a->flags & 4) + kfree(a->d); + else + mpi_free_limb_space(a->d); + + if (a->flags & ~7) + pr_info("invalid flag value in mpi\n"); + kfree(a); +} +EXPORT_SYMBOL_GPL(mpi_free); + +MODULE_DESCRIPTION("Multiprecision maths library"); +MODULE_LICENSE("GPL"); |