author | Nicolas Reynolds <fauno@kiwwwi.com.ar> | 2011-06-22 19:50:41 -0300
committer | Nicolas Reynolds <fauno@kiwwwi.com.ar> | 2011-06-22 19:50:41 -0300
commit | 2592adab23ef6c7a48fcc14d03a383f4e3447597 (patch)
tree | 9b091571a06f82d21f29ccbe669062ce96702e0a
parent | 2d8ff6f229ba867a2fc8d1108de40bb4eb299c0d (diff)
More patches
-rw-r--r-- | extra/ffmpeg/PKGBUILD | 55
-rw-r--r-- | extra/ffmpeg/ffmpeg-loongson.patch | 1794
-rw-r--r-- | extra/koffice/PKGBUILD | 12
-rw-r--r-- | extra/pixman/PKGBUILD | 10
-rw-r--r-- | extra/pixman/pixman-loongson2f.patch | 2745
-rw-r--r-- | extra/pygobject/fix-pycairo-capi-declaration.patch | 17
-rw-r--r-- | extra/x264/PKGBUILD | 4
-rw-r--r-- | extra/xulrunner/PKGBUILD | 18
-rw-r--r-- | extra/zziplib/PKGBUILD | 2
-rw-r--r-- | libre/ffmpeg-libre/PKGBUILD | 6
-rw-r--r-- | libre/icecat/PKGBUILD | 3
11 files changed, 4649 insertions, 17 deletions
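
The mips64el support in this commit follows one PKGBUILD idiom throughout: the Loongson patch is listed in source() next to the upstream tarball, and build() applies it only when $CARCH matches, so i686 and x86_64 keep building pristine sources. Below is a minimal sketch of that pattern; "example" and example-loongson.patch are placeholders, not files in this tree (the real instances are in the ffmpeg and pixman diffs that follow).

# Sketch only: package and patch names are hypothetical.
pkgname=example
source=(http://example.org/$pkgname-$pkgver.tar.gz
        example-loongson.patch)

build() {
  cd "$srcdir/$pkgname-$pkgver"

  # Apply the Loongson patch on mips64el only; the short-circuit form
  # mirrors the PKGBUILDs below and is safe here because it is not the
  # last command in the function.
  [ "$CARCH" = "mips64el" ] && patch -Np1 -i "$srcdir/example-loongson.patch"

  ./configure --prefix=/usr
  make
}
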
diff --git a/extra/ffmpeg/PKGBUILD b/extra/ffmpeg/PKGBUILD
new file mode 100644
index 000000000..1bd7fc9b4
--- /dev/null
+++ b/extra/ffmpeg/PKGBUILD
@@ -0,0 +1,55 @@
+# $Id$
+# Maintainer : Ionut Biru <ibiru@archlinux.org>
+# Contributor: Tom Newsom <Jeepster@gmx.co.uk>
+# Contributor: Paul Mattal <paul@archlinux.org>
+
+pkgname=ffmpeg
+pkgver=20110622
+pkgrel=1
+pkgdesc="Complete and free Internet live audio and video broadcasting solution for Linux/Unix"
+arch=('i686' 'x86_64' 'mips64el')
+url="http://ffmpeg.org/"
+license=('GPL')
+depends=('bzip2' 'lame' 'sdl' 'libvorbis' 'xvidcore' 'zlib' 'x264' 'libtheora' 'opencore-amr' 'alsa-lib' 'libvdpau' 'libxfixes' 'schroedinger' 'libvpx' 'libva' 'openjpeg' 'rtmpdump')
+makedepends=('yasm' 'git')
+#git clone git://git.videolan.org/ffmpeg.git
+source=(ftp://ftp.archlinux.org/other/ffmpeg/${pkgname}-${pkgver}.tar.xz
+        ffmpeg-loongson.patch)
+md5sums=('6003afa1f87857db729d697e3ec1be36'
+         '081d03278559a351322157a441fabcf5')
+
+build() {
+  cd "$srcdir/$pkgname"
+
+  [ "$CARCH" = "mips64el" ] && patch -Np1 -i $srcdir/ffmpeg-loongson.patch
+
+  ./configure \
+    --prefix=/usr \
+    --enable-libmp3lame \
+    --enable-libvorbis \
+    --enable-libxvid \
+    --enable-libx264 \
+    --enable-libvpx \
+    --enable-libtheora \
+    --enable-postproc \
+    --enable-shared \
+    --enable-x11grab \
+    --enable-libopencore_amrnb \
+    --enable-libopencore_amrwb \
+    --enable-libschroedinger \
+    --enable-libopenjpeg \
+    --enable-librtmp \
+    --enable-gpl \
+    --enable-version3 \
+    --enable-runtime-cpudetect \
+    --disable-debug
+
+  make
+  make tools/qt-faststart
+  make doc/ff{mpeg,play,server}.1
+
+  make DESTDIR="$pkgdir" install install-man
+  install -D -m755 tools/qt-faststart "$pkgdir/usr/bin/qt-faststart"
+}
+
+# vim:set ts=2 sw=2 et:
diff --git a/extra/ffmpeg/ffmpeg-loongson.patch b/extra/ffmpeg/ffmpeg-loongson.patch
new file mode 100644
index 000000000..501eafd15
--- /dev/null
+++ b/extra/ffmpeg/ffmpeg-loongson.patch
@@ -0,0 +1,1794 @@
+diff --git a/configure b/configure
+index 25e8cef..1d6c652 100755
+--- a/configure
++++ b/configure
+@@ -230,6 +230,7 @@ Advanced options (experts only):
+   --disable-armvfp         disable ARM VFP optimizations
+   --disable-iwmmxt         disable iwmmxt optimizations
+   --disable-mmi            disable MMI optimizations
++  --disable-loongson2mmi   disable LOONGSON2 Multi-Media Instructions usage"
+   --disable-neon           disable neon optimizations
+   --disable-vis            disable VIS optimizations
+   --disable-yasm           disable use of yasm assembler
+@@ -995,6 +996,7 @@ ARCH_EXT_LIST='
+     armvfp
+     iwmmxt
+     mmi
++    loongson2mmi
+     mmx
+     mmx2
+     neon
+@@ -2862,6 +2864,7 @@ if enabled arm; then
+ fi
+ if enabled mips; then
+     echo "MMI enabled               ${mmi-no}"
++    echo "LOONGSON2MMI enabled      ${loongson2mmi-no}"
+ fi
+ if enabled ppc; then
+     echo "AltiVec enabled           ${altivec-no}"
+diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
+index add4b10..8244e51 100644
+--- a/libavcodec/avcodec.h
++++ b/libavcodec/avcodec.h
+@@ -1586,6 +1586,8 @@ typedef struct AVCodecContext {
+ #define FF_IDCT_SIMPLENEON    22
+ #define FF_IDCT_SIMPLEALPHA   23
+ #define FF_IDCT_BINK          24
++#define FF_IDCT_LIBMPEG2LOONGSON2 25
++#define FF_IDCT_XVIDLOONGSON2     26
+ 
+ /**
+  * slice count
+diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
+index bbfdb6a..dfc3452 100644
+--- a/libavcodec/dsputil.c
++++ b/libavcodec/dsputil.c
+@@ -4525,6 +4525,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
+     if (HAVE_MMI)        dsputil_init_mmi   (c, avctx);
+     if (ARCH_SH4)        dsputil_init_sh4   (c, avctx);
+     if (ARCH_BFIN)       dsputil_init_bfin  (c, avctx);
++
if (HAVE_LOONGSON2MMI) dsputil_init_loongson2(c, avctx); + + for(i=0; i<64; i++){ + if(!c->put_2tap_qpel_pixels_tab[0][i]) +diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h +index d1816e6..1a72ae9 100644 +--- a/libavcodec/dsputil.h ++++ b/libavcodec/dsputil.h +@@ -636,6 +636,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx); + void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx); + void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx); + void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx); ++void dsputil_init_loongson2(DSPContext* c, AVCodecContext *avctx); + + void ff_dsputil_init_dwt(DSPContext *c); + void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx); +diff --git a/libavcodec/loongson2/dsputil_loongson2.c b/libavcodec/loongson2/dsputil_loongson2.c +new file mode 100644 +index 0000000..01bd3ac +--- /dev/null ++++ b/libavcodec/loongson2/dsputil_loongson2.c +@@ -0,0 +1,221 @@ ++/* ++ * Copyright(C) 2006-2010 comcat <jiankemeng@gmail.com> ++ * ++ * Optimized for Loongson2 CPUs by comcat <jiankemeng@gmail.com> ++ * ++ */ ++ ++#include "dsputil_loongson2.h" ++#include "../simple_idct.h" ++#include "../mpegvideo.h" ++ ++//extern void ff_idct_xvid_loongson2(short *block); ++ ++extern void ff_loongson2_idct(DCTELEM *block); ++extern void ff_idct_xvid_loongson2(short *block); ++ ++static void add_pixels_clamped_loongson2(const DCTELEM *block, uint8_t *restrict pixels, int line_size) ++{ ++ const DCTELEM *p; ++ uint8_t *pix; ++ int i,j; ++ p = block; ++ pix = pixels; ++ i = 4; ++ j = line_size << 1; ++ __asm __volatile("xor $f14, $f14, $f14\n\t"); ++ do { ++ __asm __volatile( ++// ".set mips3 \n\t" ++ "ldc1 $f0, 0(%2) \n\t" ++ "ldc1 $f2, 8(%2) \n\t" ++ "ldc1 $f4, 16(%2) \n\t" ++ "ldc1 $f6, 24(%2) \n\t" ++ "ldc1 $f8, %0 \n\t" ++ "ldc1 $f12, %1 \n\t" ++ "mov.d $f10, $f8 \n\t" ++ ++ "punpcklbh $f8, $f8, $f14 \n\t" ++ "punpckhbh $f10, $f10, $f14\n\t" ++ ++ "paddsh $f0, $f0, $f8 \n\t" ++ "paddsh $f2, $f2, $f10 \n\t" ++ ++ "mov.d $f10, $f12 \n\t" ++ ++ "punpcklbh $f12, $f12, $f14\n\t" ++ "punpckhbh $f10, $f10, $f14\n\t" ++ ++ "paddsh $f4, $f4, $f12 \n\t" ++ "paddsh $f6, $f6, $f10 \n\t" ++ ++ "packushb $f0, $f0, $f2 \n\t" ++ "packushb $f4, $f4, $f6 \n\t" ++ ++ "sdc1 $f0, %0 \n\t" ++ "sdc1 $f4, %1 \n\t" ++// ".set mips2 \n\t" ++ :"+m"(*pix), "+m"(*(pix+line_size)) ++ :"r"(p) ++ :"$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","memory"); ++ pix += j; ++ p += 16; ++ } while (--i); ++ ++} ++ ++static void put_pixels_clamped_loongson2(const DCTELEM *block, uint8_t *restrict pixels, int line_size) ++{ ++ const DCTELEM *p; ++ uint8_t *pix; ++ int tmp = line_size * 3; ++ p = block; ++ pix = pixels; ++ __asm __volatile ++ ( ++// ".set mips3 \n\t" ++ //"dadd $12, $0, $0\n\t" ++ //"dadd $13, $0, $0\n\t" ++ //"dadd $14, $0, $0\n\t" ++ ++ "ldc1 $f0, 0(%3)\n\t" ++ "ldc1 $f2, 8(%3)\n\t" ++ "ldc1 $f4, 16(%3)\n\t" ++ "ldc1 $f6, 24(%3)\n\t" ++ "ldc1 $f8, 32(%3)\n\t" ++ "ldc1 $f10, 40(%3)\n\t" ++ "ldc1 $f16, 48(%3)\n\t" ++ "ldc1 $f18, 56(%3)\n\t" ++ ++ "packushb $f0, $f0, $f2\n\t" ++ "packushb $f4, $f4, $f6\n\t" ++ "packushb $f8, $f8, $f10\n\t" ++ "packushb $f16, $f16, $f18\n\t" ++ ++ "add $12, %0, %1\n\t" ++ "add $13, $12, %1\n\t" ++ "add $14, %0, %2\n\t" ++ ++ "sdc1 $f0, 0(%0)\n\t" ++ "sdc1 $f4, 0($12)\n\t" ++ "sdc1 $f8, 0($13)\n\t" ++ "sdc1 $f16, 0($14)\n\t" ++// ".set mips2\n\t" ++ : ++ :"r" (pix), "r" (line_size), "r" (tmp), "r"(p) ++ :"$12","$13","$14","$f0","$f2","$f4","$f6","$f8","$f10","$16","$18" ++ ); ++ ++ pix += line_size*4; ++ p += 32; ++ ++ __asm 
__volatile ++ ( ++// ".set mips3 \n\t" ++ ++ "dadd $12, $0, $0\n\t" ++ "dadd $13, $0, $0\n\t" ++ "dadd $14, $0, $0\n\t" ++ "lw $12, %3\n\t" ++ ++ "ldc1 $f0, 0($12)\n\t" ++ "ldc1 $f2, 8($12)\n\t" ++ "ldc1 $f4, 16($12)\n\t" ++ "ldc1 $f6, 24($12)\n\t" ++ "ldc1 $f8, 32($12)\n\t" ++ "ldc1 $f10, 40($12)\n\t" ++ "ldc1 $f16, 48($12)\n\t" ++ "ldc1 $f18, 56($12)\n\t" ++ ++ "packushb $f0, $f0, $f2\n\t" ++ "packushb $f4, $f4, $f6\n\t" ++ "packushb $f8, $f8, $f10\n\t" ++ "packushb $f16, $f16, $f18\n\t" ++ ++ "add $12, %1, %0\n\t" ++ "add $13, $12, %1\n\t" ++ "add $15, %2, %0\n\t" ++ ++ "sdc1 $f0, 0(%0)\n\t" ++ "sdc1 $f4, 0($12)\n\t" ++ ++ "sdc1 $f8, 0($13)\n\t" ++ "sdc1 $f16, 0($15)\n\t" ++// ".set mips2\n\t" ++ : ++ :"r" (pix), "r" (line_size), "r" (tmp), "m"(p) ++ :"$12","$13","$15","$f0","$f2","$f4","$f6","$f8","$f10","$16","$18","memory" ++ ); ++ ++} ++ ++/* ++void put_signed_pixels_clamped_loongson2(const DCTELEM *block, uint8_t *pixels, int line_size) ++{ ++ ++} ++ ++ ++void ff_loongson2_idct_put(uint8_t *dest, int line_size, DCTELEM *block) ++{ ++ ff_loongson2_idct(block); ++ put_pixels_clamped_loongson2(block, dest, line_size); ++} ++ ++void ff_loongson2_idct_add(uint8_t *dest, int line_size, DCTELEM *block) ++{ ++ ff_loongson2_idct(block); ++ add_pixels_clamped_loongson2(block, dest, line_size); ++}*/ ++ ++static void ff_idct_xvid_loongson2_put(uint8_t *dest, int line_size, DCTELEM *block) ++{ ++ ff_idct_xvid_loongson2(block); ++ put_pixels_clamped_loongson2(block, dest, line_size); ++} ++ ++static void ff_idct_xvid_loongson2_add(uint8_t *dest, int line_size, DCTELEM *block) ++{ ++ ff_idct_xvid_loongson2(block); ++ add_pixels_clamped_loongson2(block, dest, line_size); ++} ++ ++void dsputil_init_loongson2(DSPContext *c, AVCodecContext *avctx) ++{ ++ ++ const int idct_algo = avctx->idct_algo; ++ ++/* ++#ifdef CONFIG_ENCODERS ++ const int dct_algo = avctx->dct_algo; ++ if(dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_LOONGSON2) ++ c->fdct = ff_fdct_loongson2; ++#endif ++*/ ++ ++#if 0 ++ if(avctx->lowres==0) ++ { ++ if(idct_algo == FF_IDCT_LIBMPEG2LOONGSON2) ++ { ++ c->idct_add = ff_loongson2_idct_add; ++ c->idct_put = ff_loongson2_idct_put; ++ c->idct = ff_loongson2_idct; ++ } ++ else if(idct_algo == FF_IDCT_XVIDLOONGSON2) ++ { ++#endif ++ c->idct_add = ff_idct_xvid_loongson2_add; ++ c->idct_put = ff_idct_xvid_loongson2_put; ++ c->idct = ff_idct_xvid_loongson2; ++ //} ++ //} ++ ++ c->put_pixels_clamped = put_pixels_clamped_loongson2; ++ c->add_pixels_clamped = add_pixels_clamped_loongson2; ++ ++#ifdef CONFIG_ENCODERS ++ dsputil_init_pix_loongson2(c, avctx); ++#endif ++ ++} +diff --git a/libavcodec/loongson2/dsputil_loongson2.d b/libavcodec/loongson2/dsputil_loongson2.d +new file mode 100644 +index 0000000..808f0a3 +--- /dev/null ++++ b/libavcodec/loongson2/dsputil_loongson2.d +@@ -0,0 +1,18 @@ ++libavcodec/loongson2/dsputil_loongson2.o: \ ++ libavcodec/loongson2/dsputil_loongson2.c \ ++ libavcodec/loongson2/dsputil_loongson2.h libavcodec/dsputil.h \ ++ libavutil/intreadwrite.h config.h libavutil/bswap.h \ ++ libavutil/attributes.h libavutil/common.h libavutil/intmath.h \ ++ libavutil/mem.h libavutil/internal.h libavutil/timer.h libavutil/libm.h \ ++ libavutil/mips/intreadwrite.h libavcodec/avcodec.h libavutil/avutil.h \ ++ libavutil/error.h libavutil/avutil.h libavutil/mathematics.h \ ++ libavutil/rational.h libavutil/intfloat_readwrite.h libavutil/log.h \ ++ libavutil/pixfmt.h libavutil/avconfig.h \ ++ libavcodec/loongson2/../simple_idct.h libavcodec/loongson2/../dsputil.h \ ++ 
libavcodec/loongson2/../mpegvideo.h libavcodec/loongson2/../get_bits.h \ ++ libavutil/bswap.h libavutil/common.h libavutil/log.h \ ++ libavcodec/loongson2/../mathops.h libavcodec/loongson2/../mips/mathops.h \ ++ libavcodec/loongson2/../put_bits.h libavcodec/loongson2/../ratecontrol.h \ ++ libavcodec/loongson2/../eval.h libavcodec/loongson2/../parser.h \ ++ libavcodec/loongson2/../avcodec.h libavcodec/loongson2/../mpeg12data.h \ ++ libavutil/rational.h libavcodec/loongson2/../rl.h +diff --git a/libavcodec/loongson2/dsputil_loongson2.h b/libavcodec/loongson2/dsputil_loongson2.h +new file mode 100644 +index 0000000..87c7bd9 +--- /dev/null ++++ b/libavcodec/loongson2/dsputil_loongson2.h +@@ -0,0 +1,3 @@ ++#include "libavcodec/dsputil.h" ++ ++void dsputil_init_pix_loongson2(DSPContext* c, AVCodecContext *avctx); +diff --git a/libavcodec/loongson2/dsputil_loongson2.o b/libavcodec/loongson2/dsputil_loongson2.o +new file mode 100644 +index 0000000..fca0b55 +Binary files /dev/null and b/libavcodec/loongson2/dsputil_loongson2.o differ +diff --git a/libavcodec/loongson2/idct_loongson2.c b/libavcodec/loongson2/idct_loongson2.c +new file mode 100644 +index 0000000..539cab5 +--- /dev/null ++++ b/libavcodec/loongson2/idct_loongson2.c +@@ -0,0 +1,336 @@ ++/* ++ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> ++ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> ++ * ++ * Copyright (c) 2007-2010 comcat <jiankemeng@gmail.com>. ++ * ++ * Optimized for Loongson2 CPUs by comcat <jiankemeng@gmail.com> ++ * ++ * Based on i386 ++ */ ++ ++#include "libavutil/common.h" ++#include "dsputil_loongson2.h" ++ ++ ++#define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align))) ++ ++ ++#define ROW_SHIFT 11 ++#define COL_SHIFT 6 ++ ++#define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT))) ++#define rounder(bias) {round (bias), round (bias)} ++ ++ ++ ++#define loongson2_table(c1,c2,c3,c4,c5,c6,c7) { c4,c2,-c4,-c2, \ ++ c4,c6,c4,c6, \ ++ c1,c3,-c1,-c5,\ ++ c5,c7,c3,-c7, \ ++ c4,-c6,c4,-c6, \ ++ -c4,c2,c4,-c2, \ ++ c5,-c1,c3,-c1, \ ++ c7,c3,c7,-c5 } ++ ++ ++static inline void loongson2_row_head(int16_t * const row, const int offset, ++ const int16_t * const table) ++{ ++ __asm__ volatile( ++// ".set\tmips3\n" ++ ".set noreorder\n" ++ "ldc1 $f6,%0\n" ++ "ldc1 $f14,%1\n" ++ "ldc1 $f2,%2\n" ++ "ldc1 $f8,%3\n" ++ "dli $12,%4\n" ++ "dmtc1 $12,$f16\n" ++ "mov.d $f4,$f6\n" ++ "mov.d $f10,$f14\n" ++ "pmaddhw $f2,$f2,$f4\n" ++ "pshufh $f6,$f6,$f16\n" ++ ".set reorder\n" ++// ".set\tmips0\n" ++ : ++ :"m"(*(row+offset)),"m"(*(row+offset+4)),"m"(*table),"m"(*(table+4)),"i"(0x4e) ++ :"$f2","$f4","$f6","$f8","$f10","$f14","$f16","$12" ++ ); ++} ++ ++ ++static inline void loongson2_row(const int16_t * const table, ++ const int32_t * const rounder) ++{ ++ __asm__ volatile ( ++// ".set\tmips3\n" ++ ".set\tnoreorder\n" ++ "ldc1 $f0,%0\n" ++ "pmaddhw $f8,$f8,$f6\n" ++ "ldc1 $f16,%1\n" ++ "dli $13,%8\n" ++ "ldc1 $f20,%2\n" ++ "pmaddhw $f0,$f0,$f14\n" ++ "ldc1 $f22,%3\n" ++ "pmaddhw $f4,$f4,$f16\n" ++ "paddw $f2,$f2,$f22\n" ++ "ldc1 $f22,%4\n" ++ "dmtc1 $13,$f16\n" ++ "paddw $f2,$f2,$f8\n" ++ "pmaddhw $f14,$f14,$f22\n" ++ "mov.d $f8,$f2\n" ++ "pshufh $f10,$f10,$f16\n" ++ "ldc1 $f22,%3\n" ++ "pmaddhw $f20,$f20,$f10\n" ++ "ldc1 $f16,%5\n" ++ "paddw $f4,$f4,$f22\n" ++ "paddw $f0,$f0,$f20\n" ++ "dli $12,%6\n" ++ "pmaddhw $f6,$f6,$f16\n" ++ "psubw $f2,$f2,$f0\n" ++ "ldc1 $f16,%7\n" ++ "paddw $f0,$f0,$f8\n" ++ "paddw $f4,$f4,$f6\n" ++ "pmaddhw $f10,$f10,$f16\n" ++ "mov.d $f8,$f4\n" ++ "dmtc1 $12,$f16\n" ++ "paddw 
$f14,$f14,$f10\n" ++ "psraw $f2,$f2,$f16\n" ++ "psraw $f0,$f0,$f16\n" ++ "paddw $f4,$f4,$f14\n" ++ "psubw $f8,$f8,$f14\n" ++ ".set\treorder\n" ++// ".set\tmips0\n" ++ : ++ :"m"(*(table+8)),"m"(*(table+16)),"m"(*(table+12)),"m"(*rounder),"m"(*(table+24)),"m"(*(table+20)),"i"(ROW_SHIFT),"m"(*(table+16)),"i"(0x4e) ++ :"$f0","$f2","$f4","$f6","$f8","$f10","$f14","$f16","$f20","$f22","$12","$13","memory" ++ ); ++} ++ ++static inline void loongson2_row_tail(int16_t * const row, const int store) ++{ ++ __asm__ volatile ( ++// ".set\tmips3\n" ++ ".set\tnoreorder\n" ++ "dli $12,%2\n" ++ "dmtc1 $12,$f16\n" ++ "psraw $f4,$f4,$f16\n" ++ "psraw $f8,$f8,$f16\n" ++ "packsswh $f0,$f0,$f4\n" ++ "packsswh $f8,$f8,$f2\n" ++ "sdc1 $f0,%0\n" ++ "dli $13,%3\n" ++ "dmtc1 $13,$f22\n" ++ "pshufh $f8,$f8,$f22\n" ++ "sdc1 $f8,%1\n" ++ ".set\treorder\n" ++// ".set\tmips0\n" ++ :"=m"(*(row+store)),"=m"(*(row+store+4)) ++ :"i"(ROW_SHIFT),"i"(0xb1) ++ :"$f0","$f2","$f4","$f6","$f8","$f16","$f22","$12","$13","memory" ++ ); ++} ++ ++static inline void loongson2_row_mid(int16_t * const row, const int store, ++ const int offset, ++ const int16_t * const table) ++{ ++ __asm__ volatile ( ++// ".set\tmips3\n" ++ ".set\tnoreorder\n" ++ "ldc1 $f6,%2\n" ++ "dli $12,%3\n" ++ "dmtc1 $12,$f16\n" ++ "psraw $f4,$f4,$f16\n" ++ "ldc1 $f14,%4\n" ++ "psraw $f8,$f8,$f16\n" ++ "packsswh $f0,$f0,$f4\n" ++ "mov.d $f10,$f14\n" ++ "packsswh $f8,$f8,$f2\n" ++ "mov.d $f4,$f6\n" ++ "sdc1 $f0,%0\n" ++ "dli $13,%5\n" ++ "dmtc1 $13,$f22\n" ++ "pshufh $f8,$f8,$f22\n" ++ "ldc1 $f2,%6\n" ++ "sdc1 $f8,%1\n" ++ "pmaddhw $f2,$f2,$f4\n" ++ "ldc1 $f8,%7\n" ++ "dli $12,%8\n" ++ "dmtc1 $12,$f16\n" ++ "pshufh $f6,$f6,$f16\n" ++ ".set\treorder\n" ++// ".set\tmips0\n" ++ :"=m"(*(row+store)),"=m"(*(row+store+4)) ++ : "m"(*(row+offset)),"i"(ROW_SHIFT),"m"(*(row+offset+4)),"i"(0xb1),"m"(*table),"m"(*(table+4)),"i"(0x4e) ++ :"$f0","$f2","$f4","$f6","$f8","$f10","$14","$f16","$f22","$12","$13","memory" ++ ); ++} ++ ++static inline void idct_col(int16_t * const col, const int offset) ++{ ++#define T1 13036 ++#define T2 27146 ++#define T3 43790 ++#define C4 23170 ++ static const short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1}; ++ static const short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2}; ++ static const short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3}; ++ static const short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4}; ++ ++ __asm__ volatile ( ++// ".set\tmips3\n" ++ ".set\tnoreorder\n" ++ "ldc1 $f4,%8\n" ++ "ldc1 $f0,%9\n" ++ "mov.d $f6,$f4\n" ++ "ldc1 $f8,%10\n" ++ "pmulhh $f4,$f4,$f0\n" ++ "ldc1 $f14,%11\n" ++ "pmulhh $f6,$f6,$f8\n" ++ "ldc1 $f10,%12\n" ++ "mov.d $f20,$f14\n" ++ "ldc1 $f2,%13\n" ++ "psubsh $f4,$f4,$f8\n" ++ "ldc1 $f8,%14\n" ++ "pmulhh $f14,$f14,$f2\n" ++ "paddsh $f0,$f0,$f6\n" ++ "pmulhh $f20,$f20,$f10\n" ++ "mov.d $f6,$f8\n" ++ "paddsh $f14,$f14,$f2\n" ++ "ldc1 $f16,%15\n" ++ "pmulhh $f8,$f8,$f16\n" ++ "paddsh $f20,$f20,$f10\n" ++ "psubsh $f14,$f14,$f10\n" ++ "paddsh $f20,$f20,$f2\n" ++ "ldc1 $f2,%16\n" ++ "mov.d $f10,$f4\n" ++ "pmulhh $f6,$f6,$f2\n" ++ "psubsh $f4,$f4,$f14\n" ++ "psubsh $f8,$f8,$f2\n" ++ "paddsh $f14,$f14,$f10\n" ++ "sdc1 $f4,%0\n" ++ "mov.d $f10,$f0\n" ++ "ldc1 $f22,%15\n" ++ "paddsh $f6,$f6,$f22\n" ++ "paddsh $f10,$f10,$f20\n" ++ "psubsh $f0,$f0,$f20\n" ++ "mov.d $f20,$f0\n" ++ "ldc1 $f2,%17\n" ++ "paddsh $f0,$f0,$f14\n" ++ "ldc1 $f4,%18\n" ++ "psubsh $f20,$f20,$f14\n" ++ "sdc1 $f10,%1\n" ++ "pmulhh $f0,$f0,$f4\n" ++ "mov.d $f10,$f8\n" ++ "pmulhh $f20,$f20,$f4\n" ++ "ldc1 $f14,%19\n" ++ "mov.d $f4,$f2\n" ++ "psubsh $f2,$f2,$f14\n" ++ "paddsh 
$f4,$f4,$f14\n" ++ "paddsh $f8,$f8,$f2\n" ++ "mov.d $f14,$f4\n" ++ "psubsh $f2,$f2,$f10\n" ++ "paddsh $f14,$f14,$f6\n" ++ "paddsh $f0,$f0,$f0\n" ++ "psubsh $f4,$f4,$f6\n" ++ "paddsh $f20,$f20,$f20\n" ++ "mov.d $f6,$f2\n" ++ "mov.d $f10,$f8\n" ++ "paddsh $f2,$f2,$f20\n" ++ "dli $12,%20\n" ++ "dmtc1 $12,$f16\n" ++ "psrah $f2,$f2,$f16\n" ++ "paddsh $f8,$f8,$f0\n" ++ "psrah $f8,$f8,$f16\n" ++ "psubsh $f10,$f10,$f0\n" ++ "ldc1 $f0,%12\n" ++ "psubsh $f6,$f6,$f20\n" ++ "psrah $f10,$f10,$f16\n" ++ "mov.d $f20,$f14\n" ++ "sdc1 $f8,%2\n" ++ "psrah $f6,$f6,$f16\n" ++ "sdc1 $f2,%3\n" ++ "paddsh $f14,$f14,$f0\n" ++ "ldc1 $f8,%13\n" ++ "psubsh $f20,$f20,$f0\n" ++ "psrah $f14,$f14,$f16\n" ++ "mov.d $f2,$f4\n" ++ "sdc1 $f6,%1\n" ++ "psubsh $f2,$f2,$f8\n" ++ "psrah $f20,$f20,$f16\n" ++ "paddsh $f8,$f8,$f4\n" ++ "sdc1 $f14,%4\n" ++ "psrah $f2,$f2,$f16\n" ++ "sdc1 $f10,%5\n" ++ "psrah $f8,$f8,$f16\n" ++ "sdc1 $f20,%6\n" ++ "sdc1 $f2,%7\n" ++ "sdc1 $f8,%0\n" ++ ".set\treorder\n" ++// ".set\tmips0\n" ++ :"=m"(*(col+offset+3*8)),"=m"(*(col+offset+5*8)),"=m"(*(col+offset+1*8)),"=m"(*(col+offset+2*8)),"=m"(*(col+offset+0*8)),"=m"(*(col+offset+6*8)),"=m"(*(col+offset+7*8)),"=m"(*(col+offset+4*8)) ++ :"m"(*_T1),"m"(*(col+offset+1*8)),"m"(*(col+offset+7*8)),"m"(*_T3),"m"(*(col+offset+5*8)),"m"(*(col+offset+3*8)),"m"(*_T2),"m"(*(col+offset+2*8)),"m"(*(col+offset+6*8)),"m"(*(col+offset+0*8)),"m"(*_C4),"m"(*(col+offset+4*8)),"i"(COL_SHIFT) ++ :"$f0","$f2","$f4","$f6","$f8","$f10","$14","$f16","$20","$f22","$12","memory" ++ ); ++} ++ ++static const int32_t rounder0[] ATTR_ALIGN(8) = ++ rounder ((1 << (COL_SHIFT - 1)) - 0.5); ++static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0); ++static const int32_t rounder1[] ATTR_ALIGN(8) = ++ rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ ++static const int32_t rounder7[] ATTR_ALIGN(8) = ++ rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */ ++static const int32_t rounder2[] ATTR_ALIGN(8) = ++ rounder (0.60355339059); /* C2 * (C6+C2)/2 */ ++static const int32_t rounder6[] ATTR_ALIGN(8) = ++ rounder (-0.25); /* C2 * (C6-C2)/2 */ ++static const int32_t rounder3[] ATTR_ALIGN(8) = ++ rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */ ++static const int32_t rounder5[] ATTR_ALIGN(8) = ++ rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */ ++ ++ ++#undef COL_SHIFT ++#undef ROW_SHIFT ++ ++ ++#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \ ++inline void idct (int16_t * const block) \ ++{ \ ++ static const int16_t table04[] ATTR_ALIGN(16) = \ ++ table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \ ++ static const int16_t table17[] ATTR_ALIGN(16) = \ ++ table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \ ++ static const int16_t table26[] ATTR_ALIGN(16) = \ ++ table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \ ++ static const int16_t table35[] ATTR_ALIGN(16) = \ ++ table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \ ++ \ ++ idct_row_head (block, 0*8, table04); \ ++ idct_row (table04, rounder0); \ ++ idct_row_mid (block, 0*8, 4*8, table04); \ ++ idct_row (table04, rounder4); \ ++ idct_row_mid (block, 4*8, 1*8, table17); \ ++ idct_row (table17, rounder1); \ ++ idct_row_mid (block, 1*8, 7*8, table17); \ ++ idct_row (table17, rounder7); \ ++ idct_row_mid (block, 7*8, 2*8, table26); \ ++ idct_row (table26, rounder2); \ ++ idct_row_mid (block, 2*8, 6*8, table26); \ ++ idct_row (table26, rounder6); \ ++ idct_row_mid (block, 6*8, 3*8, table35); \ ++ idct_row (table35, rounder3); \ ++ idct_row_mid (block, 3*8, 5*8, table35); \ ++ idct_row 
(table35, rounder5); \ ++ idct_row_tail (block, 5*8); \ ++ \ ++ idct_col (block, 0); \ ++ idct_col (block, 4); \ ++} ++ ++void ff_loongson2_idct(DCTELEM *block); ++ ++declare_idct (ff_loongson2_idct, loongson2_table, ++ loongson2_row_head, loongson2_row, loongson2_row_tail, loongson2_row_mid) +diff --git a/libavcodec/loongson2/idct_loongson2_xvid.c b/libavcodec/loongson2/idct_loongson2_xvid.c +new file mode 100644 +index 0000000..4a1ee1e +--- /dev/null ++++ b/libavcodec/loongson2/idct_loongson2_xvid.c +@@ -0,0 +1,301 @@ ++/* ++ * XVID MPEG-4 VIDEO CODEC ++ * ++ * Copyright(C) 2006-2010 comcat <jiankemeng@gmail.com> ++ * ++ * Optimized for Loongson2 CPUs by comcat <jiankemeng@gmail.com> ++ * ++ * Based on i386 ++ * ++ */ ++ ++ ++#include <inttypes.h> ++#include "../avcodec.h" ++ ++void ff_idct_xvid_loongson2(short *block); ++ ++//============================================================================= ++// Macros and other preprocessor constants ++//============================================================================= ++ ++#define BITS_INV_ACC 5 // 4 or 5 for IEEE ++#define SHIFT_INV_ROW (16 - BITS_INV_ACC) //11 ++#define SHIFT_INV_COL (1 + BITS_INV_ACC) //6 ++#define RND_INV_ROW (1024 * (6 - BITS_INV_ACC)) ++#define RND_INV_COL (16 * (BITS_INV_ACC - 3)) ++#define RND_INV_CORR (RND_INV_COL - 1) ++ ++#define BITS_FRW_ACC 3 // 2 or 3 for accuracy ++#define SHIFT_FRW_COL BITS_FRW_ACC ++#define SHIFT_FRW_ROW (BITS_FRW_ACC + 17) ++#define RND_FRW_ROW (262144*(BITS_FRW_ACC - 1)) ++ ++ ++//----------------------------------------------------------------------------- ++// Various memory constants (trigonometric values or rounding values) ++//----------------------------------------------------------------------------- ++ ++static const int16_t tg_1_16[4*4] attribute_used __attribute__ ((aligned(8))) = { ++ 13036,13036,13036,13036, // tg * (2<<16) + 0.5 ++ 27146,27146,27146,27146, // tg * (2<<16) + 0.5 ++ -21746,-21746,-21746,-21746, // tg * (2<<16) + 0.5 ++ 23170,23170,23170,23170}; // cos * (2<<15) + 0.5 ++ ++static const int32_t rounder_0[2*8] attribute_used __attribute__ ((aligned(8))) = { ++ 65536,65536, ++ 3597,3597, ++ 2260,2260, ++ 1203,1203, ++ 0,0, ++ 120,120, ++ 512,512, ++ 512,512}; ++ ++ ++// Table for rows 0,4 - constants are multiplied by cos_4_16 ++static const int16_t tab_i_04_mmx[32*4] attribute_used __attribute__ ((aligned(8))) = { ++ 16384,16384,16384,-16384, // movq-> w06 w04 w02 w00 ++ 21407,8867,8867,-21407, // w07 w05 w03 w01 ++ 16384,-16384,16384,16384, // w14 w12 w10 w08 ++ -8867,21407,-21407,-8867, // w15 w13 w11 w09 ++ 22725,12873,19266,-22725, // w22 w20 w18 w16 ++ 19266,4520,-4520,-12873, // w23 w21 w19 w17 ++ 12873,4520,4520,19266, // w30 w28 w26 w24 ++ -22725,19266,-12873,-22725, // w31 w29 w27 w25 ++// Table for rows 1,7 - constants are multiplied by cos_1_16 ++ 22725,22725,22725,-22725, // movq-> w06 w04 w02 w00 ++ 29692,12299,12299,-29692, // w07 w05 w03 w01 ++ 22725,-22725,22725,22725, // w14 w12 w10 w08 ++ -12299,29692,-29692,-12299, // w15 w13 w11 w09 ++ 31521,17855,26722,-31521, // w22 w20 w18 w16 ++ 26722,6270,-6270,-17855, // w23 w21 w19 w17 ++ 17855,6270,6270,26722, // w30 w28 w26 w24 ++ -31521,26722,-17855,-31521, // w31 w29 w27 w25 ++// Table for rows 2,6 - constants are multiplied by cos_2_16 ++ 21407,21407,21407,-21407, // movq-> w06 w04 w02 w00 ++ 27969,11585,11585,-27969, // w07 w05 w03 w01 ++ 21407,-21407,21407,21407, // w14 w12 w10 w08 ++ -11585,27969,-27969,-11585, // w15 w13 w11 w09 ++ 29692,16819,25172,-29692, // w22 w20 w18 w16 ++ 
25172,5906,-5906,-16819, // w23 w21 w19 w17 ++ 16819,5906,5906,25172, // w30 w28 w26 w24 ++ -29692,25172,-16819,-29692, // w31 w29 w27 w25 ++// Table for rows 3,5 - constants are multiplied by cos_3_16 ++ 19266,19266,19266,-19266, // movq-> w06 w04 w02 w00 ++ 25172,10426,10426,-25172, // w07 w05 w03 w01 ++ 19266,-19266,19266,19266, // w14 w12 w10 w08 ++ -10426,25172,-25172,-10426, // w15 w13 w11 w09 ++ 26722,15137,22654,-26722, // w22 w20 w18 w16 ++ 22654,5315,-5315,-15137, // w23 w21 w19 w17 ++ 15137,5315,5315,22654, // w30 w28 w26 w24 ++ -26722,22654,-15137,-26722, // w31 w29 w27 w25 ++}; ++ ++ ++// %3 for rows 0,4 - constants are multiplied by cos_4_16 ++static const int16_t tab_i_04_xmm[32*4] attribute_used __attribute__ ((aligned(8))) = { ++ 16384,21407,16384,8867, // movq-> w05 w04 w01 w00 ++ 16384,8867,-16384,-21407, // w07 w06 w03 w02 ++ 16384,-8867,16384,-21407, // w13 w12 w09 w08 ++ -16384,21407,16384,-8867, // w15 w14 w11 w10 ++ 22725,19266,19266,-4520, // w21 w20 w17 w16 ++ 12873,4520,-22725,-12873, // w23 w22 w19 w18 ++ 12873,-22725,4520,-12873, // w29 w28 w25 w24 ++ 4520,19266,19266,-22725, // w31 w30 w27 w26 ++// %3 for rows 1,7 - constants are multiplied by cos_1_16 ++ 22725,29692,22725,12299, // movq-> w05 w04 w01 w00 ++ 22725,12299,-22725,-29692, // w07 w06 w03 w02 ++ 22725,-12299,22725,-29692, // w13 w12 w09 w08 ++ -22725,29692,22725,-12299, // w15 w14 w11 w10 ++ 31521,26722,26722,-6270, // w21 w20 w17 w16 ++ 17855,6270,-31521,-17855, // w23 w22 w19 w18 ++ 17855,-31521,6270,-17855, // w29 w28 w25 w24 ++ 6270,26722,26722,-31521, // w31 w30 w27 w26 ++// %3 for rows 2,6 - constants are multiplied by cos_2_16 ++ 21407,27969,21407,11585, // movq-> w05 w04 w01 w00 ++ 21407,11585,-21407,-27969, // w07 w06 w03 w02 ++ 21407,-11585,21407,-27969, // w13 w12 w09 w08 ++ -21407,27969,21407,-11585, // w15 w14 w11 w10 ++ 29692,25172,25172,-5906, // w21 w20 w17 w16 ++ 16819,5906,-29692,-16819, // w23 w22 w19 w18 ++ 16819,-29692,5906,-16819, // w29 w28 w25 w24 ++ 5906,25172,25172,-29692, // w31 w30 w27 w26 ++// %3 for rows 3,5 - constants are multiplied by cos_3_16 ++ 19266,25172,19266,10426, // movq-> w05 w04 w01 w00 ++ 19266,10426,-19266,-25172, // w07 w06 w03 w02 ++ 19266,-10426,19266,-25172, // w13 w12 w09 w08 ++ -19266,25172,19266,-10426, // w15 w14 w11 w10 ++ 26722,22654,22654,-5315, // w21 w20 w17 w16 ++ 15137,5315,-26722,-15137, // w23 w22 w19 w18 ++ 15137,-26722,5315,-15137, // w29 w28 w25 w24 ++ 5315,22654,22654,-26722, // w31 w30 w27 w26 ++}; ++ ++ ++ ++#define DCT_8_INV_ROW_LOONGSON2(A1,A2,A3,A4)\ ++ "ldc1 $f0, " #A1 " \n\t"/* 0 ; x3 x2 x1 x0*/\ ++ "ldc1 $f2, 8+" #A1 " \n\t"/* 1 ; x7 x6 x5 x4*/\ ++ "mov.d $f4, $f0 \n\t"/* 2 ; x3 x2 x1 x0*/\ ++ "ldc1 $f6, " #A3 " \n\t"/* 3 ; w05 w04 w01 w00*/\ ++ "li $12, 0x88 \n\t"\ ++ "dmtc1 $12, $f16 \n\t"\ ++ "pshufh $f0, $f0, $f16 \n\t"/* x2 x0 x2 x0*/\ ++ "ldc1 $f8, 8+" #A3 " \n\t"/* 4 ; w07 w06 w03 w02*/\ ++ "mov.d $f10, $f2 \n\t"/* 5 ; x7 x6 x5 x4*/\ ++ "pmaddhw $f6, $f6, $f0 \n\t"/* x2*w05+x0*w04 x2*w01+x0*w00*/\ ++ "ldc1 $f12, 32+" #A3 " \n\t"/* 6 ; w21 w20 w17 w16*/\ ++ "pshufh $f2, $f2, $f16 \n\t"/* x6 x4 x6 x4*/\ ++ "pmaddhw $f8, $f8, $f2 \n\t"/* x6*w07+x4*w06 x6*w03+x4*w02*/\ ++ "li $12, 0xdd \n\t"\ ++ "dmtc1 $12, $f16 \n\t"\ ++ "ldc1 $f14, 40+" #A3 " \n\t"/* 7 ; w23 w22 w19 w18*/\ ++ "pshufh $f4, $f4, $f16 \n\t"/* x3 x1 x3 x1*/\ ++ "pmaddhw $f12, $f12, $f4 \n\t"/* x3*w21+x1*w20 x3*w17+x1*w16*/\ ++ "ldc1 $f18, " #A4 " \n\t" \ ++ "ldc1 $f20, 16+" #A3 " \n\t" \ ++ "ldc1 $f22, 24+" #A3 " \n\t" \ ++ "ldc1 $f24, 48+" #A3 " \n\t" \ ++ 
"ldc1 $f26, 56+" #A3 " \n\t" \ ++ "pshufh $f10, $f10, $f16 \n\t"/* x7 x5 x7 x5*/\ ++ "pmaddhw $f14, $f14, $f10 \n\t"/* x7*w23+x5*w22 x7*w19+x5*w18*/\ ++ "paddw $f6, $f6, $f18 \n\t"/* +%4*/\ ++ "pmaddhw $f0, $f0, $f20 \n\t"/* x2*w13+x0*w12 x2*w09+x0*w08*/\ ++ "paddw $f6, $f6, $f8 \n\t"/* 4 ; a1=sum(even1) a0=sum(even0)*/\ ++ "pmaddhw $f2, $f2, $f22 \n\t"/* x6*w15+x4*w14 x6*w11+x4*w10*/\ ++ "mov.d $f8, $f6 \n\t"/* 4 ; a1 a0*/\ ++ "li $12, 11 \n\t"\ ++ "dmtc1 $12, $f16 \n\t"\ ++ "pmaddhw $f4, $f4, $f24 \n\t"/* x3*w29+x1*w28 x3*w25+x1*w24*/\ ++ "paddw $f12, $f12, $f14 \n\t"/* 7 ; b1=sum(odd1) b0=sum(odd0)*/\ ++ "pmaddhw $f10, $f10, $f26 \n\t"/* x7*w31+x5*w30 x7*w27+x5*w26*/\ ++ "paddw $f6, $f6, $f12 \n\t"/* a1+b1 a0+b0*/\ ++ "paddw $f0, $f0, $f18 \n\t"/* +%4*/\ ++ "psraw $f6, $f6, $f16 \n\t"/* y1=a1+b1 y0=a0+b0*/\ ++ "paddw $f0, $f0, $f2 \n\t"/* 1 ; a3=sum(even3) a2=sum(even2)*/\ ++ "psubw $f8, $f8, $f12 \n\t"/* 6 ; a1-b1 a0-b0*/\ ++ "mov.d $f14, $f0 \n\t"/* 7 ; a3 a2*/\ ++ "paddw $f4, $f4, $f10 \n\t"/* 5 ; b3=sum(odd3) b2=sum(odd2)*/\ ++ "paddw $f0, $f0, $f4 \n\t"/* a3+b3 a2+b2*/\ ++ "psraw $f8, $f8, $f16 \n\t"/* y6=a1-b1 y7=a0-b0*/\ ++ "psubw $f14, $f14, $f4 \n\t"/* 2 ; a3-b3 a2-b2*/\ ++ "psraw $f0, $f0, $f16 \n\t"/* y3=a3+b3 y2=a2+b2*/\ ++ "psraw $f14, $f14, $f16 \n\t"/* y4=a3-b3 y5=a2-b2*/\ ++ "li $12, 0xb1 \n\t"\ ++ "dmtc1 $12, $f20 \n\t"\ ++ "packsswh $f6, $f6, $f0 \n\t"/* 0 ; y3 y2 y1 y0*/\ ++ "packsswh $f14, $f14, $f8 \n\t"/* 4 ; y6 y7 y4 y5*/\ ++ "sdc1 $f6, " #A2 " \n\t"/* 3 ; save y3 y2 y1 y0*/\ ++ "pshufh $f14, $f14, $f20 \n\t"/* y7 y6 y5 y4*/\ ++ "sdc1 $f14, 8 +" #A2 " \n\t"/* 7 ; save y7 y6 y5 y4*/\ ++ ++ ++#define DCT_8_INV_COL(A1,A2)\ ++ "ldc1 $f0, 2*8(%3) \n\t"/* */\ ++ "ldc1 $f6, 16*3+" #A1 " \n\t"/* x3 */\ ++ "mov.d $f2, $f0 \n\t"/* tg_3_16*/\ ++ "ldc1 $f10, 16*5+" #A1 " \n\t"/* x5 */\ ++ "pmulhh $f0, $f0, $f6 \n\t"/* x3*(tg_3_16-1)*/\ ++ "ldc1 $f8, (%3) \n\t"\ ++ "pmulhh $f2, $f2, $f10 \n\t"/* x5*(tg_3_16-1)*/\ ++ "ldc1 $f14, 16*7+" #A1 " \n\t"/* x7 */\ ++ "mov.d $f4, $f8 \n\t"/* tg_1_16*/\ ++ "ldc1 $f12, 16*1+" #A1 " \n\t"/* x1 */\ ++ "pmulhh $f8, $f8, $f14 \n\t"/* x7*tg_1_16*/\ ++ "paddsh $f0, $f0, $f6 \n\t"/* x3*tg_3_16*/\ ++ "pmulhh $f4, $f4, $f12 \n\t"/* x1*tg_1_16*/\ ++ "paddsh $f2, $f2, $f6 \n\t"/* x3+x5*(tg_3_16-1)*/\ ++ "psubsh $f0, $f0, $f10 \n\t"/* x3*tg_3_16-x5 = tm35*/\ ++ "ldc1 $f6, 3*8(%3) \n\t"\ ++ "paddsh $f2, $f2, $f10 \n\t"/* x3+x5*tg_3_16 = tp35*/\ ++ "paddsh $f8, $f8, $f12 \n\t"/* x1+tg_1_16*x7 = tp17*/\ ++ "psubsh $f4, $f4, $f14 \n\t"/* x1*tg_1_16-x7 = tm17*/\ ++ "mov.d $f10, $f8 \n\t"/* tp17*/\ ++ "mov.d $f12, $f4 \n\t"/* tm17*/\ ++ "paddsh $f10, $f10, $f2 \n\t"/* tp17+tp35 = b0*/\ ++ "psubsh $f12, $f12, $f0 \n\t"/* tm17-tm35 = b3*/\ ++ "psubsh $f8, $f8, $f2 \n\t"/* tp17-tp35 = t1*/\ ++ "paddsh $f4, $f4, $f0 \n\t"/* tm17+tm35 = t2*/\ ++ "ldc1 $f14, 1*8(%3) \n\t"\ ++ "mov.d $f2, $f8 \n\t"/* t1*/\ ++ "sdc1 $f10, 3*16+" #A2 " \n\t"/* save b0*/\ ++ "paddsh $f2, $f2, $f4 \n\t"/* t1+t2*/\ ++ "sdc1 $f12, 5*16+" #A2 " \n\t"/* save b3*/\ ++ "psubsh $f8, $f8, $f4 \n\t"/* t1-t2*/\ ++ "ldc1 $f10, 2*16+" #A1 " \n\t"\ ++ "mov.d $f0, $f14 \n\t"/* tg_2_16*/\ ++ "ldc1 $f12, 6*16+" #A1 " \n\t"\ ++ "pmulhh $f0, $f0, $f10 \n\t"/* x2*tg_2_16*/\ ++ "pmulhh $f14, $f14, $f12 \n\t"/* x6*tg_2_16*/\ ++ "pmulhh $f2, $f2, $f6 \n\t"/* ocos_4_16*(t1+t2) = b1/2*/\ ++ "ldc1 $f4, 0*16+" #A1 " \n\t"\ ++ "pmulhh $f8, $f8, $f6 \n\t"/* ocos_4_16*(t1-t2) = b2/2*/\ ++ "psubsh $f0, $f0, $f12 \n\t"/* t2*tg_2_16-x6 = tm26*/\ ++ "mov.d $f6, $f4 \n\t"/* x0*/\ ++ "ldc1 $f12, 4*16+" #A1 " \n\t"\ ++ 
"paddsh $f14, $f14, $f10 \n\t"/* x2+x6*tg_2_16 = tp26*/\ ++ "paddsh $f4, $f4, $f12 \n\t"/* x0+x4 = tp04*/\ ++ "psubsh $f6, $f6, $f12 \n\t"/* x0-x4 = tm04*/\ ++ "mov.d $f10, $f4 \n\t"/* tp04*/\ ++ "mov.d $f12, $f6 \n\t"/* tm04*/\ ++ "psubsh $f4, $f4, $f14 \n\t"/* tp04-tp26 = a3*/\ ++ "paddsh $f6, $f6, $f0 \n\t"/* tm04+tm26 = a1*/\ ++ "paddsh $f2, $f2, $f2 \n\t"/* b1*/\ ++ "paddsh $f8, $f8, $f8 \n\t"/* b2*/\ ++ "paddsh $f10, $f10, $f14 \n\t"/* tp04+tp26 = a0*/\ ++ "psubsh $f12, $f12, $f0 \n\t"/* tm04-tm26 = a2*/\ ++ "li $12, 6 \n\t"\ ++ "dmtc1 $12, $f18 \n\t"\ ++ "mov.d $f14, $f6 \n\t"/* a1*/\ ++ "mov.d $f0, $f12 \n\t"/* a2*/\ ++ "paddsh $f6, $f6, $f2 \n\t"/* a1+b1*/\ ++ "paddsh $f12, $f12, $f8 \n\t"/* a2+b2*/\ ++ "psrah $f6, $f6, $f18 \n\t"/* dst1*/\ ++ "psubsh $f14, $f14, $f2 \n\t"/* a1-b1*/\ ++ "psrah $f12, $f12, $f18 \n\t"/* dst2*/\ ++ "psubsh $f0, $f0, $f8 \n\t"/* a2-b2*/\ ++ "ldc1 $f2, 3*16+" #A2 " \n\t"/* load b0*/\ ++ "psrah $f14, $f14, $f18 \n\t"/* dst6*/\ ++ "mov.d $f8, $f10 \n\t"/* a0*/\ ++ "psrah $f0, $f0, $f18 \n\t"/* dst5*/\ ++ "sdc1 $f6, 1*16+" #A2 " \n\t"\ ++ "paddsh $f10, $f10, $f2 \n\t"/* a0+b0*/\ ++ "sdc1 $f12, 2*16+" #A2 " \n\t"\ ++ "psubsh $f8, $f8, $f2 \n\t"/* a0-b0*/\ ++ "ldc1 $f6, 5*16+" #A2 " \n\t"/* load b3*/\ ++ "psrah $f10, $f10, $f18 \n\t"/* dst0*/\ ++ "mov.d $f12, $f4 \n\t"/* a3*/\ ++ "psrah $f8, $f8, $f18 \n\t"/* dst7*/\ ++ "sdc1 $f0, 5*16+" #A2 " \n\t"\ ++ "paddsh $f4, $f4, $f6 \n\t"/* a3+b3*/\ ++ "sdc1 $f14, 6*16+" #A2 " \n\t"\ ++ "psubsh $f12, $f12, $f6 \n\t"/* a3-b3*/\ ++ "sdc1 $f10, 0*16+" #A2 " \n\t"\ ++ "psrah $f4, $f4, $f18 \n\t"/* dst3*/\ ++ "sdc1 $f8, 7*16+" #A2 " \n\t"\ ++ "psrah $f12, $f12, $f18 \n\t"/* dst4*/\ ++ "sdc1 $f4, 3*16+" #A2 " \n\t"\ ++ "sdc1 $f12, 4*16+" #A2 " \n\t" ++ ++ ++ ++void ff_idct_xvid_loongson2(short *block){ ++ __asm__ volatile( ++ //# Process each row ++ DCT_8_INV_ROW_LOONGSON2(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1)) ++ DCT_8_INV_ROW_LOONGSON2(1*16(%0), 1*16(%0), 64*1(%2), 8*1(%1)) ++ DCT_8_INV_ROW_LOONGSON2(2*16(%0), 2*16(%0), 64*2(%2), 8*2(%1)) ++ DCT_8_INV_ROW_LOONGSON2(3*16(%0), 3*16(%0), 64*3(%2), 8*3(%1)) ++ DCT_8_INV_ROW_LOONGSON2(4*16(%0), 4*16(%0), 64*0(%2), 8*4(%1)) ++ DCT_8_INV_ROW_LOONGSON2(5*16(%0), 5*16(%0), 64*3(%2), 8*5(%1)) ++ DCT_8_INV_ROW_LOONGSON2(6*16(%0), 6*16(%0), 64*2(%2), 8*6(%1)) ++ DCT_8_INV_ROW_LOONGSON2(7*16(%0), 7*16(%0), 64*1(%2), 8*7(%1)) ++ ++ //# Process the columns (4 at a time) ++ DCT_8_INV_COL(0(%0), 0(%0)) ++ DCT_8_INV_COL(8(%0), 8(%0)) ++ : ++ : "r"(block), "r"(rounder_0), "r"(tab_i_04_xmm), "r"(tg_1_16) ++ :"$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f18","$f16","$20","$22","$24","$26"); ++} ++ +diff --git a/libavcodec/loongson2/motion_est_loongson2.c b/libavcodec/loongson2/motion_est_loongson2.c +new file mode 100644 +index 0000000..bb67290 +--- /dev/null ++++ b/libavcodec/loongson2/motion_est_loongson2.c +@@ -0,0 +1,365 @@ ++/* ++ * Loongson2E MMI optimized motion estimation ++ * Copyright (c) 2007 comcat <jiankemeng@gmail.com>. 
++ * ++ * based on Michael Niedermayer <michaelni@gmx.at> ++ * ++ */ ++ ++#include "dsputil_loongson2.h" ++#include "../avcodec.h" ++ ++static const __attribute__ ((aligned(8))) uint64_t round_tab[3]={ ++ 0x0000000000000000ULL, ++ 0x0001000100010001ULL, ++ 0x0002000200020002ULL, ++}; ++ ++static attribute_used __attribute__ ((aligned(8))) uint64_t bone= 0x0101010101010101LL; ++ ++static inline void sad8_1_loongson2(uint8_t *blk1, uint8_t *blk2, int stride, int h) ++{ ++ long len= -(stride*h); ++ __asm__ volatile( ++ ++// ".set mips3 \n\t" ++ ".align 4 \n\t" ++ ++ "move $8, %0 \n\t" ++ "move $21, %1 \n\t" ++ "move $22, %2 \n\t" ++ "move $23, %3 \n\t" ++ ++ "1: \n\t" ++ ++ "add $9, $8, $21 \n\t" ++ "add $10, $8, $22 \n\t" ++ ++ "uld $11, ($9) \n\t" ++ "dmtc1 $11, $f0 \n\t" ++ ++ "uld $12, ($9) \n\t" ++ "dmtc1 $12, $f4 \n\t" ++ ++ "pasubub $f10, $f0, $f4 \n\t" ++ "biadd $f0, $f10 \n\t" ++ ++ "add $8, $8, $23 \n\t" ++ ++ "add $9, $8, $21 \n\t" ++ "add $10, $8, $22 \n\t" ++ ++ "uld $11, ($9) \n\t" ++ "dmtc1 $11, $f2 \n\t" ++ ++ "uld $12, ($10) \n\t" ++ "dmtc1 $12, $f6 \n\t" ++ ++ "pasubub $f16, $f2, $f6 \n\t" ++ "biadd $f6, $f16 \n\t" ++ ++ "paddh $f0, $f0, $f6 \n\t" ++ ++ "paddh $f12, $f12, $f0 \n\t" ++ ++ "bltz $8, 1b \n\t" ++ "add $8, $8, $23 \n\t" ++ ++ : "+r" (len) ++ : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride) ++ : "$8", "$9", "$10", "$21", "$22", "$23", "$f0", "$f2", "$f4", "$f6", "$f10", "$f16" ++ ); ++} ++ ++static inline void sad8_2_loongson2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h) ++{ ++ long len= -(stride*h); ++ __asm__ volatile( ++ ++// ".set mips3 \n\t" ++ ".align 4 \n\t" ++ ++ "move $8, %0 \n\t" ++ ++ "1: \n\t" ++ "add $9, $8, %1 \n\t" ++ "add $10, $8, %2 \n\t" ++ "add $11, $8, %3 \n\t" ++ ++ "uld $12, ($9) \n\t" ++ "dmtc1 $12, $f0 \n\t" ++ "uld $13, ($10) \n\t" ++ "dmtc1 $13, $f4 \n\t" ++ ++ "pavgb $f0, $f0, $f4 \n\t" ++ ++ "uld $12, ($11) \n\t" ++ "dmtc1 $12, $f4 \n\t" ++ ++ "pasubub $f10, $f0, $f4 \n\t" ++ "biadd $f0, $f10 \n\t" ++ ++ "add $8, $8, %4 \n\t" ++ ++ "add $9, $8, %1 \n\t" ++ "add $10, $8, %2 \n\t" ++ "add $11, $8, %3 \n\t" ++ ++ "uld $12, ($9) \n\t" ++ "dmtc1 $12, $f2 \n\t" ++ "uld $13, ($10) \n\t" ++ "dmtc1 $13, $f6 \n\t" ++ ++ "pavgb $f6, $f6, $f2 \n\t" ++ ++ "uld $12, ($11) \n\t" ++ "dmtc1 $12, $f2 \n\t" ++ ++ "pasubub $f16, $f6, $f2 \n\t" ++ "biadd $f6, $f16 \n\t" ++ ++ "paddh $f0, $f0, $f6 \n\t" ++ "paddh $f12, $f12, $f0 \n\t" ++ ++ "bltz $8, 1b \n\t" ++ "add $8, $8, %4 \n\t" ++ : "+r" (len) ++ : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride) ++ : "$8", "$9", "$10", "$11", "$12", "$13", "$f0", "$f2", "$f4", "$f6", "$f10", "$f16" ++ ); ++} ++ ++static inline void sad8_4_loongson2(uint8_t *blk1, uint8_t *blk2, int stride, int h) ++{ ++ long len= -(stride*h); ++ __asm__ volatile( ++ ++ ++// ".set mips3 \n\t" ++ ".align 4 \n\t" ++ ++ "ldc1 $f10, "MANGLE(bone)" \n\t" ++ ++ "move $8, %0 \n\t" ++ ++ "1: \n\t" ++ "add $9, $8, %1 \n\t" ++ "add $10, $8, %2 \n\t" ++ "add $11, $8, %3 \n\t" ++ ++ "uld $12, ($9) \n\t" ++ "dmtc1 $12, $f0 \n\t" ++ ++ "uld $13, ($10) \n\t" ++ "dmtc1 $13, $f4 \n\t" ++ ++ "uld $12, 1($9) \n\t" ++ "dmtc1 $12, $f2 \n\t" ++ ++ "uld $13, 1($10) \n\t" ++ "dmtc1 $13, $f6 \n\t" ++ ++ "pavgb $f0, $f0, $f4 \n\t" ++ "pavgb $f6, $f6, $f2 \n\t" ++ ++ "psubusb $f6, $f6, $f10 \n\t" ++ "pavgb $f0, $f0, $f6 \n\t" ++ ++ "uld $13, 1($11) \n\t" ++ "dmtc1 $13, $f4 \n\t" ++ ++ "pasubub $f16, $f0, $f4 \n\t" ++ "biadd $f0, $f16 \n\t" ++ ++ "add $8, $8, %4 \n\t" ++ ++ "add $9, $8, %1 \n\t" ++ "add $10, 
$8, %2 \n\t" ++ "add $11, $8, %3 \n\t" ++ ++ "uld $12, ($9) \n\t" ++ "dmtc1 $12, $f2 \n\t" ++ "uld $13, ($10) \n\t" ++ "dmtc1 $12, $f6 \n\t" ++ "uld $12, 1($9) \n\t" ++ "dmtc1 $12, $f4 \n\t" ++ "uld $13, 1($10) \n\t" ++ "dmtc1 $12, $f8 \n\t" ++ ++ "pavgb $f2, $f2, $f6 \n\t" ++ "pavgb $f4, $f4, $f8 \n\t" ++ ++ "psubusb $f4, $f4, $f10 \n\t" ++ "pavgb $f4, $f4, $f2 \n\t" ++ ++ "uld $13, ($11) \n\t" ++ "dmtc1 $13, $f2 \n\t" ++ ++ "pasubub $f18, $f4, $f2 \n\t" ++ "biadd $f4, $f18 \n\t" ++ ++ "paddh $f0, $f0, $f4 \n\t" ++ "paddh $f12, $f12, $f0 \n\t" ++ ++ "bltz $8, 1b \n\t" ++ "add $8, $8, %4 \n\t" ++ : "+r" (len) ++ : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" ((long)stride) ++ : "$8", "$9", "$10", "$11", "$12", "$13", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f16", "$f18" ++ ); ++} ++ ++static inline int sum_loongson2(void) ++{ ++ int ret; ++ __asm__ volatile( ++// ".set mips3 \n\t" ++ ++ "dmfc1 %0, $f12 \n\t" ++ : "=r" (ret) ++ ); ++ return ret; ++} ++ ++ ++static int sad8_loongson2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) ++{ ++ assert(h==8); ++ __asm__ volatile( ++// ".set mips3 \n\t" ++ "xor $f14, $f14, $f14 \n\t" ++ "xor $f12, $f12, $f12 \n\t" ++ : ++ ); ++ ++ sad8_1_loongson2(blk1, blk2, stride, 8); ++ ++ return sum_loongson2(); ++} ++ ++static int sad8_x2_loongson2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) ++{ ++ assert(h==8); ++ __asm__ volatile( ++// ".set mips3 \n\t" ++ "xor $f14, $f14, $f14 \n\t" ++ "xor $f12, $f12, $f12 \n\t" ++ ++ "ldc1 $f10, %0 \n\t" ++ :: "m"(round_tab[1]) ++ ); ++ ++ sad8_2_loongson2(blk1, blk1+1, blk2, stride, 8); ++ ++ return sum_loongson2(); ++} ++ ++static int sad8_y2_loongson2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) ++{ ++ assert(h==8); ++ __asm__ volatile( ++// ".set mips3 \n\t" ++ "xor $f14, $f14, $f14 \n\t" ++ "xor $f12, $f12, $f12 \n\t" ++ ++ "ldc1 $f10, %0 \n\t" ++ :: "m"(round_tab[1]) ++ ); ++ ++ sad8_2_loongson2(blk1, blk1+stride, blk2, stride, 8); ++ ++ return sum_loongson2(); ++} ++ ++static int sad8_xy2_loongson2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) ++{ ++ assert(h==8); ++ __asm__ volatile( ++// ".set mips3 \n\t" ++ "xor $f14, $f14, $f14 \n\t" ++ "xor $f12, $f12, $f12 \n\t" ++ "ldc1 $f10, %0 \n\t" ++ :: "m"(round_tab[2]) ++ ); ++ ++ sad8_4_loongson2(blk1, blk2, stride, 8); ++ ++ return sum_loongson2(); ++} ++ ++static int sad16_loongson2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) ++{ ++ __asm__ volatile( ++// ".set mips3 \n\t" ++ "xor $f14, $f14, $f14 \n\t" ++ "xor $f12, $f12, $f12 \n\t":); ++ ++ sad8_1_loongson2(blk1 , blk2 , stride, h); ++ sad8_1_loongson2(blk1+8, blk2+8, stride, h); ++ ++ return sum_loongson2(); ++} ++ ++static int sad16_x2_loongson2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) ++{ ++ __asm__ volatile( ++// ".set mips3 \n\t" ++ "xor $f14, $f14, $f14 \n\t" ++ "xor $f12, $f12, $f12 \n\t" ++ "ldc1 $f10, %0 \n\t" ++ :: "m"(round_tab[1]) ++ ); ++ ++ sad8_2_loongson2(blk1 , blk1+1, blk2 , stride, h); ++ sad8_2_loongson2(blk1+8, blk1+9, blk2+8, stride, h); ++ ++ return sum_loongson2(); ++} ++ ++static int sad16_y2_loongson2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) ++{ ++ __asm__ volatile( ++// ".set mips3 \n\t" ++ "xor $f14, $f14, $f14 \n\t" ++ "xor $f12, $f12, $f12 \n\t" ++ "ldc1 $f10, %0 \n\t" ++ :: "m"(round_tab[1]) ++ ); ++ ++ sad8_2_loongson2(blk1 , blk1+stride, blk2 , stride, h); ++ sad8_2_loongson2(blk1+8, blk1+stride+8,blk2+8, stride, h); ++ ++ return sum_loongson2(); ++} ++ ++static 
int sad16_xy2_loongson2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) ++{ ++ __asm__ volatile( ++// ".set mips3 \n\t" ++ "xor $f14, $f14, $f14 \n\t" ++ "xor $f12, $f12, $f12 \n\t" ++ "ldc1 $f10, %0 \n\t" ++ :: "m"(round_tab[2]) ++ ); ++ ++ sad8_4_loongson2(blk1 , blk2 , stride, h); ++ sad8_4_loongson2(blk1+8, blk2+8, stride, h); ++ ++ return sum_loongson2(); ++} ++ ++ ++void dsputil_init_pix_loongson2(DSPContext* c, AVCodecContext *avctx) ++{ ++ c->pix_abs[0][0] = sad16_loongson2; ++ c->pix_abs[0][1] = sad16_x2_loongson2; ++ c->pix_abs[0][2] = sad16_y2_loongson2; ++ c->pix_abs[0][3] = sad16_xy2_loongson2; ++ c->pix_abs[1][0] = sad8_loongson2; ++ c->pix_abs[1][1] = sad8_x2_loongson2; ++ c->pix_abs[1][2] = sad8_y2_loongson2; ++ c->pix_abs[1][3] = sad8_xy2_loongson2; ++ ++ c->sad[0]= sad16_loongson2; ++ c->sad[1]= sad8_loongson2; ++} +diff --git a/libavcodec/loongson2/mpegvideo_loongson2.c b/libavcodec/loongson2/mpegvideo_loongson2.c +new file mode 100644 +index 0000000..18d070a +--- /dev/null ++++ b/libavcodec/loongson2/mpegvideo_loongson2.c +@@ -0,0 +1,385 @@ ++/* ++ * The simplest mpeg encoder (well, it was the simplest!) ++ * Copyright (c) 2007-2010 comcat <jiankemeng@gmail.com>. ++ * ++ * Optimized for Loongson2 CPUs by comcat <jiankemeng@gmail.com> ++ * ++ * Based on i386 ++ */ ++ ++#include "dsputil_loongson2.h" ++#include "../mpegvideo.h" ++#include "../avcodec.h" ++ ++extern uint8_t zigzag_direct_noperm[64]; ++extern uint16_t inv_zigzag_direct16[64]; ++ ++static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL; ++static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; ++ ++ ++static void dct_unquantize_h263_intra_loongson2(MpegEncContext *s, ++ DCTELEM *block, int n, int qscale) ++{ ++ long level, qmul, qadd, nCoeffs; ++ ++ qmul = qscale << 1; ++ ++ assert(s->block_last_index[n]>=0 || s->h263_aic); ++ if (!s->h263_aic) { ++ if (n < 4) ++ level = block[0] * s->y_dc_scale; ++ else ++ level = block[0] * s->c_dc_scale; ++ qadd = (qscale - 1) | 1; ++ }else{ ++ qadd = 0; ++ level= block[0]; ++ } ++ if(s->ac_pred) ++ nCoeffs=63; ++ else ++ nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; ++ ++ ++ __asm__ volatile( ++// ".set mips3 \n\t" ++ ++ "xor $f12, $f12, $f12 \n\t" ++ "lwc1 $f12, %1 \n\t" ++ ++ "xor $f10, $f10, $f10 \n\t" ++ ++ "packsswh $f12, $f12, $f12 \n\t" ++ ++ "lwc1 $f10, %2 \n\t" ++ ++ "packsswh $f10, $f10, $f10 \n\t" ++ ++ "packsswh $f12, $f12, $f12 \n\t" ++ ++ "xor $f14, $f14, $f14 \n\t" ++ ++ "packsswh $f10, $f10, $f10 \n\t" ++ ++ "xor $f8, $f8, $f8 \n\t" ++ ++ "psubh $f14, $f14, $f10 \n\t" ++ ++ ++ "1: \n\t" ++ "add $12, %0, %3 \n\t" ++ ++ "ldc1 $f0, ($12) \n\t" ++ ++ "ldc1 $f2, 8($12) \n\t" ++ ++ "mov.d $f4, $f0 \n\t" ++ "mov.d $f6, $f2 \n\t" ++ ++ "pmullh $f0, $f0, $f12 \n\t" ++ "pmullh $f2, $f2, $f12 \n\t" ++ ++ "pcmpgth $f4, $f4, $f8 \n\t" ++ "pcmpgth $f6, $f6, $f8 \n\t" ++ ++ "xor $f0, $f0, $f4 \n\t" ++ "xor $f2, $f2, $f6 \n\t" ++ ++ ++ "paddh $f0, $f0, $f14 \n\t" ++ ++ "paddh $f2, $f2, $f14 \n\t" ++ ++ ++ "xor $f4, $f4, $f0 \n\t" ++ ++ "xor $f6, $f6, $f2 \n\t" ++ ++ ++ "pcmpeqh $f0, $f0, $f14 \n\t" ++ ++ "pcmpeqh $f2, $f2, $f14 \n\t" ++ ++ ++ "pandn $f0, $f0, $f4 \n\t" ++ ++ "pandn $f2, $f2, $f6 \n\t" ++ ++ ++ "sdc1 $f0, ($12) \n\t" ++ ++ "sdc1 $f2, 8($12) \n\t" ++ ++ ++ "addiu %3, %3, 16 \n\t" ++ ++ "blez %3, 1b \n\t" ++ "nop \n\t" ++ ::"r" (block+nCoeffs), "m"(qmul), "m" (qadd), "r" (2*(-nCoeffs)) ++ : "memory" ++ ); ++ block[0]= level; ++} ++ ++ ++static 
void dct_unquantize_h263_inter_loongson2(MpegEncContext *s, ++ DCTELEM *block, int n, int qscale) ++{ ++ long qmul, qadd, nCoeffs; ++ ++ qmul = qscale << 1; ++ qadd = (qscale - 1) | 1; ++ ++ assert(s->block_last_index[n]>=0 || s->h263_aic); ++ ++ nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; ++ ++ __asm__ volatile( ++// ".set mips3 \n\t" ++ ++ "xor $f12, $f12, $f12 \n\t" ++ "lwc1 $f12, %1 \n\t" ++ ++ "xor $f10, $f10, $f10 \n\t" ++ ++ "packsswh $f12, $f12, $f12 \n\t" ++ ++ "lwc1 $f10, %2 \n\t" ++ ++ "packsswh $f10, $f10, $f10 \n\t" ++ ++ "xor $f14, $f14, $f14 \n\t" ++ ++ "packsswh $f12, $f12, $f12 \n\t" ++ ++ "packsswh $f10, $f10, $f10 \n\t" ++ ++ "xor $f8, $f8, $f8 \n\t" ++ ++ "psubh $f14, $f14, $f10 \n\t" ++ ++ ++ "1: \n\t" ++ "add $12, %0, %3 \n\t" ++ ++ "ldc1 $f0, ($12) \n\t" ++ ++ "ldc1 $f2, 8($12) \n\t" ++ ++ "mov.d $f4, $f0 \n\t" ++ "mov.d $f6, $f2 \n\t" ++ ++ "pmullh $f0, $f0, $f12 \n\t" ++ ++ "pmullh $f2, $f2, $f12 \n\t" ++ ++ "pcmpgth $f4, $f4, $f8 \n\t" ++ ++ "pcmpgth $f6, $f6, $f8 \n\t" ++ ++ "xor $f0, $f0, $f4 \n\t" ++ ++ "xor $f2, $f2, $f6 \n\t" ++ ++ "paddh $f0, $f0, $f14 \n\t" ++ ++ "paddh $f2, $f2, $f14 \n\t" ++ ++ "xor $f4, $f4, $f0 \n\t" ++ ++ "xor $f6, $f6, $f2 \n\t" ++ ++ "pcmpeqh $f0, $f0, $f14 \n\t" ++ ++ "pcmpeqh $f2, $f2, $f14 \n\t" ++ ++ "pandn $f0, $f0, $f4 \n\t" ++ ++ "pandn $f2, $f2, $f6 \n\t" ++ ++ "sdc1 $f0, ($12) \n\t" ++ ++ "sdc1 $f2, 8($12) \n\t" ++ ++ ++ "addiu %3, %3, 16 \n\t" ++ ++ "blez %3, 1b \n\t" ++ "nop \n\t" ++ ::"r" (block+nCoeffs), "m"(qmul), "m" (qadd), "r" (2*(-nCoeffs)) ++ : "memory" ++ ); ++} ++ ++ ++/* draw the edges of width 'w' of an image of size width, height ++ this mmx version can only handle w==8 || w==16 */ ++ ++static void draw_edges_loongson2(uint8_t *buf, int wrap, int width, int height, int w) ++{ ++ uint8_t *ptr, *last_line; ++ int i; ++ ++ last_line = buf + (height - 1) * wrap; ++ ++ ptr = buf; ++ if(w==8) ++ { ++ __asm__ volatile( ++// ".set mips3 \n\t" ++ ++ "move $9, %0 \n\t" ++ ++ "1: \n\t" ++ ++ "xor $f0, $f0, $f0 \n\t" ++ "lwc1 $f0, ($9) \n\t" ++ ++ "punpcklbh $f0, $f0, $f0 \n\t" ++ ++ "add $12, $9, %2 \n\t" ++ ++ "punpcklhw $f0, $f0, $f0 \n\t" ++ ++ "punpcklwd $f0, $f0, $f0 \n\t" ++ ++ "ldc1 $f2, -8($12) \n\t" ++ ++ "sdc1 $f0, -8($9) \n\t" ++ ++ "punpckhbh $f2, $f2, $f2 \n\t" ++ ++ "add $9, $9, %1 \n\t" ++ ++ "punpckhhw $f2, $f2, $f2 \n\t" ++ ++ "sub $13, $9, %3 \n\t" ++ ++ "punpckhwd $f2, $f2, $f2 \n\t" ++ ++ "bltz $13, 1b \n\t" ++ ++ "sdc1 $f2, ($12) \n\t" ++ ++ : "+r" (ptr) ++ : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) ++ : "$9", "$13", "$12", "$f2", "$f0" ++ ); ++ } ++ else ++ { ++ __asm__ volatile( ++// ".set mips3 \n\t" ++ ++ "move $8, %0 \n\t" ++ ++ "1: \n\t" ++ ++ "xor $f0, $f0, $f0 \n\t" ++ "lwc1 $f0, ($8) \n\t" ++ ++ "punpcklbh $f0, $f0, $f0 \n\t" ++ "punpcklhw $f0, $f0, $f0 \n\t" ++ "punpcklwd $f0, $f0, $f0 \n\t" ++ ++ "sdc1 $f0, -8($8) \n\t" ++ "sdc1 $f0, -16($8) \n\t" ++ ++ "add $15, $8, %2 \n\t" ++ "ldc1 $f2, -8($15) \n\t" ++ ++ "punpckhbh $f2, $f2, $f2 \n\t" ++ "punpckhhw $f2, $f2, $f2 \n\t" ++ "punpckhwd $f2, $f2, $f2 \n\t" ++ ++ "sdc1 $f2, ($15) \n\t" ++ "sdc1 $f2, 8($15) \n\t" ++ ++ "add $8, $8, %1 \n\t" ++ ++ "sub $16, $8, %3 \n\t" ++ "bltz $16, 1b \n\t" ++ "nop \n\t" ++ : "+r" (ptr) ++ : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) ++ : "$8", "$15", "$16", "$f0", "$f2" ++ ); ++ } ++ ++ for(i=0;i<w;i+=4) { ++ ++ ptr= buf - (i + 1) * wrap - w; ++ __asm__ volatile( ++// ".set mips3 \n\t" ++ "move $8, %0 \n\t" ++ ++ "1: \n\t" ++ ++ "add $9, $8, %1 
\n\t" ++ "ldc1 $f0, ($9) \n\t" ++ ++ "add $10, $8, %2 \n\t" ++ "add $11, $10, %2 \n\t" ++ "add $12, $8, %3 \n\t" ++ ++ "sdc1 $f0, ($8) \n\t" ++ "sdc1 $f0, ($10) \n\t" ++ "sdc1 $f0, ($11) \n\t" ++ "sdc1 $f0, ($12) \n\t" ++ ++ "addiu $8, $8, 8 \n\t" ++ ++ "sub $13, $8, %4 \n\t" ++ ++ "bltz $13, 1b \n\t" ++ "nop \n\t" ++ ++ : "+r" (ptr) ++ : "r" (((long)buf - (long)ptr - w)), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (*(ptr+width+2*w)) ++ : "$8", "$9", "$10", "$11", "$12", "$13", "$f0" ++ ); ++ ++ ptr= last_line + (i + 1) * wrap - w; ++ ++ __asm__ volatile( ++// ".set mips3 \n\t" ++ ++ "move $9, %0 \n\t" ++ ++ "1: \n\t" ++ ++ "add $10, $9, %1 \n\t" ++ "ldc1 $f0, ($10) \n\t" ++ ++ "add $11, $9, %2 \n\t" ++ "add $12, $11, %2 \n\t" ++ "add $13, $9, %3 \n\t" ++ ++ "sdc1 $f0, ($9) \n\t" ++ "sdc1 $f0, ($11) \n\t" ++ "sdc1 $f0, ($12) \n\t" ++ "sdc1 $f0, ($13) \n\t" ++ ++ "addiu $9, $9, 8 \n\t" ++ ++ "sub $14, $9, %4 \n\t" ++ ++ "bltz $14, 1b \n\t" ++ "nop \n\t" ++ : "+r" (ptr) ++ : "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w) ++ : "$9", "$10", "$11", "$12", "$13", "$14", "$f0" ++ ++ ); ++ } ++} ++ ++void MPV_common_init_loongson2(MpegEncContext *s) ++{ ++ s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_loongson2; ++ s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_loongson2; ++ ++// draw_edges = draw_edges_loongson2; ++ ++} +diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile +index 3f4da68..73e4d56 100644 +--- a/libavcodec/mips/Makefile ++++ b/libavcodec/mips/Makefile +@@ -1,3 +1,9 @@ + OBJS-$(HAVE_MMI) += ps2/dsputil_mmi.o \ + ps2/idct_mmi.o \ + ps2/mpegvideo_mmi.o \ ++ ++OBJS-$(HAVE_LOONGSON2MMI) += loongson2/idct_loongson2.o \ ++ loongson2/dsputil_loongson2.o \ ++ loongson2/idct_loongson2_xvid.o \ ++ loongson2/mpegvideo_loongson2.o \ ++ loongson2/motion_est_loongson2.o +diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c +index b47ff9a..af92552 100644 +--- a/libavcodec/mpegvideo.c ++++ b/libavcodec/mpegvideo.c +@@ -176,6 +176,9 @@ av_cold int ff_dct_common_init(MpegEncContext *s) + #elif ARCH_BFIN + MPV_common_init_bfin(s); + #endif ++#ifdef HAVE_LOONGSON2MMI ++ MPV_common_init_loongson2(s); ++#endif + + /* load & permutate scantables + note: only wmv uses different ones +diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h +index 5302be9..8d09906 100644 +--- a/libavcodec/mpegvideo.h ++++ b/libavcodec/mpegvideo.h +@@ -689,6 +689,7 @@ int MPV_encode_picture(AVCodecContext *avctx, unsigned char *buf, int buf_size, + void MPV_common_init_mmx(MpegEncContext *s); + void MPV_common_init_axp(MpegEncContext *s); + void MPV_common_init_mlib(MpegEncContext *s); ++void MPV_common_init_loongson2(MpegEncContext *s); + void MPV_common_init_mmi(MpegEncContext *s); + void MPV_common_init_arm(MpegEncContext *s); + void MPV_common_init_altivec(MpegEncContext *s); +diff --git a/libavcodec/options.c b/libavcodec/options.c +index 7ca1062..c05b3f4 100644 +--- a/libavcodec/options.c.orig 2011-06-22 12:52:11.584428161 -0300 ++++ b/libavcodec/options.c 2011-06-22 12:52:25.003143367 -0300 +@@ -219,6 +219,8 @@ static const AVOption options[]={ + {"simple", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_SIMPLE }, INT_MIN, INT_MAX, V|E|D, "idct"}, + {"simplemmx", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_SIMPLEMMX }, INT_MIN, INT_MAX, V|E|D, "idct"}, + {"libmpeg2mmx", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_LIBMPEG2MMX }, INT_MIN, INT_MAX, V|E|D, "idct"}, ++{"libmpeg2loongson2", NULL, 0, FF_OPT_TYPE_CONST, 
FF_IDCT_LIBMPEG2LOONGSON2, INT_MIN, INT_MAX, V|E|D, "idct"},
++{"xvidloongson2", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_XVIDLOONGSON2, INT_MIN, INT_MAX, V|E|D, "idct"},
+ {"ps2", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_PS2 }, INT_MIN, INT_MAX, V|E|D, "idct"},
+ {"mlib", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_MLIB }, INT_MIN, INT_MAX, V|E|D, "idct"},
+ {"arm", NULL, 0, FF_OPT_TYPE_CONST, {.dbl = FF_IDCT_ARM }, INT_MIN, INT_MAX, V|E|D, "idct"},
diff --git a/extra/koffice/PKGBUILD b/extra/koffice/PKGBUILD
index 1884dd92c..c13780881 100644
--- a/extra/koffice/PKGBUILD
+++ b/extra/koffice/PKGBUILD
@@ -38,18 +38,23 @@ pkgname=(
 )
 pkgver=2.3.3
 pkgrel=4
-arch=('i686' 'x86_64')
+arch=('i686' 'x86_64' 'mips64el')
 url='http://koffice.kde.org'
 license=('GPL' 'LGPL' 'FDL')
 makedepends=('pkg-config' 'cmake' 'automoc4' 'boost' 'eigen' 'gsl' 'lcms'
   'glew' 'qimageblitz' 'kdepimlibs' 'pstoedit' 'poppler-qt' 'libwpd' 'libwpg'
   'opengtl' 'kdegraphics-libs')
+[ "$CARCH" = "mips64el" ] && \
+makedepends=('pkg-config' 'cmake' 'automoc4' 'boost' 'eigen' 'gsl' 'lcms'
+  'glew' 'qimageblitz' 'kdepimlibs' 'pstoedit' 'poppler-qt' 'libwpd'
+  'libwpg' 'kdegraphics-libs' 'libgsf')
 groups=('koffice')
 source=("http://download.kde.org/stable/${pkgbase}-${pkgver}/${pkgbase}-${pkgver}.tar.bz2"
   'kde4-koffice-libwpg02.patch' 'gcc46.patch')
 sha256sums=('31ba0d98c0d29c7b8ab97efdeb6c618b82177b2b0ec85da088178254da43c099'
   '69106deb4081d71b5bd8f2e4f5af67ca689e4ce9f2bb49c11dbce5fb3409d612'
   'e095c0b2bbedf41da6535a68b2275464dafd3f194566028d0135322f596e4739')
+options=(!distcc)
 
 build() {
   cd "${srcdir}/${pkgbase}-${pkgver}"
@@ -60,6 +65,7 @@ build() {
   cd "${srcdir}"
   mkdir build
   cd build
+  CXX="g++" \
   cmake ../${pkgbase}-${pkgver} \
     -DCMAKE_BUILD_TYPE=Release \
     -DCMAKE_SKIP_RPATH=ON \
@@ -216,6 +222,10 @@ package_koffice-krita(){
   depends=('hicolor-icon-theme' 'glew' 'qimageblitz' 'koffice-libs' \
     'koffice-templates' 'koffice-plugins' 'poppler-qt' 'shared-mime-info' \
     'openexr' 'opengtl')
+[ "$CARCH" = "mips64el" ] && \
+  depends=('hicolor-icon-theme' 'glew' 'qimageblitz' 'koffice-libs' \
+    'koffice-templates' 'koffice-plugins' 'poppler-qt' 'shared-mime-info' \
+    'openexr')
 optdepends=('koffice-filters: import/export filters')
 install=krita.install
 cd "${srcdir}/build/krita"
diff --git a/extra/pixman/PKGBUILD b/extra/pixman/PKGBUILD
index bf392b456..3c842ff4b 100644
--- a/extra/pixman/PKGBUILD
+++ b/extra/pixman/PKGBUILD
@@ -4,18 +4,22 @@ pkgname=pixman
 pkgver=0.22.0
-pkgrel=1
+pkgrel=2
 pkgdesc="Pixman library"
 arch=(i686 x86_64 'mips64el')
 url="http://xorg.freedesktop.org"
 license=('custom')
 depends=('glibc')
 options=('!libtool')
-source=(http://xorg.freedesktop.org/releases/individual/lib/${pkgname}-${pkgver}.tar.bz2)
-sha1sums=('d24ea233755d7dce9f0d93136ad99fba8d4e4fa0')
+source=(http://xorg.freedesktop.org/releases/individual/lib/${pkgname}-${pkgver}.tar.bz2
+        pixman-loongson2f.patch)
+sha1sums=('d24ea233755d7dce9f0d93136ad99fba8d4e4fa0'
+         'ce4d69ea341f21fdc30f6d401ee479cd3571dab3')
 
 build() {
   cd "${srcdir}/${pkgname}-${pkgver}"
+  [ "$CARCH" = "mips64el" ] && patch -Np1 -i $srcdir/pixman-loongson2f.patch
+  autoreconf -i
   ./configure --prefix=/usr --disable-static
   make
 }
diff --git a/extra/pixman/pixman-loongson2f.patch b/extra/pixman/pixman-loongson2f.patch
new file mode 100644
index 000000000..15e01cb6b
--- /dev/null
+++ b/extra/pixman/pixman-loongson2f.patch
@@ -0,0 +1,2745 @@
+diff -urN pixman//configure.ac Pixman.Loongson//configure.ac
+--- pixman//configure.ac 2010-12-25 18:46:00.018699000 +0800
++++
Pixman.Loongson//configure.ac 2010-12-25 18:39:15.298778000 +0800 +@@ -264,6 +264,43 @@ + ]) + + dnl =========================================================================== ++dnl Check for Loongson SIMD ++ ++have_loongson_intrinsics=no ++AC_MSG_CHECKING(whether to use Loongson SIMD intrinsics) ++ ++AC_COMPILE_IFELSE([ ++#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 4)) ++error "Need GCC >= 4.4 for Loongson SIMD compilation" ++#endif ++int main () { ++ /* Test with a loongson SIMD instruction. */ ++ asm volatile ( ".set arch = loongson2f \n\t" "and \$f0, \$f0, \$f0 \n\t" : : : "cc", "memory" ); ++ return 0; ++}], have_loongson_intrinsics=yes) ++ ++ ++AC_ARG_ENABLE(loongson, ++ [AC_HELP_STRING([--disable-loongson], ++ [disable Loongson fast paths])], ++ [enable_loongson=$enableval], [enable_loongson=auto]) ++ ++if test $enable_loongson = no ; then ++ have_loongson_intrinsics=disabled ++fi ++ ++if test $have_loongson_intrinsics = yes ; then ++ AC_DEFINE(USE_LS, 1, [use Loongson compiler intrinsics]) ++fi ++ ++AC_MSG_RESULT($have_loongson_intrinsics) ++if test $enable_loongson = yes && test $have_loongson_intrinsics = no ; then ++ AC_MSG_ERROR([Loongson intrinsics not detected]) ++fi ++ ++AM_CONDITIONAL(USE_LS, test $have_loongson_intrinsics = yes) ++ ++dnl =========================================================================== + dnl Check for MMX + + if test "x$MMX_CFLAGS" = "x" ; then +diff -urN pixman//pixman/Makefile.am Pixman.Loongson//pixman/Makefile.am +--- pixman//pixman/Makefile.am 2010-12-25 18:46:00.025027000 +0800 ++++ Pixman.Loongson//pixman/Makefile.am 2010-12-25 18:39:15.303599000 +0800 +@@ -55,6 +55,19 @@ + pixman-combine.h.template solaris-hwcap.mapfile pixman-x64-mmx-emulation.h + CLEANFILES = pixman-combine32.c pixman-combine64.c pixman-combine32.h pixman-combine64.h + ++# loongson code ++if USE_LS ++noinst_LTLIBRARIES += libpixman-ls.la ++libpixman_ls_la_SOURCES = \ ++ pixman-ls.c ++libpixman_ls_la_CFLAGS = $(DEP_CFLAGS) $(LS_CFLAGS) ++libpixman_ls_la_LIBADD = $(DEP_LIBS) ++libpixman_1_la_LDFLAGS += $(LS_LDFLAGS) ++libpixman_1_la_LIBADD += libpixman-ls.la ++ ++ASM_CFLAGS_ls=$(LS_CFLAGS) ++endif ++ + # mmx code + if USE_MMX + noinst_LTLIBRARIES += libpixman-mmx.la +diff -urN pixman//pixman/pixman-combine-ls.c Pixman.Loongson//pixman/pixman-combine-ls.c +--- pixman//pixman/pixman-combine-ls.c 1970-01-01 08:00:00.000000000 +0800 ++++ Pixman.Loongson//pixman/pixman-combine-ls.c 2010-12-25 18:39:15.344171000 +0800 +@@ -0,0 +1,911 @@ ++static force_inline uint32_t ++combine (const uint32_t *src, const uint32_t *mask) ++{ ++ uint32_t ssrc = *src; ++ ++ if (mask) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%1,$f22) ++ load8888r(%0,$f20) ++ expand_alpha($f22,$f22)
++ pix_multiply($f20,$f22)
++ store8888r($f8,%0) ++ :"+r"(ssrc):"r"(*mask):clobber ++ ); ++ } ++ return ssrc; ++} ++ ++static void ++ls_combine_saturate_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = dest + width; ++ ++ while (dest < end) ++ { ++ uint32_t s = combine (src, mask); ++ uint32_t d = *dest; ++ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%1,$f22) ++ load8888r(%0,$f20) ++ :"+r"(d):"r"(s):clobber ++ ); ++ ++ uint32_t sa = s >> 24; ++ uint32_t da = ~d >> 24; ++ ++ if (sa > da) ++ { ++ uint32_t dds = DIV_UN8 (da, sa) << 24; ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%0,$f24) ++ expand_alpha($f24,$f24) ++ pix_multiply($f22,$f24) ++ save_to($f22) ++ ::"r"(dds):clobber ++ ); ++ } ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ pix_add($f20,$f22) ++ store8888r($f8,%0) ++ :"=r"(*dest)::clobber ++ ); ++ ++ ++src; ++ ++dest; ++ if (mask) ++ mask++; ++ } ++} ++static void ++ls_combine_out_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = dest + width; ++ ++ while (dest < end) ++ { ++ if (mask) ++ { ++ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ ++ load8888r(%2,$f22) ++ load8888r(%1,$f20) ++ expand_alpha($f22,$f22) ++ pix_multiply($f20,$f22) ++ save_to ($f20) ++ ++ load8888r(%0,$f24) ++ expand_alpha($f24,$f24) ++ negate($f24,$f24) ++ pix_multiply($f20,$f24) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ ++ mask++; ++ }else { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ ++ ++ load8888r(%1,$f20) ++ ++ load8888r(%0,$f24) ++ expand_alpha($f24,$f24) ++ negate($f24,$f24) ++ pix_multiply($f20,$f24) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src):clobber ++ ); ++ ++ } ++ ++dest; ++ ++src; ++ } ++} ++ ++static void ++ls_combine_out_reverse_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = dest + width; ++ ++ while (dest < end) ++ { ++ if (mask) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ ++ load8888r(%2,$f22) ++ load8888r(%1,$f20) ++ expand_alpha($f22,$f22) ++ pix_multiply($f20,$f22) ++ save_to ($f20) ++ ++ load8888r(%0,$f24) ++ expand_alpha($f20,$f20) ++ negate($f20,$f20) ++ pix_multiply($f20,$f24) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ ++ mask++; ++ }else{ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ ++ load8888r(%1,$f20) ++ ++ load8888r(%0,$f24) ++ expand_alpha($f20,$f20) ++ negate($f20,$f20) ++ pix_multiply($f20,$f24) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src):clobber ++ ); ++ } ++ ++dest; ++ ++src; ++ ++ } ++} ++ ++static void ++ls_combine_out_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = src + width; ++ ++ while (src < end) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%2,$f22) ++ load8888r(%1,$f20) ++ load8888r(%0,$f24) ++ expand_alpha($f24,$f26) ++ negate($f26,$f26) ++ pix_multiply($f20,$f22) ++ save_to($f20) ++ pix_multiply($f20,$f26) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ ++src; ++ ++dest; ++ ++mask; ++ } ++} ++ ++static void ++ls_combine_out_reverse_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, 
++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = src + width; ++ ++ while (src < end) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%2,$f22) ++ load8888r(%1,$f20) ++ load8888r(%0,$f24) ++ expand_alpha($f20,$f28) ++ pix_multiply($f22,$f28) ++ save_to($f22) ++ negate($f22,$f22) ++ pix_multiply($f24,$f22) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ ++src; ++ ++dest; ++ ++mask; ++ } ++} ++ ++ ++static void ++ls_combine_atop_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = dest + width; ++ ++ while (dest < end) ++ { ++ if (mask) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ ++ load8888r(%2,$f22) ++ load8888r(%1,$f20) ++ expand_alpha($f22,$f22) ++ pix_multiply($f20,$f22) ++ save_to ($f20) ++ ++ load8888r(%0,$f24) ++ expand_alpha($f20,$f26) ++ expand_alpha($f24,$f28) ++ negate($f26,$f26) ++ pix_add_mul($f20,$f28,$f24,$f26) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ ++ mask++; ++ }else { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%1,$f20) ++ ++ load8888r(%0,$f24) ++ expand_alpha($f20,$f26) ++ expand_alpha($f24,$f28) ++ negate($f26,$f26) ++ pix_add_mul($f20,$f28,$f24,$f26) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src):clobber ++ ); ++ } ++ ++dest; ++ ++src; ++ ++ } ++} ++ ++static void ++ls_combine_atop_reverse_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end; ++ ++ end = dest + width; ++ ++ while (dest < end) ++ { ++ if (mask){ ++ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ ++ load8888r(%2,$f22) ++ load8888r(%1,$f20) ++ expand_alpha($f22,$f22) ++ pix_multiply($f20,$f22) ++ save_to ($f20) ++ ++ load8888r(%0,$f24) ++ expand_alpha($f20,$f26) ++ expand_alpha($f24,$f28) ++ negate($f28,$f28) ++ pix_add_mul($f20,$f28,$f24,$f26) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ mask++; ++ }else{ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ ++ load8888r(%1,$f20) ++ ++ load8888r(%0,$f24) ++ expand_alpha($f20,$f26) ++ expand_alpha($f24,$f28) ++ negate($f28,$f28) ++ pix_add_mul($f20,$f28,$f24,$f26) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src):clobber ++ ); ++ } ++ ++dest; ++ ++src; ++ } ++} ++ ++ ++static void ++ls_combine_atop_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = src + width; ++ ++ while (src < end) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%2,$f22) ++ load8888r(%1,$f20) ++ load8888r(%0,$f24) ++ expand_alpha($f24,$f26) ++ expand_alpha($f20,$f28) ++ pix_multiply($f20,$f22) ++ save_to($f20) ++ pix_multiply($f22,$f28) ++ save_to($f22) ++ negate($f22,$f22) ++ pix_add_mul($f24,$f22,$f20,$f26) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ ++src; ++ ++dest; ++ ++mask; ++ } ++} ++ ++static void ++ls_combine_atop_reverse_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = src + width; ++ ++ while (src < end) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%2,$f22) ++ load8888r(%1,$f20) ++ load8888r(%0,$f24) ++ expand_alpha($f24,$f26) ++ 
expand_alpha($f20,$f28) ++ pix_multiply($f20,$f22) ++ save_to($f20) ++ pix_multiply($f22,$f28) ++ save_to($f22) ++ negate($f26,$f26) ++ pix_add_mul($f24,$f22,$f20,$f26) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ ++src; ++ ++dest; ++ ++mask; ++ } ++} ++ ++static void ++ls_combine_xor_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = dest + width; ++ ++ while (dest < end) ++ { ++ if (mask) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%2,$f22) ++ load8888r(%1,$f20) ++ expand_alpha($f22,$f22) ++ pix_multiply($f20,$f22) ++ save_to ($f20) ++ ++ ++ load8888r(%0,$f24) ++ expand_alpha($f20,$f26) ++ expand_alpha($f24,$f28) ++ negate($f26,$f26) ++ negate($f28,$f28) ++ pix_add_mul($f20,$f28,$f24,$f26) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ mask++; ++ }else{ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%1,$f20) ++ ++ load8888r(%0,$f24) ++ expand_alpha($f20,$f26) ++ expand_alpha($f24,$f28) ++ negate($f26,$f26) ++ negate($f28,$f28) ++ pix_add_mul($f20,$f28,$f24,$f26) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src):clobber ++ ); ++ } ++ ++dest; ++ ++src; ++ ++ } ++} ++ ++static void ++ls_combine_xor_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = src + width; ++ ++ while (src < end) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%2,$f22) ++ load8888r(%1,$f20) ++ load8888r(%0,$f24) ++ expand_alpha($f24,$f26) ++ expand_alpha($f20,$f28) ++ pix_multiply($f20,$f22) ++ save_to($f20) ++ pix_multiply($f22,$f28) ++ save_to($f22) ++ negate($f26,$f26) ++ negate($f22,$f22) ++ pix_add_mul($f24,$f22,$f20,$f26) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ ++src; ++ ++dest; ++ ++mask; ++ } ++} ++ ++ ++static void ++ls_combine_in_reverse_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = dest + width; ++ ++ while (dest < end) ++ { ++ ++ if (mask) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%2,$f22) ++ load8888r(%1,$f20) ++ expand_alpha($f22,$f22) ++ pix_multiply($f20,$f22) ++ save_to ($f20) ++ ++ load8888r(%0,$f24) ++ expand_alpha($f20,$f26) ++ pix_multiply($f24,$f26) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ mask++; ++ } else { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%1,$f20) ++ ++ load8888r(%0,$f24) ++ expand_alpha($f20,$f26) ++ pix_multiply($f24,$f26) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src):clobber ++ ); ++ } ++ ++dest; ++ ++src; ++ } ++} ++ ++static void ++ls_combine_in_reverse_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = src + width; ++ ++ while (src < end) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%2,$f22) ++ load8888r(%1,$f20) ++ load8888r(%0,$f24) ++ expand_alpha($f20,$f20) ++ pix_multiply($f22,$f20) ++ save_to($f26) ++ pix_multiply($f24,$f26) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ ++src; ++ ++dest; ++ ++mask; ++ } ++} ++ ++static void ++ls_combine_in_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ 
const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = dest + width; ++ ++ while (dest < end) ++ { ++ if (mask) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%2,$f22) ++ load8888r(%1,$f20) ++ expand_alpha($f22,$f22) ++ pix_multiply($f20,$f22) ++ save_to ($f20) ++ ++ load8888r(%0,$f24) ++ expand_alpha($f24,$f24) ++ pix_multiply($f20,$f24) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ mask++; ++ } else { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%1,$f20) ++ ++ load8888r(%0,$f24) ++ expand_alpha($f24,$f24) ++ pix_multiply($f20,$f24) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src):clobber ++ ); ++ ++ } ++ ++dest; ++ ++src; ++ } ++} ++ ++static void ++ls_combine_in_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = src + width; ++ ++ while (src < end) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%2,$f22) ++ load8888r(%1,$f20) ++ load8888r(%0,$f24) ++ expand_alpha($f24,$f24) ++ pix_multiply($f20,$f22) ++ save_to($f26) ++ pix_multiply($f26,$f24) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ ++src; ++ ++dest; ++ ++mask; ++ } ++ } ++static void ++ls_combine_src_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = src + width; ++ ++ while (src < end) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%2,$f22) ++ load8888r(%1,$f20) ++ pix_multiply($f20,$f22) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ ++src; ++ ++mask; ++ ++dest; ++ } ++ ++} ++ ++ ++static void ++ls_combine_over_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = dest + width; ++ ++ while (dest < end) ++ { ++ ++ uint32_t ssrc = combine (src, mask); ++ uint32_t a = ssrc >> 24; ++ ++ if (a == 0xff) ++ { ++ *dest = ssrc; ++ } ++ else if (ssrc) ++ { ++ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%1,$f20) ++ ++ expand_alpha($f20,$f24) ++ load8888r(%0,$f26) ++ over($f20,$f24,$f26) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(ssrc):clobber ++ ); ++ } ++ ++ ++dest; ++ ++src; ++ if (mask) ++ ++mask; ++ } ++} ++ ++static void ++ls_combine_over_reverse_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = dest + width; ++ ++ while (dest < end) ++ { ++ if (mask) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ ++ load8888r(%2,$f22) ++ load8888r(%1,$f20) ++ expand_alpha($f22,$f22) ++ pix_multiply($f20,$f22) ++ save_to ($f20) ++ ++ load8888r(%0,$f26) ++ expand_alpha($f26,$f28) ++ over($f26,$f28,$f20) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ mask++; ++ }else{ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%1,$f20) ++ ++ load8888r(%0,$f26) ++ expand_alpha($f26,$f28) ++ over($f26,$f28,$f20) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src):clobber ++ ); ++ ++ } ++ ++dest; ++ ++src; ++ } ++} ++ ++ ++static void ++ls_combine_over_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = src + 
width; ++ ++ while (src < end) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%0,$f20) ++ load8888r(%1,$f22) ++ load8888r(%2,$f24) ++ expand_alpha($f22,$f26) ++ in_over($f22,$f26,$f24,$f20) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ ++src; ++ ++dest; ++ ++mask; ++ } ++ ++} ++ ++static void ++ls_combine_over_reverse_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = src + width; ++ ++ while (src < end) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%0,$f20) ++ load8888r(%1,$f22) ++ load8888r(%2,$f24) ++ in($f22,$f24) ++ save_to($f22) ++ expand_alpha($f20,$f28) ++ over($f20,$f28,$f22) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ ++src; ++ ++dest; ++ ++mask; ++ } ++ ++} ++ ++static void ++ls_combine_add_u (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = dest + width; ++ ++ while (dest < end) ++ { ++ ++ if (mask) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%2,$f22) ++ load8888r(%1,$f20) ++ expand_alpha($f22,$f22) ++ pix_multiply($f20,$f22) ++ save_to ($f20) ++ ++ load8888r(%0,$f22) ++ pix_add($f20,$f22) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ mask++; ++ }else{ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%1,$f20) ++ ++ load8888r(%0,$f22) ++ pix_add($f20,$f22) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src):clobber ++ ); ++ ++ } ++ ++dest; ++ ++src; ++ } ++} ++ ++static void ++ls_combine_add_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ uint32_t * dest, ++ const uint32_t * src, ++ const uint32_t * mask, ++ int width) ++{ ++ const uint32_t *end = src + width; ++ ++ while (src < end) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%0,$f20) ++ load8888r(%1,$f22) ++ load8888r(%2,$f24) ++ pix_multiply($f22,$f24) ++ save_to($f22) ++ pix_add($f22,$f20) ++ store8888r($f8,%0) ++ :"+r"(*dest):"r"(*src),"r"(*mask):clobber ++ ); ++ ++src; ++ ++dest; ++ ++mask; ++ } ++} +diff -urN pixman//pixman/pixman-composite-ls.c Pixman.Loongson//pixman/pixman-composite-ls.c +--- pixman//pixman/pixman-composite-ls.c 1970-01-01 08:00:00.000000000 +0800 ++++ Pixman.Loongson//pixman/pixman-composite-ls.c 2010-12-25 18:39:15.356667000 +0800 +@@ -0,0 +1,967 @@ ++static void ++ls_composite_over_x888_8_8888 (pixman_implementation_t *imp, ++ pixman_op_t op, ++ pixman_image_t * src_image, ++ pixman_image_t * mask_image, ++ pixman_image_t * dst_image, ++ int32_t src_x, ++ int32_t src_y, ++ int32_t mask_x, ++ int32_t mask_y, ++ int32_t dest_x, ++ int32_t dest_y, ++ int32_t width, ++ int32_t height) ++{ ++ ++ uint32_t *src, *src_line; ++ uint32_t *dst, *dst_line; ++ uint8_t *mask, *mask_line; ++ int src_stride, mask_stride, dst_stride; ++ uint32_t m; ++ uint32_t s, d; ++ int32_t w; ++ ++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ while (height--) ++ { ++ src = src_line; ++ src_line += src_stride; ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ ++ w = width; ++ while (w--) ++ { ++ m = *mask++; ++ if (m) ++ { 
++ s = *src | 0xff000000; ++ ++ if (m == 0xff) ++ { ++ *dst = s; ++ } ++ else ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%0,$f20) ++ load8888r(%1,$f22) ++ load8888r(%2,$f24) ++ expand_alpha($f22,$f26) ++ expand_alpha_rev($f24,$f28) ++ in_over($f22,$f26,$f28,$f20) ++ store8888r($f8,%0) ++ :"+r"(*dst):"r"(s),"r"(m):clobber ++ ); ++ ++// __m64 sa = expand_alpha (s); ++// __m64 vm = expand_alpha_rev (to_m64 (m)); ++// __m64 vdest = in_over (s, sa, vm, load8888 (*dst)); ++// *dst = store8888 (vdest); ++ ++ } ++ } ++ src++; ++ dst++; ++ } ++ } ++} ++ ++ ++ ++ ++ ++static void ++ls_composite_over_8888_8888 (pixman_implementation_t *imp, ++ pixman_op_t op, ++ pixman_image_t * src_image, ++ pixman_image_t * mask_image, ++ pixman_image_t * dst_image, ++ int32_t src_x, ++ int32_t src_y, ++ int32_t mask_x, ++ int32_t mask_y, ++ int32_t dest_x, ++ int32_t dest_y, ++ int32_t width, ++ int32_t height) ++{ ++ uint32_t *dst_line, *dst; ++ uint32_t *src_line, *src; ++ uint32_t s; ++ int dst_stride, src_stride; ++ uint8_t a; ++ int32_t w; ++ ++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ while (height--) ++ { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w--) ++ { ++ s = *src; ++ a = s >> 24; ++ ++ if (a == 0xff) ++ { ++ *dst = s; ++ } ++ else if (s) ++ { ++ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%1,$f24) ++ load8888r(%0,$f20) ++ expand_alpha($f24,$f26) ++ over($f24,$f26,$f20) ++ store8888r($f8,%0) ++ :"+r"(*dst):"r"(*src):clobber ++ ); ++ } ++ dst++; ++ src++; ++ ++ } ++ } ++} ++ ++ ++static void ++ls_composite_over_8888_n_8888 (pixman_implementation_t *imp, ++ pixman_op_t op, ++ pixman_image_t * src_image, ++ pixman_image_t * mask_image, ++ pixman_image_t * dst_image, ++ int32_t src_x, ++ int32_t src_y, ++ int32_t mask_x, ++ int32_t mask_y, ++ int32_t dest_x, ++ int32_t dest_y, ++ int32_t width, ++ int32_t height) ++{ ++ uint32_t *dst_line, *dst; ++ uint32_t *src_line, *src; ++ uint32_t mask; ++ __m64 vmask; ++ int dst_stride, src_stride; ++ int32_t w; ++ __m64 srca; ++ ++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++ mask = _pixman_image_get_solid (mask_image, dst_image->bits.format); ++ mask = mask | mask >> 8 | mask >> 16 | mask >> 24; ++ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888(%1,$f24) ++ store64a($f24,%0) ++ :"=m"(vmask):"m"(mask):clobber ++ ); ++ ++ srca = ls_4x00ff; ++ ++ while (height--) ++ { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%1,$f20) ++ load8888r(%0,$f22) ++ expand_alpha($f20,$f28) ++ in_over($f20,$f28,$f24,$f22) ++ store8888r($f8,%0) ++ :"+r"(*dst):"r"(*src):clobber ++ ); ++ ++ w--; ++ dst++; ++ src++; ++ } ++ } ++} ++ ++static void ++ls_composite_over_n_8888 (pixman_implementation_t *imp, ++ pixman_op_t op, ++ pixman_image_t * src_image, ++ pixman_image_t * mask_image, ++ pixman_image_t * dst_image, ++ int32_t src_x, ++ int32_t src_y, ++ int32_t mask_x, ++ int32_t mask_y, ++ int32_t dest_x, ++ int32_t dest_y, ++ int32_t width, ++ int32_t height) ++{ ++ uint32_t src; ++ uint32_t *dst_line, *dst; ++ int32_t w; ++ int dst_stride; 
++ __m64 vsrc, vsrca; ++ ++ src = _pixman_image_get_solid (src_image, dst_image->bits.format); ++ ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%2,$f24) ++ store64($f24,%0) ++ expand_alpha($f24,$f26) ++ store64($f26,%1) ++ :"=m"(vsrc), "=m"(vsrca):"r"(src):clobber ++ ); ++ ++ while (height--) ++ { ++ dst = dst_line; ++ dst_line += dst_stride; ++ w = width; ++ ++ while (w) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%0,$f28) ++ over($f24,$f26,$f28) ++ store8888r($f8,%0) ++ :"+r"(*dst)::clobber ++ ); ++ ++ w--; ++ dst++; ++ } ++ } ++} ++ ++static void ++ls_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ pixman_image_t * src_image, ++ pixman_image_t * mask_image, ++ pixman_image_t * dst_image, ++ int32_t src_x, ++ int32_t src_y, ++ int32_t mask_x, ++ int32_t mask_y, ++ int32_t dest_x, ++ int32_t dest_y, ++ int32_t width, ++ int32_t height) ++{ ++ uint32_t src, srca; ++ uint32_t *dst_line; ++ uint32_t *mask_line; ++ int dst_stride, mask_stride; ++ __m64 vsrc, vsrca; ++ ++ src = _pixman_image_get_solid (src_image, dst_image->bits.format); ++ ++ srca = src >> 24; ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); ++ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%2,$f24) ++ store64($f24,%0) ++ expand_alpha($f24,$f26) ++ store64($f26,%1) ++ :"=m"(vsrc), "=m"(vsrca):"r"(src):clobber ++ ); ++ ++ while (height--) ++ { ++ int twidth = width; ++ uint32_t *p = (uint32_t *)mask_line; ++ uint32_t *q = (uint32_t *)dst_line; ++ ++ while (twidth) ++ { ++ ++ if (*p) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%0,$f28) ++ load8888r(%1,$f20) ++ in_over($f24,$f26,$f20,$f28) ++ store8888r($f8,%0) ++ :"+r"(*q):"r"(*p):clobber ++ ); ++ } ++ twidth--; ++ p++; ++ q++; ++ } ++ ++ dst_line += dst_stride; ++ mask_line += mask_stride; ++ } ++} ++ ++ ++static void ++ls_composite_over_n_8_8888 (pixman_implementation_t *imp, ++ pixman_op_t op, ++ pixman_image_t * src_image, ++ pixman_image_t * mask_image, ++ pixman_image_t * dst_image, ++ int32_t src_x, ++ int32_t src_y, ++ int32_t mask_x, ++ int32_t mask_y, ++ int32_t dest_x, ++ int32_t dest_y, ++ int32_t width, ++ int32_t height) ++{ ++ uint32_t src, srca; ++ uint32_t *dst_line, *dst; ++ uint8_t *mask_line, *mask; ++ int dst_stride, mask_stride; ++ int32_t w; ++ __m64 vsrc, vsrca; ++ uint64_t srcsrc; ++ ++ src = _pixman_image_get_solid (src_image, dst_image->bits.format); ++ ++ srca = src >> 24; ++ if (src == 0) ++ return; ++ ++ srcsrc = (uint64_t)src << 32 | src; ++ ++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%2,$f24) ++ store64a($f24,%0) ++ expand_alpha($f24,$f26) ++ store64a($f26,%1) ++ :"=m"(vsrc), "=m"(vsrca):"r"(src):clobber ++ ); ++ ++ while (height--) ++ { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w) ++ { ++ uint32_t m = *mask; ++ ++ if (m) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%0,$f20) ++ load32r(%1,$f22) ++ expand_alpha_rev($f22,$f28) ++ 
in_over($f24,$f26,$f28,$f20) ++ store8888r($f8,%0) ++ :"+r"(*dst):"r"(m):clobber ++ ); ++ } ++ ++ w--; ++ mask++; ++ dst++; ++ } ++ } ++ ++} ++ ++static void ++ls_composite_over_x888_n_8888 (pixman_implementation_t *imp, ++ pixman_op_t op, ++ pixman_image_t * src_image, ++ pixman_image_t * mask_image, ++ pixman_image_t * dst_image, ++ int32_t src_x, ++ int32_t src_y, ++ int32_t mask_x, ++ int32_t mask_y, ++ int32_t dest_x, ++ int32_t dest_y, ++ int32_t width, ++ int32_t height) ++{ ++ uint32_t *dst_line, *dst; ++ uint32_t *src_line, *src; ++ uint32_t mask; ++ __m64 vmask; ++ int dst_stride, src_stride; ++ int32_t w; ++ __m64 srca; ++ ++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ mask = _pixman_image_get_solid (mask_image, dst_image->bits.format); ++ ++ mask &= 0xff000000; ++ mask = mask | mask >> 8 | mask >> 16 | mask >> 24; ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%1,$f24) ++ store64a($f24,%0) ++ :"=m"(vmask):"r"(mask):clobber ++ ); ++ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load64a(%1,$f26) ++ store64a($f26,%0) ++ :"=m"(srca):"m"(ls_4x00ff):clobber ++ ); ++ ++ while (height--) ++ { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w) ++ { ++ uint32_t src_tmp = *src | 0xff000000; ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%1,$f20) ++ load8888r(%0,$f22) ++ in_over($f20,$f26,$f24,$f22) ++ store8888r($f8,%0) ++ :"+r"(*dst):"r"(src_tmp):clobber ++ ); ++ ++ w--; ++ dst++; ++ src++; ++ } ++ } ++} ++ ++ ++static void ++ls_composite_over_8888_0565 (pixman_implementation_t *imp, ++ pixman_op_t op, ++ pixman_image_t * src_image, ++ pixman_image_t * mask_image, ++ pixman_image_t * dst_image, ++ int32_t src_x, ++ int32_t src_y, ++ int32_t mask_x, ++ int32_t mask_y, ++ int32_t dest_x, ++ int32_t dest_y, ++ int32_t width, ++ int32_t height) ++{ ++ uint16_t *dst_line, *dst; ++ uint32_t d; ++ uint32_t *src_line, *src, s; ++ uint8_t a; ++ int dst_stride, src_stride; ++ int32_t w; ++ ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ ++ while (height--) ++ { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w--) ++ { ++ s = *src++; ++ a = s >> 24; ++ if (s) ++ { ++ if (a == 0xff) ++ { ++ d = s; ++ } ++ else ++ { ++ d = *dst; ++ d = CONVERT_0565_TO_0888 (d); ++ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%1,$f24) ++ load8888r(%0,$f20) ++ expand_alpha($f24,$f26) ++ over($f24,$f26,$f20) ++ store8888r($f8,%0) ++ :"+r"(d):"r"(s):clobber ++ ); ++ ++ ++ } ++ *dst = CONVERT_8888_TO_0565 (d); ++ } ++ dst++; ++ } ++ } ++} ++ ++static void ++ls_composite_over_n_0565 (pixman_implementation_t *imp, ++ pixman_op_t op, ++ pixman_image_t * src_image, ++ pixman_image_t * mask_image, ++ pixman_image_t * dst_image, ++ int32_t src_x, ++ int32_t src_y, ++ int32_t mask_x, ++ int32_t mask_y, ++ int32_t dest_x, ++ int32_t dest_y, ++ int32_t width, ++ int32_t height) ++{ ++ uint32_t src; ++ uint32_t d; ++ uint16_t *dst_line, *dst; ++ int32_t w; ++ int dst_stride; ++ __m64 vsrc, vsrca; ++ ++ src = _pixman_image_get_solid (src_image, dst_image->bits.format); ++ ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, 
dst_stride, dst_line, 1); ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%2,$f24) ++ store64a($f24,%0) ++ expand_alpha($f24,$f26) ++ store64a($f26,%1) ++ :"=m"(vsrc), "=m"(vsrca):"r"(src):clobber ++ ); ++ ++ while (height--) ++ { ++ dst = dst_line; ++ dst_line += dst_stride; ++ w = width; ++ ++ while (w) ++ { ++ ++ d = *dst; ++ d = CONVERT_0565_TO_0888 (d); ++ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%0,$f20) ++ ++ over($f24,$f26,$f20) ++ store8888r($f8,%0) ++ :"+r"(d)::clobber ++ ); ++ ++ *dst = CONVERT_8888_TO_0565 (d); ++ ++ w--; ++ dst++; ++ } ++ } ++} ++ ++static void ++ls_composite_over_n_8_0565 (pixman_implementation_t *imp, ++ pixman_op_t op, ++ pixman_image_t * src_image, ++ pixman_image_t * mask_image, ++ pixman_image_t * dst_image, ++ int32_t src_x, ++ int32_t src_y, ++ int32_t mask_x, ++ int32_t mask_y, ++ int32_t dest_x, ++ int32_t dest_y, ++ int32_t width, ++ int32_t height) ++{ ++ uint32_t src, srca, m, d; ++ uint16_t *dst_line, *dst; ++ uint8_t *mask_line, *mask; ++ int dst_stride, mask_stride; ++ int32_t w; ++ __m64 vsrc, vsrca; ++ ++ src = _pixman_image_get_solid (src_image, dst_image->bits.format); ++ ++ srca = src >> 24; ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%2,$f24) ++ store64a($f24,%0) ++ expand_alpha($f24,$f26) ++ store64a($f26,%1) ++ :"=m"(vsrc), "=m"(vsrca):"r"(src):clobber ++ ); ++ ++ while (height--) ++ { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w) ++ { ++ m = *mask; ++ d = *dst; ++ ++ if (m) ++ { ++ ++ d = CONVERT_0565_TO_0888 (d); ++ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%0,$f20) ++ load32r(%1,$f22) ++ expand_alpha_rev($f22,$f28) ++ in_over($f24,$f26,$f28,$f20) ++ store8888r($f8,%0) ++ :"+r"(d):"r"(m):clobber ++ ); ++ ++ *dst = CONVERT_8888_TO_0565 (d); ++ ++ } ++ ++ w--; ++ mask++; ++ dst++; ++ } ++ } ++} ++ ++static void ++ls_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, ++ pixman_op_t op, ++ pixman_image_t * src_image, ++ pixman_image_t * mask_image, ++ pixman_image_t * dst_image, ++ int32_t src_x, ++ int32_t src_y, ++ int32_t mask_x, ++ int32_t mask_y, ++ int32_t dest_x, ++ int32_t dest_y, ++ int32_t width, ++ int32_t height) ++{ ++ uint32_t src, srca, m, d; ++ uint16_t *dst_line; ++ uint32_t *mask_line; ++ int dst_stride, mask_stride; ++ __m64 vsrc, vsrca; ++ ++ src = _pixman_image_get_solid (src_image, dst_image->bits.format); ++ ++ srca = src >> 24; ++ if (src == 0) ++ return; ++ ++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%2,$f24) ++ store64a($f24,%0) ++ expand_alpha($f24,$f26) ++ store64a($f26,%1) ++ :"=m"(vsrc), "=m"(vsrca):"r"(src):clobber ++ ); ++ ++ while (height--) ++ { ++ int twidth = width; ++ uint32_t *p = (uint32_t *)mask_line; ++ uint16_t *q = (uint16_t *)dst_line; ++ ++ while (twidth) ++ { ++ ++ m = *(uint32_t *)p; ++ d = *q; ++ ++ if (m) ++ { ++ ++ d = CONVERT_0565_TO_0888 (d); ++ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%0,$f20) ++ load8888r(%1,$f22) ++ in_over($f24,$f26,$f22,$f20) ++ store8888r($f8,%0) ++ 
:"+r"(d):"r"(m):clobber ++ ); ++ ++ *q = CONVERT_8888_TO_0565 (d); ++ ++ } ++ ++ twidth--; ++ p++; ++ q++; ++ } ++ ++ mask_line += mask_stride; ++ dst_line += dst_stride; ++ } ++} ++static void ++ls_composite_over_pixbuf_8888 (pixman_implementation_t *imp, ++ pixman_op_t op, ++ pixman_image_t * src_image, ++ pixman_image_t * mask_image, ++ pixman_image_t * dst_image, ++ int32_t src_x, ++ int32_t src_y, ++ int32_t mask_x, ++ int32_t mask_y, ++ int32_t dest_x, ++ int32_t dest_y, ++ int32_t width, ++ int32_t height) ++{ ++ uint32_t *dst_line, *dst; ++ uint32_t *src_line, *src; ++ int dst_stride, src_stride; ++ int32_t w; ++ ++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++#if 0 ++ /* FIXME */ ++ assert (src_image->drawable == mask_image->drawable); ++#endif ++ ++ while (height--) ++ { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%1,$f22) ++ load8888r(%0,$f20) ++ over_rev_non_pre($f22,$f20) ++ store8888r($f8,%0) ++ :"+r"(*dst):"r"(*src):clobber ++ ); ++ ++ w--; ++ dst++; ++ src++; ++ } ++ } ++} ++static void ++ls_composite_over_pixbuf_0565 (pixman_implementation_t *imp, ++ pixman_op_t op, ++ pixman_image_t * src_image, ++ pixman_image_t * mask_image, ++ pixman_image_t * dst_image, ++ int32_t src_x, ++ int32_t src_y, ++ int32_t mask_x, ++ int32_t mask_y, ++ int32_t dest_x, ++ int32_t dest_y, ++ int32_t width, ++ int32_t height) ++{ ++ uint16_t *dst_line, *dst; ++ uint32_t *src_line, *src, d; ++ int dst_stride, src_stride; ++ int32_t w; ++ ++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); ++ ++#if 0 ++ /* FIXME */ ++ assert (src_image->drawable == mask_image->drawable); ++#endif ++ ++ while (height--) ++ { ++ dst = dst_line; ++ dst_line += dst_stride; ++ src = src_line; ++ src_line += src_stride; ++ w = width; ++ ++ while (w) ++ { ++ ++ d = *dst; ++ d = CONVERT_0565_TO_0888 (d); ++ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load8888r(%1,$f20) ++ load8888r(%0,$f24) ++ over_rev_non_pre($f20,$f24) ++ store8888r($f8,%0) ++ :"+r"(d):"r"(*src):clobber ++ ); ++ ++ *dst = CONVERT_8888_TO_0565 (d); ++ ++ w--; ++ dst++; ++ src++; ++ } ++ } ++} ++ ++static void ++ls_composite_src_n_8_8888 (pixman_implementation_t *imp, ++ pixman_op_t op, ++ pixman_image_t * src_image, ++ pixman_image_t * mask_image, ++ pixman_image_t * dst_image, ++ int32_t src_x, ++ int32_t src_y, ++ int32_t mask_x, ++ int32_t mask_y, ++ int32_t dest_x, ++ int32_t dest_y, ++ int32_t width, ++ int32_t height) ++{ ++ uint32_t src, srca; ++ uint32_t *dst_line, *dst, m; ++ uint8_t *mask_line, *mask; ++ int dst_stride, mask_stride; ++ int32_t w; ++ __m64 vsrc, vsrca; ++ uint64_t srcsrc; ++ ++ src = _pixman_image_get_solid (src_image, dst_image->bits.format); ++ ++ srca = src >> 24; ++ if (src == 0) ++ { ++ pixman_fill_ls (dst_image->bits.bits, dst_image->bits.rowstride, ++ PIXMAN_FORMAT_BPP (dst_image->bits.format), ++ dest_x, dest_y, width, height, 0); ++ return; ++ } ++ ++ srcsrc = (uint64_t)src << 32 | src; ++ ++ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); ++ ++ __asm__ volatile ( ++ ".set 
arch=loongson2f \n\t" ++ load8888r(%2,$f24) ++ store64a($f24,%0) ++ expand_alpha($f24,$f26) ++ store64a($f26,%1) ++ :"=m"(vsrc), "=m"(vsrca):"r"(src):clobber ++ ); ++ while (height--) ++ { ++ dst = dst_line; ++ dst_line += dst_stride; ++ mask = mask_line; ++ mask_line += mask_stride; ++ w = width; ++ ++ while (w) ++ { ++ m = *mask; ++ ++ if (m) ++ { ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ load32r(%1,$f20) ++ expand_alpha_rev($f20,$f28) ++ in($f24,$f28) ++ store8888r($f8,%0) ++ :"=r"(*dst):"r"(m):clobber ++ ); ++ ++ } ++ else ++ { ++ *dst = 0; ++ } ++ ++ w--; ++ mask++; ++ dst++; ++ } ++ } ++} +diff -urN pixman//pixman/pixman-cpu.c Pixman.Loongson//pixman/pixman-cpu.c +--- pixman//pixman/pixman-cpu.c 2010-12-25 18:46:00.073234000 +0800 ++++ Pixman.Loongson//pixman/pixman-cpu.c 2010-12-25 18:39:15.360337000 +0800 +@@ -579,7 +579,9 @@ + if (pixman_have_mmx ()) + return _pixman_implementation_create_mmx (); + #endif +- ++#ifdef USE_LS ++ return _pixman_implementation_create_ls (); ++#endif + #ifdef USE_ARM_NEON + if (pixman_have_arm_neon ()) + return _pixman_implementation_create_arm_neon (); +diff -urN pixman//pixman/pixman-ls.c Pixman.Loongson//pixman/pixman-ls.c +--- pixman//pixman/pixman-ls.c 1970-01-01 08:00:00.000000000 +0800 ++++ Pixman.Loongson//pixman/pixman-ls.c 2010-12-25 18:39:15.386759000 +0800 +@@ -0,0 +1,538 @@ ++/* ++* Based on pixman-mmx.c ++* Implemented for loongson 2F only. ++* Free software based on GPL licence. ++* Copyright 2010 WG Ge. ++*/ ++ ++#ifdef HAVE_CONFIG_H ++#include <config.h> ++#endif ++#include <stdlib.h> ++#include <string.h> ++#include <math.h> ++#include <limits.h> ++#include <stdio.h> ++#include "pixman-private.h" ++#include "pixman-combine32.h" ++#include "primitive.h" ++ ++#define __m64 __attribute__ ((aligned (8))) uint64_t ++#define DECLARE_ALIGNED(n,t,v) t __attribute__ ((aligned (n))) v ++#define DECLARE_ALIGNED_8(t, v, ...) 
DECLARE_ALIGNED(8, t, v) ++ ++DECLARE_ALIGNED_8 (const uint64_t, ls_4x00ff ) = 0x00ff00ff00ff00ffULL; ++DECLARE_ALIGNED_8 (const uint64_t, ls_4x0080 ) = 0x0080008000800080ULL; ++DECLARE_ALIGNED_8 (const uint64_t, ls_565_rgb ) = 0x000001f0003f001fULL; ++DECLARE_ALIGNED_8 (const uint64_t, ls_565_unpack_multiplier ) = 0x0000008404100840ULL; ++DECLARE_ALIGNED_8 (const uint64_t, ls_565_r ) = 0x000000f800000000ULL; ++DECLARE_ALIGNED_8 (const uint64_t, ls_565_g ) = 0x0000000000fc0000ULL; ++DECLARE_ALIGNED_8 (const uint64_t, ls_565_b ) = 0x00000000000000f8ULL; ++DECLARE_ALIGNED_8 (const uint64_t, ls_mask_0 ) = 0xffffffffffff0000ULL; ++DECLARE_ALIGNED_8 (const uint64_t, ls_mask_1 ) = 0xffffffff0000ffffULL; ++DECLARE_ALIGNED_8 (const uint64_t, ls_mask_2 ) = 0xffff0000ffffffffULL; ++DECLARE_ALIGNED_8 (const uint64_t, ls_mask_3 ) = 0x0000ffffffffffffULL; ++DECLARE_ALIGNED_8 (const uint64_t, ls_full_alpha ) = 0x00ff000000000000ULL; ++DECLARE_ALIGNED_8 (const uint64_t, ls_ffff0000ffff0000 ) = 0xffff0000ffff0000ULL; ++DECLARE_ALIGNED_8 (const uint64_t, ls_0000ffff00000000 ) = 0x0000ffff00000000ULL; ++DECLARE_ALIGNED_8 (const uint64_t, ls_000000000000ffff ) = 0x000000000000ffffULL; ++ ++ ++pixman_bool_t ++pixman_fill_ls (uint32_t *bits, ++ int stride, ++ int bpp, ++ int x, ++ int y, ++ int width, ++ int height, ++ uint32_t xor) ++{ ++ uint64_t fill; ++ uint32_t byte_width; ++ uint8_t *byte_line; ++ ++ ++ ++ if (bpp != 16 && bpp != 32 && bpp != 8) ++ return FALSE; ++ ++ if (bpp == 8) ++ { ++ stride = stride * (int) sizeof (uint32_t) / 1; ++ byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x); ++ byte_width = width; ++ stride *= 1; ++ xor = (xor & 0xff) * 0x01010101; ++ } ++ else if (bpp == 16) ++ { ++ stride = stride * (int) sizeof (uint32_t) / 2; ++ byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); ++ byte_width = 2 * width; ++ stride *= 2; ++ xor = (xor & 0xffff) * 0x00010001; ++ } ++ else ++ { ++ stride = stride * (int) sizeof (uint32_t) / 4; ++ byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x); ++ byte_width = 4 * width; ++ stride *= 4; ++ } ++ ++ fill = ((uint64_t)xor << 32) | xor; ++ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ "ldc1 $f24, %0 \n\t" ++ ::"m"(fill):"$f24" ++ ); ++ while (height--) ++ { ++ int w; ++ uint8_t *d = byte_line; ++ ++ byte_line += stride; ++ w = byte_width; ++ ++ while (w >= 1 && ((unsigned long)d & 1)) ++ { ++ *(uint8_t *)d = (xor & 0xff); ++ w--; ++ d++; ++ } ++ ++ while (w >= 2 && ((unsigned long)d & 3)) ++ { ++ *(uint16_t *)d = xor; ++ w -= 2; ++ d += 2; ++ } ++ ++ while (w >= 4 && ((unsigned long)d & 7)) ++ { ++ *(uint32_t *)d = xor; ++ ++ w -= 4; ++ d += 4; ++ } ++ ++ while (w >= 64) ++ { ++ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ "dmfc1 $8, $f24 \n\t" ++ "sd $8 , (%0) \n\t" ++ "sd $8 , 8(%0) \n\t" ++ "sd $8 , 16(%0) \n\t" ++ "sd $8 , 24(%0) \n\t" ++ "sd $8 , 32(%0) \n\t" ++ "sd $8 , 40(%0) \n\t" ++ "sd $8 , 48(%0) \n\t" ++ "sd $8 , 56(%0) \n\t" ++ ::"r"(d):"$8","memory","$f24" ++ ); ++ w -= 64; ++ d += 64; ++ } ++ ++ while (w >= 4) ++ { ++ *(uint32_t *)d = xor; ++ ++ w -= 4; ++ d += 4; ++ } ++ while (w >= 2) ++ { ++ *(uint16_t *)d = xor; ++ w -= 2; ++ d += 2; ++ } ++ while (w >= 1) ++ { ++ *(uint8_t *)d = (xor & 0xff); ++ w--; ++ d++; ++ } ++ ++ } ++ return TRUE; ++} ++ ++static pixman_bool_t ++pixman_blt_ls (uint32_t *src_bits, ++ uint32_t *dst_bits, ++ int src_stride, ++ int dst_stride, ++ int src_bpp, ++ int dst_bpp, ++ int src_x, ++ int src_y, ++ int dst_x, ++ int dst_y, ++ int width, ++ int height) ++{ ++ 
uint8_t * src_bytes; ++ uint8_t * dst_bytes; ++ int byte_width; ++ ++ if (src_bpp != dst_bpp) ++ return FALSE; ++ ++ if (src_bpp == 16) ++ { ++ src_stride = src_stride * (int) sizeof (uint32_t) / 2; ++ dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; ++ src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); ++ dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); ++ byte_width = 2 * width; ++ src_stride *= 2; ++ dst_stride *= 2; ++ } ++ else if (src_bpp == 32) ++ { ++ src_stride = src_stride * (int) sizeof (uint32_t) / 4; ++ dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; ++ src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); ++ dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); ++ byte_width = 4 * width; ++ src_stride *= 4; ++ dst_stride *= 4; ++ } ++ else ++ { ++ return FALSE; ++ } ++ ++ while (height--) ++ { ++ int w; ++ uint8_t *s = src_bytes; ++ uint8_t *d = dst_bytes; ++ src_bytes += src_stride; ++ dst_bytes += dst_stride; ++ w = byte_width; ++ ++ while (w >= 2 && ((unsigned long)d & 3)) ++ { ++ *(uint16_t *)d = *(uint16_t *)s; ++ w -= 2; ++ s += 2; ++ d += 2; ++ } ++ ++ while (w >= 4 && ((unsigned long)d & 7)) ++ { ++ *(uint32_t *)d = *(uint32_t *)s; ++ ++ w -= 4; ++ s += 4; ++ d += 4; ++ } ++ if ((unsigned long)s & 7) ++{ ++ while (w >= 64) ++ { ++ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ "uld $8 , (%1) \n\t" ++ "uld $9 , 8(%1) \n\t" ++ "uld $10, 16(%1) \n\t" ++ "uld $11, 24(%1) \n\t" ++ "sd $8 , (%0) \n\t" ++ "sd $9 , 8(%0) \n\t" ++ "sd $10, 16(%0) \n\t" ++ "sd $11, 24(%0) \n\t" ++ ++ "uld $8 , 32(%1) \n\t" ++ "uld $9 , 40(%1) \n\t" ++ "uld $10, 48(%1) \n\t" ++ "uld $11, 56(%1) \n\t" ++ "sd $8 , 32(%0) \n\t" ++ "sd $9 , 40(%0) \n\t" ++ "sd $10, 48(%0) \n\t" ++ "sd $11, 56(%0) \n\t" ++ ::"r"(d),"r"(s):"$8","$9","$10","$11","memory" ++ ); ++ w -= 64; ++ s += 64; ++ d += 64; ++ } ++} ++else ++{ ++ while (w >= 64) ++ { ++ ++ __asm__ volatile ( ++ ".set arch=loongson2f \n\t" ++ "ld $8 , (%1) \n\t" ++ "ld $9 , 8(%1) \n\t" ++ "ld $10, 16(%1) \n\t" ++ "ld $11, 24(%1) \n\t" ++ "sd $8 , (%0) \n\t" ++ "sd $9 , 8(%0) \n\t" ++ "sd $10, 16(%0) \n\t" ++ "sd $11, 24(%0) \n\t" ++ ++ "ld $8 , 32(%1) \n\t" ++ "ld $9 , 40(%1) \n\t" ++ "ld $10, 48(%1) \n\t" ++ "ld $11, 56(%1) \n\t" ++ "sd $8 , 32(%0) \n\t" ++ "sd $9 , 40(%0) \n\t" ++ "sd $10, 48(%0) \n\t" ++ "sd $11, 56(%0) \n\t" ++ ::"r"(d),"r"(s):"$8","$9","$10","$11","memory" ++ ); ++ w -= 64; ++ s += 64; ++ d += 64; ++ } ++} ++ ++ while (w >= 4) ++ { ++ *(uint32_t *)d = *(uint32_t *)s; ++ ++ w -= 4; ++ s += 4; ++ d += 4; ++ } ++ if (w >= 2) ++ { ++ *(uint16_t *)d = *(uint16_t *)s; ++ w -= 2; ++ s += 2; ++ d += 2; ++ } ++ } ++ return TRUE; ++} ++ ++ ++#include "pixman-composite-ls.c" ++#include "pixman-combine-ls.c" ++ ++static pixman_bool_t ++ls_blt (pixman_implementation_t *imp, ++ uint32_t * src_bits, ++ uint32_t * dst_bits, ++ int src_stride, ++ int dst_stride, ++ int src_bpp, ++ int dst_bpp, ++ int src_x, ++ int src_y, ++ int dst_x, ++ int dst_y, ++ int width, ++ int height) ++{ ++ if (!pixman_blt_ls ( ++ src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, ++ src_x, src_y, dst_x, dst_y, width, height)) ++ { ++ return _pixman_implementation_blt ( ++ imp->delegate, ++ src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, ++ src_x, src_y, dst_x, dst_y, width, height); ++ } ++ ++ return TRUE; ++} ++ ++static pixman_bool_t ++ls_fill (pixman_implementation_t *imp, ++ uint32_t * bits, ++ int 
stride, ++ int bpp, ++ int x, ++ int y, ++ int width, ++ int height, ++ uint32_t xor) ++{ ++ if (!pixman_fill_ls (bits, stride, bpp, x, y, width, height, xor)) ++ { ++ return _pixman_implementation_fill ( ++ imp->delegate, bits, stride, bpp, x, y, width, height, xor); ++ } ++ ++ return TRUE; ++} ++
++static void ++ls_composite_copy_area (pixman_implementation_t *imp, ++ pixman_op_t op, ++ pixman_image_t * src_image, ++ pixman_image_t * mask_image, ++ pixman_image_t * dst_image, ++ int32_t src_x, ++ int32_t src_y, ++ int32_t mask_x, ++ int32_t mask_y, ++ int32_t dest_x, ++ int32_t dest_y, ++ int32_t width, ++ int32_t height) ++{ ++ pixman_blt_ls (src_image->bits.bits, ++ dst_image->bits.bits, ++ src_image->bits.rowstride, ++ dst_image->bits.rowstride, ++ PIXMAN_FORMAT_BPP (src_image->bits.format), ++ PIXMAN_FORMAT_BPP (dst_image->bits.format), ++ src_x, src_y, dest_x, dest_y, width, height); ++} ++ ++ ++static const pixman_fast_path_t ls_fast_paths[] = ++{ ++ ++//these are implemented so far ++#if 1 ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, ls_composite_over_x888_8_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, ls_composite_over_x888_8_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, ls_composite_over_x888_8_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, ls_composite_over_x888_8_8888 ), ++#endif ++ ++#if 1 ++//over_8888_0565 significant perf improvement, slight better L1, L2, 30% better RT ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, ls_composite_over_8888_0565 ), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, ls_composite_over_8888_0565 ), ++ PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, ls_composite_over_pixbuf_0565 ), ++ PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, ls_composite_over_pixbuf_0565 ), ++ ++//big improvement some closing 100% ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, ls_composite_over_n_8888_0565_ca ), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, ls_composite_over_n_8888_0565_ca ), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, ls_composite_over_n_8_0565 ), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, ls_composite_over_n_8_0565 ), ++ PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, ls_composite_over_n_0565 ), ++ ++//ubalbe to bench with lowlevel bench, believe it is a gain in perf ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, ls_composite_over_x888_n_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, ls_composite_over_x888_n_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, ls_composite_over_x888_n_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, ls_composite_over_x888_n_8888 ), ++ ++//performance regress 30% in L1,L2, but significant improvement in RT ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, ls_composite_over_8888_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, ls_composite_over_8888_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, ls_composite_over_8888_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, ls_composite_over_8888_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, ls_composite_over_pixbuf_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, ls_composite_over_pixbuf_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, ls_composite_over_pixbuf_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, ls_composite_over_pixbuf_8888 ), ++ ++//same performance in L1,L2, but significant improvement in RT 30-40% ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, ls_composite_over_8888_n_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, ls_composite_over_8888_n_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, 
ls_composite_over_8888_n_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, ls_composite_over_8888_n_8888 ), ++ ++//significant perf improvement 20% ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, ls_composite_over_n_8_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, ls_composite_over_n_8_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, ls_composite_over_n_8_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, ls_composite_over_n_8_8888 ), ++ ++//3 times perf improvements ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, ls_composite_over_n_8888_8888_ca ), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, ls_composite_over_n_8888_8888_ca ), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, ls_composite_over_n_8888_8888_ca ), ++ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, ls_composite_over_n_8888_8888_ca ), ++ ++//significant performance boost ++ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, ls_composite_over_n_8888 ), ++ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, ls_composite_over_n_8888 ), ++//simple add, expect better perf in generic code ++// PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, ls_composite_add_8888_8888 ), ++// PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, ls_composite_add_8888_8888 ), ++ ++// FIXME: Copy memory are not better than geneic code ++#if 0 ++ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, ls_composite_copy_area ), ++ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, ls_composite_copy_area ), ++ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, ls_composite_copy_area ), ++ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, ls_composite_copy_area ), ++ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, ls_composite_copy_area ), ++ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, ls_composite_copy_area ), ++ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, ls_composite_copy_area ), ++ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, ls_composite_copy_area ), ++ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, ls_composite_copy_area ), ++ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, ls_composite_copy_area ), ++#endif ++ ++//significant improvement ++ PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, ls_composite_src_n_8_8888 ), ++ PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, ls_composite_src_n_8_8888 ), ++ PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, ls_composite_src_n_8_8888 ), ++ PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, ls_composite_src_n_8_8888 ), ++ ++#endif ++ ++//these are not yet implemented ++ ++#if 0 ++ ++ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, ls_composite_add_8000_8000 ), ++ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, ls_composite_add_n_8_8 ), ++ PIXMAN_STD_FAST_PATH (IN, a8, null, a8, ls_composite_in_8_8 ), ++ PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, ls_composite_in_n_8_8 ), ++#endif ++ ++ ++ { PIXMAN_OP_NONE }, ++}; ++ ++pixman_implementation_t * ++_pixman_implementation_create_ls (void) ++{ ++ pixman_implementation_t *general = _pixman_implementation_create_fast_path (); ++ pixman_implementation_t *imp = _pixman_implementation_create (general, ls_fast_paths); ++ ++//Turned on but unable to benchmark. 
++#if 1 ++ imp->combine_32[PIXMAN_OP_OVER] = ls_combine_over_u; ++ imp->combine_32[PIXMAN_OP_OVER_REVERSE] = ls_combine_over_reverse_u; ++ imp->combine_32[PIXMAN_OP_IN] = ls_combine_in_u; ++ imp->combine_32[PIXMAN_OP_IN_REVERSE] = ls_combine_in_reverse_u; ++ imp->combine_32[PIXMAN_OP_OUT] = ls_combine_out_u; ++ imp->combine_32[PIXMAN_OP_OUT_REVERSE] = ls_combine_out_reverse_u; ++ imp->combine_32[PIXMAN_OP_ATOP] = ls_combine_atop_u; ++ imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = ls_combine_atop_reverse_u; ++ imp->combine_32[PIXMAN_OP_XOR] = ls_combine_xor_u; ++ imp->combine_32[PIXMAN_OP_ADD] = ls_combine_add_u; ++ imp->combine_32[PIXMAN_OP_SATURATE] = ls_combine_saturate_u; ++ ++ imp->combine_32_ca[PIXMAN_OP_SRC] = ls_combine_src_ca; ++ imp->combine_32_ca[PIXMAN_OP_OVER] = ls_combine_over_ca; ++ imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = ls_combine_over_reverse_ca; ++ imp->combine_32_ca[PIXMAN_OP_IN] = ls_combine_in_ca; ++ imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = ls_combine_in_reverse_ca; ++ imp->combine_32_ca[PIXMAN_OP_OUT] = ls_combine_out_ca; ++ imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = ls_combine_out_reverse_ca; ++ imp->combine_32_ca[PIXMAN_OP_ATOP] = ls_combine_atop_ca; ++ imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = ls_combine_atop_reverse_ca; ++ imp->combine_32_ca[PIXMAN_OP_XOR] = ls_combine_xor_ca; ++ imp->combine_32_ca[PIXMAN_OP_ADD] = ls_combine_add_ca; ++#endif ++ ++//FIXME blt and fill not shown better perf than geneic code ++#if 0 ++ imp->blt = ls_blt; ++ imp->fill = ls_fill; ++#endif ++ ++ return imp; ++} ++ +diff -urN pixman//pixman/pixman-private.h Pixman.Loongson//pixman/pixman-private.h +--- pixman//pixman/pixman-private.h 2010-12-25 18:46:00.102841000 +0800 ++++ Pixman.Loongson//pixman/pixman-private.h 2010-12-25 18:39:15.401808000 +0800 +@@ -493,6 +493,11 @@ + pixman_implementation_t * + _pixman_implementation_create_fast_path (void); + ++#ifdef USE_LS ++pixman_implementation_t * ++_pixman_implementation_create_ls (void); ++#endif ++ + #ifdef USE_MMX + pixman_implementation_t * + _pixman_implementation_create_mmx (void); +diff -urN pixman//pixman/primitive.h Pixman.Loongson//pixman/primitive.h +--- pixman//pixman/primitive.h 1970-01-01 08:00:00.000000000 +0800 ++++ Pixman.Loongson//pixman/primitive.h 2010-12-25 18:39:15.457084000 +0800 +@@ -0,0 +1,214 @@ ++/*
++* MMX register usage protocol
++* return result: f8
++* tmp immediate f12
++* tmp register in primitive f14 f16 f18
++* tmp register in pixman f0, f4, f6, f10, f20, f22
++* globals in function f24, f26, f28, f30
++* Exceptions for load and store:
++* load will specify dest FPR register
++* store will specify src FPR register
++* expand_alpha(_rev) implemented with GPR, dest FPR as the 2nd parameter
++*
++* Special alert: don't use return result $f8 as input, it might be overwritten
++*/
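++
++/* Hypothetical usage sketch (illustrative only -- the exact operand
++ * syntax depends on the caller): each primitive below expands to asm
++ * text, so a fast path chains them inside one __asm__ block and copies
++ * $f8 aside with save_to before the next primitive clobbers it:
++ *
++ *     __asm__ volatile (
++ *         load8888(0(%0), $f24)      // src, unpacked to 16-bit lanes
++ *         expand_alpha($f24, $f26)   // src alpha replicated across lanes
++ *         load8888(0(%1), $f28)      // dst
++ *         over($f24, $f26, $f28)     // result lands in $f8
++ *         save_to($f24)              // rescue $f8 before storing
++ *         store8888($f24, 0(%1))
++ *         : : "r" (src), "r" (dst) : clobber);
++ */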
++
++
++/*primitive macros */
++
++#define clobber "$8","$9","$f0","$f2","$f8",\
++ "$f12","$f14","$f16","$f18","$f20",\
++ "$f22","$f24","$f26","$f28","$f30"
++
++#define DMTC1_IMM(regc1,imm) \
++ "dli $8, "#imm" \n\t" \
++ "dmtc1 $8, "#regc1" \n\t"
++
++#define MTC1_IMM(regc1,imm) \
++ "li $8, "#imm" \n\t" \
++ "dmtc1 $8, "#regc1" \n\t"
++
++
++#define save_to(reg1) "mov.d "#reg1", $f8 \n\t"
++#define zero(reg1) "xor "#reg1","#reg1","#reg1" \n\t"
++
++#define load32(sp,reg1) \
++ "ulw $8, "#sp" \n\t" \
++ "dmtc1 $8, "#reg1" \n\t"
++
++#define load32a(sp,reg1) \
++ "lw $8, "#sp" \n\t" \
++ "dmtc1 $8, "#reg1" \n\t"
++
++#define load32r(sp,reg1) \
++ "dmtc1 "#sp", "#reg1" \n\t"
++
++#define load64(sp,reg1) \
++ "uld $8, "#sp" \n\t" \
++ "dmtc1 $8, "#reg1" \n\t"
++
++#define load64a(sp,reg1) \
++ "ld $8, "#sp" \n\t" \
++ "dmtc1 $8, "#reg1" \n\t"
++
++
++#define store32(reg1,sp) \
++ "dmfc1 $8, "#reg1" \n\t" \
++ "usw $8, "#sp" \n\t"
++
++#define store32r(reg1,sp) \
++ "dmfc1 "#sp", "#reg1" \n\t"
++
++#define store32a(reg1,sp) \
++ "swc1 "#reg1", "#sp" \n\t"
++
++#define store64(reg1,sp) \
++ "dmfc1 $8, "#reg1" \n\t" \
++ "usd $8, "#sp" \n\t"
++
++#define store64a(reg1,sp) \
++ "sdc1 "#reg1", "#sp" \n\t"
++
++#define load8888(sp,reg1) \
++ load64(sp,reg1) \
++ "xor $f12, $f12, $f12 \n\t" \
++ "punpcklbh "#reg1", "#reg1", $f12 \n\t"
++
++#define load8888r(sp,reg1) \
++ load32r(sp,reg1) \
++ "xor $f12, $f12, $f12 \n\t" \
++ "punpcklbh "#reg1", "#reg1", $f12 \n\t"
++
++#define load8888a(sp,reg1) \
++ load64a(sp,reg1) \
++ "xor $f12, $f12, $f12 \n\t" \
++ "punpcklbh "#reg1", "#reg1", $f12 \n\t"
++
++#define load8888ah(sp,reg1) \
++ load64a(sp,reg1) \
++ "xor $f12, $f12, $f12 \n\t" \
++ "punpckhbh "#reg1", "#reg1", $f12 \n\t"
++
++#define store8888(reg1,sp) \
++ "xor $f12, $f12, $f12 \n\t" \
++ "packushb "#reg1", "#reg1", $f12 \n\t" \
++ store64(reg1,sp)
++
++#define store8888r(reg1,sp) \
++ "xor $f12, $f12, $f12 \n\t" \
++ "packushb "#reg1", "#reg1", $f12 \n\t" \
++ store32r(reg1,sp)
++
++#define store8888a(reg1,sp) \
++ "xor $f12, $f12, $f12 \n\t" \
++ "packushb "#reg1", "#reg1", $f12 \n\t" \
++ store64a(reg1,sp)
++
++#define pack8888(reg1,reg2) \
++ "packushb $f8, "#reg1","#reg2" \n\t"
++
++#define unpack8888(reg1,reg2) \
++ "punpcklbh $f8, "#reg1","#reg2" \n\t"
++
++
++#define negate(sreg,dreg) \
++ DMTC1_IMM($f12, 0x00ff00ff00ff00ff)\
++ "xor "#dreg", "#sreg", $f12 \n\t"
++
++#define pix_add(reg1,reg2) \
++ "paddusb $f8, "#reg1", "#reg2" \n\t"
++
++#define pix_multiply(reg1,reg2) \
++ "pmullh $f14, "#reg1", "#reg2" \n\t " \
++ DMTC1_IMM($f12, 0x0080008000800080) \
++ "paddush $f14, $f14, $f12 \n\t "\
++ MTC1_IMM($f12, 8) \
++ "psrlh $f16, $f14, $f12 \n\t" \
++ "paddush $f14, $f14, $f16 \n\t" \
++ "psrlh $f8, $f14, $f12 \n\t"
++
++#define pix_add_mul(reg1,reg2,reg3,reg4) \
++ pix_multiply(reg1,reg2) \
++ "mov.d $f18, $f8 \n\t" \
++ pix_multiply(reg3,reg4) \
++ pix_add($f18,$f8)
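++
++/* pix_add_mul: per lane, mul_un8(r1,r2) + mul_un8(r3,r4) with unsigned
++ * saturation (lane values never exceed 0xff, so paddusb is safe). */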
++
++#define expand_alpha(sreg,dreg) \
++ "dmfc1 $8, "#sreg" \n\t" \
++ "dsrl32 $8, $8, 16 \n\t" \
++ "dsll $9, $8, 16 \n\t" \
++ "or $8, $8, $9 \n\t" \
++ "dsll32 $9, $8, 0 \n\t" \
++ "or $8, $8, $9 \n\t" \
++ "dmtc1 $8, "#dreg" \n\t"
++
++#define expand_alpha_rev(sreg,dreg)\
++ "dmfc1 $8, "#sreg" \n\t" \
++ "dsll32 $8, $8, 16 \n\t" \
++ "dsrl32 $8, $8, 16 \n\t" \
++ "dsll $9, $8, 16 \n\t" \
++ "or $8, $8, $9 \n\t" \
++ "dsll32 $9, $8, 0 \n\t" \
++ "or $8, $8, $9 \n\t" \
++ "dmtc1 $8, "#dreg" \n\t"
++
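++/* Both expanders broadcast one 16-bit lane across the register:
++ * expand_alpha takes the top lane (the alpha of an unpacked pixel),
++ * expand_alpha_rev the bottom one. A scalar C sketch of the shifts:
++ *
++ *     uint64_t expand_alpha_c (uint64_t p)   // p = 0x00AA00RR00GG00BB
++ *     {
++ *         uint64_t a = p >> 48;              // isolate the alpha lane
++ *         a |= a << 16;
++ *         a |= a << 32;
++ *         return a;                          // 0x00AA00AA00AA00AA
++ *     }
++ */
++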
++#define expand8888(reg1,pos) expand8888_##pos(reg1)
++
++#define expand8888_0(reg1) \
++ "xor $f12, $f12, $f12 \n\t" \
++ "punpcklbh $f8, "#reg1", $f12 \n\t"
++
++#define expand8888_1(reg1) \
++ "xor $f12, $f12, $f12 \n\t" \
++ "punpckhbh $f8, "#reg1", $f12 \n\t"
++
++#define expandx888(reg1,pos) \
++ expand8888(reg1,pos) \
++ DMTC1_IMM($f12, 0x00ff000000000000) \
++ "or $f8, $f8, $f12 \n\t"
++
++#define invert_colors(reg1) \
++ DMTC1_IMM($f12, 0xffff0000ffff0000) \
++ "and $f14, "#reg1", $f12 \n\t" \
++ DMTC1_IMM($f12, 0x000000000000ffff) \
++ "and $f16, "#reg1", $f12 \n\t" \
++ DMTC1_IMM($f12, 0x0000ffff00000000) \
++ "and $f18, "#reg1", $f12 \n\t" \
++ MTC1_IMM($f12, 32) \
++ "dsll $f16, $f16, $f12 \n\t" \
++ "dsrl $f18, $f18, $f12 \n\t" \
++ "or $f14, $f14, $f16 \n\t" \
++ "or $f8, $f14, $f18 \n\t"
++
++#define over(reg1,reg2,reg3) \
++ negate(reg2,$f8) \
++ pix_multiply(reg3, $f8)\
++ pix_add(reg1, $f8)
++
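++/* over is Porter-Duff OVER for premultiplied pixels; per channel, with
++ * the illustrative mul_un8 helper from above:
++ *
++ *     dst = src + mul_un8 (dst, 255 - srca);
++ */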
++
++#define over_rev_non_pre(reg1,reg2) \
++ expand_alpha(reg1,$f0) \
++ DMTC1_IMM($f12,0x00ff000000000000) \
++ "or $f2, $f0, $f12 \n\t" \
++ invert_colors(reg1) \
++ pix_multiply($f8,$f2) \
++ save_to($f2) \
++ over($f2, $f0, reg2)
++
++#define in(reg1,reg2) pix_multiply(reg1,reg2)
++
++#define in_over_full_src_alpha(reg1,reg2,reg3) \
++ DMTC1_IMM($f12,0x00ff000000000000) \
++ "or $f0, "#reg1", $f12 \n\t" \
++ in($f0,reg2) \
++ save_to($f0) \
++ over($f0,reg2,reg3)
++
++#define in_over(reg1,reg2,reg3,reg4) \
++ in(reg1,reg3) \
++ "mov.d $f0, $f8 \n\t" \
++ pix_multiply(reg2,reg3) \
++ "mov.d $f2, $f8 \n\t" \
++ over($f0,$f2,reg4)
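++
++/* in_over first masks both the source and its alpha (IN), then
++ * composites the result OVER the destination; roughly, per channel:
++ *
++ *     tmp  = mul_un8 (src,  mask);
++ *     tmpa = mul_un8 (srca, mask);
++ *     dst  = tmp + mul_un8 (dst, 255 - tmpa);
++ */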
++
++
diff --git a/extra/pygobject/fix-pycairo-capi-declaration.patch
new file mode 100644
index 000000000..1f0364c7a
--- /dev/null
+++ b/extra/pygobject/fix-pycairo-capi-declaration.patch
@@ -0,0 +1,17 @@
+--- pygobject-2.28.4.orig/gi/pygi-foreign-cairo.c	2011-04-18 17:36:47.000000000 +0200
++++ pygobject-2.28.4/gi/pygi-foreign-cairo.c	2011-06-21 20:05:11.015628222 +0200
+@@ -26,12 +26,12 @@
+ 
+ #if PY_VERSION_HEX < 0x03000000
+ #include <pycairo.h>
++static Pycairo_CAPI_t *Pycairo_CAPI;
+ #else
+ #include <pycairo/py3cairo.h>
++#define Pycairo_IMPORT import_cairo()
+ #endif
+ 
+-Pycairo_CAPI_t *Pycairo_CAPI;
+-
+ #include "pygi-foreign.h"
+ 
+ #include <pyglib-python-compat.h>
diff --git a/extra/x264/PKGBUILD
index 178359631..48aa32925 100644
--- a/extra/x264/PKGBUILD
+++ b/extra/x264/PKGBUILD
@@ -18,7 +18,9 @@ md5sums=('7579aff8166a974a1b293cd18b9ead92')
 build() {
   cd "$srcdir/$pkgname-snapshot-$pkgver-2245"
 
-  ./configure --enable-shared
+  [ "$CARCH" = "mips64el" ] && extra="--enable-pic"
+
+  ./configure --enable-shared $extra
 
   make
   make DESTDIR="$pkgdir" \
diff --git a/extra/xulrunner/PKGBUILD
index 8156af39a..1196fd176 100644
--- a/extra/xulrunner/PKGBUILD
+++ b/extra/xulrunner/PKGBUILD
@@ -22,24 +22,24 @@ options=('!emptydirs')
 build() {
   cd "${srcdir}/icecat-${_ffoxver}/"
-  cp "${srcdir}/mozconfig" .mozconfig
+#  cp "${srcdir}/mozconfig" .mozconfig
 
   #fix libdir/sdkdir - fedora
-  patch -Np1 -i "${srcdir}/mozilla-pkgconfig.patch"
+#  patch -Np1 -i "${srcdir}/mozilla-pkgconfig.patch"
 
   #Force installation to the same path for every version
-  patch -Np1 -i "${srcdir}/xulrunner-version.patch"
+#  patch -Np1 -i "${srcdir}/xulrunner-version.patch"
 
   #https://bugzilla.mozilla.org/show_bug.cgi?id=620931
-  patch -Np1 -i "${srcdir}/xulrunner-omnijar.patch"
+#  patch -Np1 -i "${srcdir}/xulrunner-omnijar.patch"
 
   #https://bugzilla.mozilla.org/show_bug.cgi?id=494163
-  patch -Np1 -i "${srcdir}/port_gnomevfs_to_gio.patch"
+#  patch -Np1 -i "${srcdir}/port_gnomevfs_to_gio.patch"
 
-  [[ "$CARCH" == "mips64el" ]] && {
-    echo "ac_add_options --disable-ipc" >> .mozconfig
-    patch -Np0 -i "${srcdir}/mips.patch"
-  }
+#  [[ "$CARCH" == "mips64el" ]] && {
+#    echo "ac_add_options --disable-ipc" >> .mozconfig
+#    patch -Np0 -i "${srcdir}/mips.patch"
+#  }
 
   unset CFLAGS
   unset CXXFLAGS
diff --git a/extra/zziplib/PKGBUILD
index 4783fc5ee..73d728632 100644
--- a/extra/zziplib/PKGBUILD
+++ b/extra/zziplib/PKGBUILD
@@ -7,7 +7,7 @@ pkgname=zziplib
 pkgver=0.13.60
 pkgrel=1
 pkgdesc="A lightweight library that offers the ability to easily extract data from files archived in a single zip file"
-arch=('i686' 'x86_64')
+arch=('i686' 'x86_64' 'mips64el')
 url="http://zziplib.sourceforge.net"
 license=('LGPL' 'MPL')
 depends=('zlib')
diff --git a/libre/ffmpeg-libre/PKGBUILD
index 41c332dca..40bd3a579 100644
--- a/libre/ffmpeg-libre/PKGBUILD
+++ b/libre/ffmpeg-libre/PKGBUILD
@@ -14,8 +14,10 @@ license=('GPL')
 depends=('bzip2' 'lame' 'sdl' 'libvorbis' 'xvidcore' 'zlib' 'x264' 'libtheora' 'opencore-amr' 'alsa-lib' 'libvdpau' 'libxfixes' 'schroedinger' 'libvpx' 'libva' 'openjpeg')
 makedepends=('yasm' 'git')
 #git clone git://git.videolan.org/ffmpeg.git
-source=(ftp://ftp.archlinux.org/other/ffmpeg/ffmpeg-${pkgver}.tar.xz)
-md5sums=('dd682a876a496b9f9ae8afb3b3b70389')
+source=(ftp://ftp.archlinux.org/other/ffmpeg/ffmpeg-${pkgver}.tar.xz
+        ffmpeg-loongson.patch)
+md5sums=('dd682a876a496b9f9ae8afb3b3b70389'
+         'a178dab43d73388543689df4828fb2d2')
 #source=(http://ffmpeg.org/releases//releases/ffmpeg-${pkgver}.tar.bz2)
 provides=("ffmpeg=$pkgver")
 conflicts=('ffmpeg')
diff --git a/libre/icecat/PKGBUILD
index 89f6755fb..b18691b41 100644
--- a/libre/icecat/PKGBUILD
+++ b/libre/icecat/PKGBUILD
@@ -65,6 +65,9 @@ build() {
   msg2 "Starting build..."
 
   cp "${srcdir}/mozconfig" .mozconfig
+
+  [ "$CARCH" = "mips64el" ] && echo "ac_add_options --disable-ipc" >> .mozconfig
+
   unset CFLAGS
   unset CXXFLAGS