diff --git a/configure.ac b/configure.ac index 33307db..424b922 100644 --- a/configure.ac +++ b/configure.ac @@ -52,46 +52,10 @@ AC_C_CONST AC_C_INLINE AC_C_BIGENDIAN(AC_MSG_WARN([*** You have a big endian system, Infinity have not been tested on these systems]),,) -AC_CHECK_SIZEOF(long long) -if test x$ac_cv_sizeof_long_long = x8; then - : -else - AC_MSG_ERROR([ -*** Infinity requires a 64 bit long long type. You might want to consider -*** using the GNU C compiler. - ]) -fi - -AC_CHECK_SIZEOF([size_t]) -AC_MSG_CHECKING([whether compiler is 32 or 64 bit]) -if test x$ac_cv_sizeof_size_t = x8; then - AC_MSG_RESULT([64]) - compiler_width=64 -else - AC_MSG_RESULT([32]) - compiler_width=32 -fi - # Check for library functions. AC_CHECK_FUNCS([floor sqrt]) # Arguments to specify certain features. -AC_ARG_ENABLE([mmx], - AS_HELP_STRING([--enable-mmx],[use MMX if available and if building with a 32 bit compiler @<:@default=enabled@:>@]), - [mmx=$enableval], - [mmx=yes]) -AC_MSG_CHECKING([whether to enable MMX]) -if test x$mmx = xyes; then - if test x$compiler_width = x32; then - AC_MSG_RESULT([yes]) - AC_DEFINE([MMX_DETECTION], [1], [Activate MMX Extensions support]) - else - AC_MSG_RESULT([no, was requested but feature is not supported with a 64bit compiler]) - fi -else - AC_MSG_RESULT([no]) -fi - AC_ARG_ENABLE([debug], AS_HELP_STRING([--enable-debug],[turn on debug mode @<:@default=disabled@:>@]), [debug=$enableval], diff --git a/src/Makefile.am b/src/Makefile.am index 509a9e5..af39154 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -20,8 +20,7 @@ libinfinity_la_SOURCES = \ compute.c compute.h \ display.c display.h \ effects.c effects.h \ - cputest.c cputest.h \ - mmx.h music-player.h types.h + music-player.h types.h libinfinite_la_SOURCES = audacious.cc libinfinite_la_LDFLAGS = ${CXX} diff --git a/src/compute.c b/src/compute.c index e5fa447..187f6c0 100644 --- a/src/compute.c +++ b/src/compute.c @@ -19,9 +19,6 @@ #include "compute.h" #include "config.h" #include "types.h" -#ifdef MMX_DETECTION -#include "mmx.h" -#endif typedef struct t_coord { gint32 x, y; @@ -274,48 +271,3 @@ inline byte *compute_surface(t_interpol *vector, gint32 width, gint32 height) return surface1; } - -#if MMX_DETECTION -inline byte *compute_surface_mmx(t_interpol *vector, gint32 width, gint32 height) -{ - /*@unused@*/ - volatile mmx_t mm0, mm1, mm2; - volatile mmx_t offsets, r; - t_interpol *interpol; - gint32 i, j, color; - gint32 add_dest = 0; - guint32 add_src; - register byte *ptr_pix; - byte *ptr_swap; - - for (j = 0; j < height; ++j) - for (i = 0; i < width; ++i) { - interpol = &vector[add_dest]; - add_src = (interpol->coord & 0xFFFF) * width + (interpol->coord >> 16); - ptr_pix = &((byte *)surface1)[add_src]; - ((guint16 *)&offsets)[0] = (guint16) * (ptr_pix + width + 1); - ((guint16 *)&offsets)[1] = (guint16) * (ptr_pix + width); - ((guint16 *)&offsets)[2] = (guint16) * (ptr_pix + 1); - ((guint16 *)&offsets)[3] = (guint16) * (ptr_pix); - /* MMX mode entry */ - movd_m2r(interpol->weight, mm1); - movq_m2r(offsets, mm2); - pxor_r2r(mm0, mm0); - punpcklbw_r2r(mm0, mm1); - pmaddwd_r2r(mm1, mm2); - movq_r2m(mm2, r); - emms(); - /* MMX mode exit */ - color = (((gint32 *)&r)[0] + ((gint32 *)&r)[1]) >> 8; - if (color > 255) - surface2[add_dest] = 255; - else - surface2[add_dest] = (byte)color; - ++add_dest; - } - ptr_swap = surface1; - surface1 = surface2; - surface2 = ptr_swap; - return surface1; -} -#endif /* MMX_DETECTION */ diff --git a/src/compute.h b/src/compute.h index 1b7af0f..46ea150 100644 --- a/src/compute.h +++ b/src/compute.h @@ -73,6 +73,5 @@ void compute_resize(gint32 width, gint32 height); void compute_generate_vector_field(vector_field_t *vector_field); byte *compute_surface(t_interpol *vector, gint32 width, gint32 height); -byte *compute_surface_mmx(t_interpol *vector, gint32 width, gint32 height); #endif /* __INFINITY_COMPUTE__ */ diff --git a/src/cputest.c b/src/cputest.c deleted file mode 100644 index cab9cc0..0000000 --- a/src/cputest.c +++ /dev/null @@ -1,181 +0,0 @@ -#include "config.h" -#include "cputest.h" -#include "glib.h" - -// This code is 32-bit only - -/* ebx saving is necessary for PIC. gcc seems unable to see it alone */ -#if MMX_DETECTION -#define cpuid(index, eax, ebx, ecx, edx) \ - __asm __volatile \ - ("movl %%ebx, %%esi\n\t" \ - "cpuid\n\t" \ - "xchgl %%ebx, %%esi" \ - : "=a" (eax), "=S" (ebx), \ - "=c" (ecx), "=d" (edx) \ - : "0" (index)); -#endif - - -/* Function to test if multimedia instructions are supported... */ -int mm_support(void) -{ -#if MMX_DETECTION - int rval; - int eax, ebx, ecx, edx; - - __asm__ __volatile__ ( - /* See if CPUID instruction is supported ... */ - /* ... Get copies of EFLAGS into eax and ecx */ - "pushf\n\t" - "popl %0\n\t" - "movl %0, %1\n\t" - - /* ... Toggle the ID bit in one copy and store */ - /* to the EFLAGS reg */ - "xorl $0x200000, %0\n\t" - "push %0\n\t" - "popf\n\t" - - /* ... Get the (hopefully modified) EFLAGS */ - "pushf\n\t" - "popl %0\n\t" - : "=a" (eax), "=c" (ecx) - : - : "cc" - ); - - if (eax == ecx) - return 0; /* CPUID not supported */ - - cpuid(0, eax, ebx, ecx, edx); - - if (ebx == 0x756e6547 && - edx == 0x49656e69 && - ecx == 0x6c65746e) { - /* intel */ -inteltest: - cpuid(1, eax, ebx, ecx, edx); - if ((edx & 0x00800000) == 0) - return 0; - rval = MM_MMX; - if (edx & 0x02000000) - rval |= MM_MMXEXT | MM_SSE; - if (edx & 0x04000000) - rval |= MM_SSE2; - return rval; - } else if (ebx == 0x68747541 && - edx == 0x69746e65 && - ecx == 0x444d4163) { - /* AMD */ - cpuid(0x80000000, eax, ebx, ecx, edx); - if ((guint32)eax < 0x80000001) - goto inteltest; - cpuid(0x80000001, eax, ebx, ecx, edx); - if ((edx & 0x00800000) == 0) - return 0; - rval = MM_MMX; - if (edx & 0x80000000) - rval |= MM_3DNOW; - if (edx & 0x00400000) - rval |= MM_MMXEXT; - return rval; - } else if (ebx == 0x746e6543 && - edx == 0x48727561 && - ecx == 0x736c7561) { /* "CentaurHauls" */ - /* VIA C3 */ - cpuid(0x80000000, eax, ebx, ecx, edx); - if ((guint32)eax < 0x80000001) - goto inteltest; - cpuid(0x80000001, eax, ebx, ecx, edx); - rval = 0; - if (edx & (1 << 31)) - rval |= MM_3DNOW; - if (edx & (1 << 23)) - rval |= MM_MMX; - if (edx & (1 << 24)) - rval |= MM_MMXEXT; - return rval; - } else if (ebx == 0x69727943 && - edx == 0x736e4978 && - ecx == 0x64616574) { - /* Cyrix Section */ - /* See if extended CPUID level 80000001 is supported */ - /* The value of CPUID/80000001 for the 6x86MX is undefined - * according to the Cyrix CPU Detection Guide (Preliminary - * Rev. 1.01 table 1), so we'll check the value of eax for - * CPUID/0 to see if standard CPUID level 2 is supported. - * According to the table, the only CPU which supports level - * 2 is also the only one which supports extended CPUID levels. - */ - if (eax != 2) - goto inteltest; - cpuid(0x80000001, eax, ebx, ecx, edx); - if ((eax & 0x00800000) == 0) - return 0; - rval = MM_MMX; - if (eax & 0x01000000) - rval |= MM_MMXEXT; - return rval; - } else { - return 0; - } -#else /* not MMX_DETECTION */ - return 0; -#endif -} - - -int mm_support_check_and_show() -{ - int r; - gchar *msg, *tmp; - - r = mm_support(); - if (r & 0) { - g_message("Infinity: There is not MMX support\n"); - return r; - } - msg = g_strdup("Infinity: Looking for Multimedia Extensions Support..."); - if (r & MM_MMX) { - tmp = g_strconcat(msg, " MMX", NULL); - g_free(msg); - msg = tmp; - } - if (r & MM_3DNOW) { - tmp = g_strconcat(msg, " 3DNOW", NULL); - g_free(msg); - msg = tmp; - } - if (r & MM_MMXEXT) { - tmp = g_strconcat(msg, " MMX2", NULL); - g_free(msg); - msg = tmp; - } -/* - * for now this extensions are not used - * if (r & MM_SSE) { - * tmp = g_strconcat (msg, " SSE", 0); - * g_free (msg); - * msg = tmp; - * } - * if (r & MM_SSE2) { - * tmp = g_strconcat (msg, " SSE2", 0); - * g_free (msg); - * msg = tmp; - * } - */ - tmp = g_strconcat(msg, " detected", NULL); - g_free(msg); - msg = tmp; - g_message("%s", msg); - g_free(msg); - - return r; -} - - -int mmx_ok(void) -{ - return mm_support() & 0x1; -} diff --git a/src/cputest.h b/src/cputest.h deleted file mode 100644 index 9f1143d..0000000 --- a/src/cputest.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Cpu detection code, extracted from mmx.h ((c)1997-99 by H. Dietz - * and R. Fisher). Converted to C and improved by Fabrice Bellard - */ -#ifndef _CPUTEST_H_ -#define _CPUTEST_H_ - - -#define MM_MMX 0x0001 /* standard MMX */ -#define MM_3DNOW 0x0004 /* AMD 3DNOW */ -#define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ -#define MM_SSE 0x0008 /* SSE functions */ -#define MM_SSE2 0x0010 /* PIV SSE2 functions */ - -/* should be defined by architectures supporting - * one or more MultiMedia extension */ -int mm_support(void); - -/* return the result of mm_support and show the results - * to stdout */ -int mm_support_check_and_show(void); - -/* test if mmx instructions are supported... - * returns 1 if MMX instructions are supported, 0 otherwise */ -int mmx_ok(void); - -extern unsigned int mm_flags; - -#endif /* _CPUTEST_H_ */ diff --git a/src/display.c b/src/display.c index b23d11b..0252b55 100644 --- a/src/display.c +++ b/src/display.c @@ -313,15 +313,6 @@ inline void display_blur(guint32 vector_index) display_surface(); } -#if MMX_DETECTION -inline void display_blur_mmx(guint32 vector_index) -{ - surface1 = compute_surface_mmx(&(vector_field->vector[vector_index]), - vector_field->width, vector_field->height); - display_surface(); -} -#endif - void spectral(t_effect *current_effect) { gint32 i, halfheight, halfwidth; diff --git a/src/display.h b/src/display.h index 64d1b15..82b4972 100644 --- a/src/display.h +++ b/src/display.h @@ -72,7 +72,6 @@ void display_show(void); void change_color(gint32 old_p, gint32 p, gint32 w); void display_blur(guint32 vector_index); -void display_blur_mmx(guint32 vector_index); void spectral(t_effect *current_effect); void curve(t_effect *current_effect); diff --git a/src/infinity.c b/src/infinity.c index 116549a..c26231f 100644 --- a/src/infinity.c +++ b/src/infinity.c @@ -27,10 +27,6 @@ #include "infinity.h" #include "types.h" -#if MMX_DETECTION -#include "cputest.h" -#endif - #define wrap(a) (a < 0 ? 0 : (a > 255 ? 255 : a)) #define next_effect() (t_last_effect++) #define next_color() (t_last_color++) @@ -313,15 +309,11 @@ static int renderer(void *arg) gint32 frame_length; gint32 fps, new_fps; gint32 t_between_effects, t_between_colors; - gint32 has_mmx = 0; fps = params->get_max_fps(); frame_length = calculate_frame_length_usecs(fps, __LINE__); t_between_effects = params->get_effect_interval(); t_between_colors = params->get_color_interval(); -#if MMX_DETECTION - has_mmx = mm_support_check_and_show(); -#endif initializing = FALSE; for (;; ) { /* ever... */ if (!visible) { @@ -347,10 +339,7 @@ static int renderer(void *arg) G_UNLOCK(resizing); } t_begin = g_get_monotonic_time(); - if (has_mmx) - display_blur_mmx(width * height * current_effect.num_effect); - else - display_blur(width * height * current_effect.num_effect); + display_blur(width * height * current_effect.num_effect); spectral(¤t_effect); curve(¤t_effect); if (t_last_color <= 32) diff --git a/src/mmx.h b/src/mmx.h deleted file mode 100644 index 60bc5ac..0000000 --- a/src/mmx.h +++ /dev/null @@ -1,537 +0,0 @@ -/* mmx.h - * - * MultiMedia eXtensions GCC interface library for IA32. - * - * To use this library, simply include this header file - * and compile with GCC. You MUST have inlining enabled - * in order for mmx_ok() to work; this can be done by - * simply using -O on the GCC command line. - * - * Compiling with -DMMX_TRACE will cause detailed trace - * output to be sent to stderr for each mmx operation. - * This adds lots of code, and obviously slows execution to - * a crawl, but can be very useful for debugging. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT - * LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR ANY PARTICULAR PURPOSE. - * - * 1997-99 by H. Dietz and R. Fisher - * - * Notes: - * It appears that the latest gas has the pand problem fixed, therefore - * I'll undefine BROKEN_PAND by default. - */ - -#ifndef _MMX_H_ -#define _MMX_H_ - - -/* Warning: at this writing, the version of GAS packaged - * with most Linux distributions does not handle the - * parallel AND operation mnemonic correctly. If the - * symbol BROKEN_PAND is defined, a slower alternative - * coding will be used. If execution of mmxtest results - * in an illegal instruction fault, define this symbol. - */ -#undef BROKEN_PAND - - -/* The type of an value that fits in an MMX register - * (note that long long constant values MUST be suffixed - * by LL and unsigned long long values by ULL, lest - * they be truncated by the compiler) - */ -typedef union { - gint64 q; /* Quadword (64-bit) value */ - guint64 uq; /* Unsigned Quadword */ - gint32 d[2]; /* 2 Doubleword (32-bit) values */ - guint32 ud[2]; /* 2 Unsigned Doubleword */ - gint16 w[4]; /* 4 Word (16-bit) values */ - guint16 uw[4]; /* 4 Unsigned Word */ - gchar b[8]; /* 8 Byte (8-bit) values */ - guchar ub[8]; /* 8 Unsigned Byte */ - gfloat s[2]; /* Single-precision (32-bit) value */ -} __attribute__ ((aligned(8))) mmx_t; /* On an 8-byte (64-bit) boundary */ - - - -/* Helper functions for the instruction macros that follow... - * (note that memory-to-register, m2r, instructions are nearly - * as efficient as register-to-register, r2r, instructions; - * however, memory-to-memory instructions are really simulated - * as a convenience, and are only 1/3 as efficient) - */ -#ifdef MMX_TRACE - -/* Include the stuff for printing a trace to stderr... - */ - -#include - -#define mmx_i2r(op, imm, reg) \ - { \ - mmx_t mmx_trace; \ - mmx_trace.uq = (imm); \ - printf(#op "_i2r(" #imm "=0x%08x%08x, ", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - __asm__ __volatile__ ("movq %%" #reg ", %0" \ - : "=X" (mmx_trace) \ - : /* nothing */); \ - printf(#reg "=0x%08x%08x) => ", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - __asm__ __volatile__ (#op " %0, %%" #reg \ - : /* nothing */ \ - : "X" (imm)); \ - __asm__ __volatile__ ("movq %%" #reg ", %0" \ - : "=X" (mmx_trace) \ - : /* nothing */); \ - printf(#reg "=0x%08x%08x\n", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - } - -#define mmx_m2r(op, mem, reg) \ - { \ - mmx_t mmx_trace; \ - mmx_trace = (mem); \ - printf(#op "_m2r(" #mem "=0x%08x%08x, ", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - __asm__ __volatile__ ("movq %%" #reg ", %0" \ - : "=X" (mmx_trace) \ - : /* nothing */); \ - printf(#reg "=0x%08x%08x) => ", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - __asm__ __volatile__ (#op " %0, %%" #reg \ - : /* nothing */ \ - : "X" (mem)); \ - __asm__ __volatile__ ("movq %%" #reg ", %0" \ - : "=X" (mmx_trace) \ - : /* nothing */); \ - printf(#reg "=0x%08x%08x\n", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - } - -#define mmx_r2m(op, reg, mem) \ - { \ - mmx_t mmx_trace; \ - __asm__ __volatile__ ("movq %%" #reg ", %0" \ - : "=X" (mmx_trace) \ - : /* nothing */); \ - printf(#op "_r2m(" #reg "=0x%08x%08x, ", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - mmx_trace = (mem); \ - printf(#mem "=0x%08x%08x) => ", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - __asm__ __volatile__ (#op " %%" #reg ", %0" \ - : "=X" (mem) \ - : /* nothing */); \ - mmx_trace = (mem); \ - printf(#mem "=0x%08x%08x\n", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - } - -#define mmx_r2r(op, regs, regd) \ - { \ - mmx_t mmx_trace; \ - __asm__ __volatile__ ("movq %%" #regs ", %0" \ - : "=X" (mmx_trace) \ - : /* nothing */); \ - printf(#op "_r2r(" #regs "=0x%08x%08x, ", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - __asm__ __volatile__ ("movq %%" #regd ", %0" \ - : "=X" (mmx_trace) \ - : /* nothing */); \ - printf(#regd "=0x%08x%08x) => ", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - __asm__ __volatile__ (#op " %" #regs ", %" #regd); \ - __asm__ __volatile__ ("movq %%" #regd ", %0" \ - : "=X" (mmx_trace) \ - : /* nothing */); \ - printf(#regd "=0x%08x%08x\n", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - } - -#define mmx_m2m(op, mems, memd) \ - { \ - mmx_t mmx_trace; \ - mmx_trace = (mems); \ - printf(#op "_m2m(" #mems "=0x%08x%08x, ", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - mmx_trace = (memd); \ - printf(#memd "=0x%08x%08x) => ", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - __asm__ __volatile__ ("movq %0, %%mm0\n\t" \ - #op " %1, %%mm0\n\t" \ - "movq %%mm0, %0" \ - : "=X" (memd) \ - : "X" (mems)); \ - mmx_trace = (memd); \ - printf(#memd "=0x%08x%08x\n", \ - mmx_trace.d[1], mmx_trace.d[0]); \ - } - -#else - -/* These macros are a lot simpler without the tracing... - */ - -#define mmx_i2r(op, imm, reg) \ - __asm__ __volatile__ (#op " %0, %%" #reg \ - : /* nothing */ \ - : "X" (imm)) - -#define mmx_m2r(op, mem, reg) \ - __asm__ __volatile__ (#op " %0, %%" #reg \ - : /* nothing */ \ - : "X" (mem)) - -#define mmx_r2m(op, reg, mem) \ - __asm__ __volatile__ (#op " %%" #reg ", %0" \ - : "=X" (mem) \ - : /* nothing */) - -#define mmx_r2r(op, regs, regd) \ - __asm__ __volatile__ (#op " %" #regs ", %" #regd) - -#define mmx_m2m(op, mems, memd) \ - __asm__ __volatile__ ("movq %0, %%mm0\n\t" \ - #op " %1, %%mm0\n\t" \ - "movq %%mm0, %0" \ - : "=X" (memd) \ - : "X" (mems)) - -#endif - - -/* 1x64 MOVe Quadword - * (this is both a load and a store... - * in fact, it is the only way to store) - */ -#define movq_m2r(var, reg) mmx_m2r(movq, var, reg) -#define movq_r2m(reg, var) mmx_r2m(movq, reg, var) -#define movq_r2r(regs, regd) mmx_r2r(movq, regs, regd) -#define movq(vars, vard) \ - __asm__ __volatile__ ("movq %1, %%mm0\n\t" \ - "movq %%mm0, %0" \ - : "=X" (vard) \ - : "X" (vars)) - - -/* 1x32 MOVe Doubleword - * (like movq, this is both load and store... - * but is most useful for moving things between - * mmx registers and ordinary registers) - */ -#define movd_m2r(var, reg) mmx_m2r(movd, var, reg) -#define movd_r2m(reg, var) mmx_r2m(movd, reg, var) -#define movd_r2r(regs, regd) mmx_r2r(movd, regs, regd) -#define movd(vars, vard) \ - __asm__ __volatile__ ("movd %1, %%mm0\n\t" \ - "movd %%mm0, %0" \ - : "=X" (vard) \ - : "X" (vars)) - - -/* 2x32, 4x16, and 8x8 Parallel ADDs - */ -#define paddd_m2r(var, reg) mmx_m2r(paddd, var, reg) -#define paddd_r2r(regs, regd) mmx_r2r(paddd, regs, regd) -#define paddd(vars, vard) mmx_m2m(paddd, vars, vard) - -#define paddw_m2r(var, reg) mmx_m2r(paddw, var, reg) -#define paddw_r2r(regs, regd) mmx_r2r(paddw, regs, regd) -#define paddw(vars, vard) mmx_m2m(paddw, vars, vard) - -#define paddb_m2r(var, reg) mmx_m2r(paddb, var, reg) -#define paddb_r2r(regs, regd) mmx_r2r(paddb, regs, regd) -#define paddb(vars, vard) mmx_m2m(paddb, vars, vard) - - -/* 4x16 and 8x8 Parallel ADDs using Saturation arithmetic - */ -#define paddsw_m2r(var, reg) mmx_m2r(paddsw, var, reg) -#define paddsw_r2r(regs, regd) mmx_r2r(paddsw, regs, regd) -#define paddsw(vars, vard) mmx_m2m(paddsw, vars, vard) - -#define paddsb_m2r(var, reg) mmx_m2r(paddsb, var, reg) -#define paddsb_r2r(regs, regd) mmx_r2r(paddsb, regs, regd) -#define paddsb(vars, vard) mmx_m2m(paddsb, vars, vard) - - -/* 4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic - */ -#define paddusw_m2r(var, reg) mmx_m2r(paddusw, var, reg) -#define paddusw_r2r(regs, regd) mmx_r2r(paddusw, regs, regd) -#define paddusw(vars, vard) mmx_m2m(paddusw, vars, vard) - -#define paddusb_m2r(var, reg) mmx_m2r(paddusb, var, reg) -#define paddusb_r2r(regs, regd) mmx_r2r(paddusb, regs, regd) -#define paddusb(vars, vard) mmx_m2m(paddusb, vars, vard) - - -/* 2x32, 4x16, and 8x8 Parallel SUBs - */ -#define psubd_m2r(var, reg) mmx_m2r(psubd, var, reg) -#define psubd_r2r(regs, regd) mmx_r2r(psubd, regs, regd) -#define psubd(vars, vard) mmx_m2m(psubd, vars, vard) - -#define psubw_m2r(var, reg) mmx_m2r(psubw, var, reg) -#define psubw_r2r(regs, regd) mmx_r2r(psubw, regs, regd) -#define psubw(vars, vard) mmx_m2m(psubw, vars, vard) - -#define psubb_m2r(var, reg) mmx_m2r(psubb, var, reg) -#define psubb_r2r(regs, regd) mmx_r2r(psubb, regs, regd) -#define psubb(vars, vard) mmx_m2m(psubb, vars, vard) - - -/* 4x16 and 8x8 Parallel SUBs using Saturation arithmetic - */ -#define psubsw_m2r(var, reg) mmx_m2r(psubsw, var, reg) -#define psubsw_r2r(regs, regd) mmx_r2r(psubsw, regs, regd) -#define psubsw(vars, vard) mmx_m2m(psubsw, vars, vard) - -#define psubsb_m2r(var, reg) mmx_m2r(psubsb, var, reg) -#define psubsb_r2r(regs, regd) mmx_r2r(psubsb, regs, regd) -#define psubsb(vars, vard) mmx_m2m(psubsb, vars, vard) - - -/* 4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic - */ -#define psubusw_m2r(var, reg) mmx_m2r(psubusw, var, reg) -#define psubusw_r2r(regs, regd) mmx_r2r(psubusw, regs, regd) -#define psubusw(vars, vard) mmx_m2m(psubusw, vars, vard) - -#define psubusb_m2r(var, reg) mmx_m2r(psubusb, var, reg) -#define psubusb_r2r(regs, regd) mmx_r2r(psubusb, regs, regd) -#define psubusb(vars, vard) mmx_m2m(psubusb, vars, vard) - - -/* 4x16 Parallel MULs giving Low 4x16 portions of results - */ -#define pmullw_m2r(var, reg) mmx_m2r(pmullw, var, reg) -#define pmullw_r2r(regs, regd) mmx_r2r(pmullw, regs, regd) -#define pmullw(vars, vard) mmx_m2m(pmullw, vars, vard) - - -/* 4x16 Parallel MULs giving High 4x16 portions of results - */ -#define pmulhw_m2r(var, reg) mmx_m2r(pmulhw, var, reg) -#define pmulhw_r2r(regs, regd) mmx_r2r(pmulhw, regs, regd) -#define pmulhw(vars, vard) mmx_m2m(pmulhw, vars, vard) - - -/* 4x16->2x32 Parallel Mul-ADD - * (muls like pmullw, then adds adjacent 16-bit fields - * in the multiply result to make the final 2x32 result) - */ -#define pmaddwd_m2r(var, reg) mmx_m2r(pmaddwd, var, reg) -#define pmaddwd_r2r(regs, regd) mmx_r2r(pmaddwd, regs, regd) -#define pmaddwd(vars, vard) mmx_m2m(pmaddwd, vars, vard) - - -/* 1x64 bitwise AND - */ -#ifdef BROKEN_PAND -#define pand_m2r(var, reg) \ - { \ - mmx_m2r(pandn, (mmx_t)-1LL, reg); \ - mmx_m2r(pandn, var, reg); \ - } -#define pand_r2r(regs, regd) \ - { \ - mmx_m2r(pandn, (mmx_t)-1LL, regd); \ - mmx_r2r(pandn, regs, regd) \ - } -#define pand(vars, vard) \ - { \ - movq_m2r(vard, mm0); \ - mmx_m2r(pandn, (mmx_t)-1LL, mm0); \ - mmx_m2r(pandn, vars, mm0); \ - movq_r2m(mm0, vard); \ - } -#else -#define pand_m2r(var, reg) mmx_m2r(pand, var, reg) -#define pand_r2r(regs, regd) mmx_r2r(pand, regs, regd) -#define pand(vars, vard) mmx_m2m(pand, vars, vard) -#endif - - -/* 1x64 bitwise AND with Not the destination - */ -#define pandn_m2r(var, reg) mmx_m2r(pandn, var, reg) -#define pandn_r2r(regs, regd) mmx_r2r(pandn, regs, regd) -#define pandn(vars, vard) mmx_m2m(pandn, vars, vard) - - -/* 1x64 bitwise OR - */ -#define por_m2r(var, reg) mmx_m2r(por, var, reg) -#define por_r2r(regs, regd) mmx_r2r(por, regs, regd) -#define por(vars, vard) mmx_m2m(por, vars, vard) - - -/* 1x64 bitwise eXclusive OR - */ -#define pxor_m2r(var, reg) mmx_m2r(pxor, var, reg) -#define pxor_r2r(regs, regd) mmx_r2r(pxor, regs, regd) -#define pxor(vars, vard) mmx_m2m(pxor, vars, vard) - - -/* 2x32, 4x16, and 8x8 Parallel CoMPare for EQuality - * (resulting fields are either 0 or -1) - */ -#define pcmpeqd_m2r(var, reg) mmx_m2r(pcmpeqd, var, reg) -#define pcmpeqd_r2r(regs, regd) mmx_r2r(pcmpeqd, regs, regd) -#define pcmpeqd(vars, vard) mmx_m2m(pcmpeqd, vars, vard) - -#define pcmpeqw_m2r(var, reg) mmx_m2r(pcmpeqw, var, reg) -#define pcmpeqw_r2r(regs, regd) mmx_r2r(pcmpeqw, regs, regd) -#define pcmpeqw(vars, vard) mmx_m2m(pcmpeqw, vars, vard) - -#define pcmpeqb_m2r(var, reg) mmx_m2r(pcmpeqb, var, reg) -#define pcmpeqb_r2r(regs, regd) mmx_r2r(pcmpeqb, regs, regd) -#define pcmpeqb(vars, vard) mmx_m2m(pcmpeqb, vars, vard) - - -/* 2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than - * (resulting fields are either 0 or -1) - */ -#define pcmpgtd_m2r(var, reg) mmx_m2r(pcmpgtd, var, reg) -#define pcmpgtd_r2r(regs, regd) mmx_r2r(pcmpgtd, regs, regd) -#define pcmpgtd(vars, vard) mmx_m2m(pcmpgtd, vars, vard) - -#define pcmpgtw_m2r(var, reg) mmx_m2r(pcmpgtw, var, reg) -#define pcmpgtw_r2r(regs, regd) mmx_r2r(pcmpgtw, regs, regd) -#define pcmpgtw(vars, vard) mmx_m2m(pcmpgtw, vars, vard) - -#define pcmpgtb_m2r(var, reg) mmx_m2r(pcmpgtb, var, reg) -#define pcmpgtb_r2r(regs, regd) mmx_r2r(pcmpgtb, regs, regd) -#define pcmpgtb(vars, vard) mmx_m2m(pcmpgtb, vars, vard) - - -/* 1x64, 2x32, and 4x16 Parallel Shift Left Logical - */ -#define psllq_i2r(imm, reg) mmx_i2r(psllq, imm, reg) -#define psllq_m2r(var, reg) mmx_m2r(psllq, var, reg) -#define psllq_r2r(regs, regd) mmx_r2r(psllq, regs, regd) -#define psllq(vars, vard) mmx_m2m(psllq, vars, vard) - -#define pslld_i2r(imm, reg) mmx_i2r(pslld, imm, reg) -#define pslld_m2r(var, reg) mmx_m2r(pslld, var, reg) -#define pslld_r2r(regs, regd) mmx_r2r(pslld, regs, regd) -#define pslld(vars, vard) mmx_m2m(pslld, vars, vard) - -#define psllw_i2r(imm, reg) mmx_i2r(psllw, imm, reg) -#define psllw_m2r(var, reg) mmx_m2r(psllw, var, reg) -#define psllw_r2r(regs, regd) mmx_r2r(psllw, regs, regd) -#define psllw(vars, vard) mmx_m2m(psllw, vars, vard) - - -/* 1x64, 2x32, and 4x16 Parallel Shift Right Logical - */ -#define psrlq_i2r(imm, reg) mmx_i2r(psrlq, imm, reg) -#define psrlq_m2r(var, reg) mmx_m2r(psrlq, var, reg) -#define psrlq_r2r(regs, regd) mmx_r2r(psrlq, regs, regd) -#define psrlq(vars, vard) mmx_m2m(psrlq, vars, vard) - -#define psrld_i2r(imm, reg) mmx_i2r(psrld, imm, reg) -#define psrld_m2r(var, reg) mmx_m2r(psrld, var, reg) -#define psrld_r2r(regs, regd) mmx_r2r(psrld, regs, regd) -#define psrld(vars, vard) mmx_m2m(psrld, vars, vard) - -#define psrlw_i2r(imm, reg) mmx_i2r(psrlw, imm, reg) -#define psrlw_m2r(var, reg) mmx_m2r(psrlw, var, reg) -#define psrlw_r2r(regs, regd) mmx_r2r(psrlw, regs, regd) -#define psrlw(vars, vard) mmx_m2m(psrlw, vars, vard) - - -/* 2x32 and 4x16 Parallel Shift Right Arithmetic - */ -#define psrad_i2r(imm, reg) mmx_i2r(psrad, imm, reg) -#define psrad_m2r(var, reg) mmx_m2r(psrad, var, reg) -#define psrad_r2r(regs, regd) mmx_r2r(psrad, regs, regd) -#define psrad(vars, vard) mmx_m2m(psrad, vars, vard) - -#define psraw_i2r(imm, reg) mmx_i2r(psraw, imm, reg) -#define psraw_m2r(var, reg) mmx_m2r(psraw, var, reg) -#define psraw_r2r(regs, regd) mmx_r2r(psraw, regs, regd) -#define psraw(vars, vard) mmx_m2m(psraw, vars, vard) - - -/* 2x32->4x16 and 4x16->8x8 PACK and Signed Saturate - * (packs source and dest fields into dest in that order) - */ -#define packssdw_m2r(var, reg) mmx_m2r(packssdw, var, reg) -#define packssdw_r2r(regs, regd) mmx_r2r(packssdw, regs, regd) -#define packssdw(vars, vard) mmx_m2m(packssdw, vars, vard) - -#define packsswb_m2r(var, reg) mmx_m2r(packsswb, var, reg) -#define packsswb_r2r(regs, regd) mmx_r2r(packsswb, regs, regd) -#define packsswb(vars, vard) mmx_m2m(packsswb, vars, vard) - - -/* 4x16->8x8 PACK and Unsigned Saturate - * (packs source and dest fields into dest in that order) - */ -#define packuswb_m2r(var, reg) mmx_m2r(packuswb, var, reg) -#define packuswb_r2r(regs, regd) mmx_r2r(packuswb, regs, regd) -#define packuswb(vars, vard) mmx_m2m(packuswb, vars, vard) - - -/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low - * (interleaves low half of dest with low half of source - * as padding in each result field) - */ -#define punpckldq_m2r(var, reg) mmx_m2r(punpckldq, var, reg) -#define punpckldq_r2r(regs, regd) mmx_r2r(punpckldq, regs, regd) -#define punpckldq(vars, vard) mmx_m2m(punpckldq, vars, vard) - -#define punpcklwd_m2r(var, reg) mmx_m2r(punpcklwd, var, reg) -#define punpcklwd_r2r(regs, regd) mmx_r2r(punpcklwd, regs, regd) -#define punpcklwd(vars, vard) mmx_m2m(punpcklwd, vars, vard) - -#define punpcklbw_m2r(var, reg) mmx_m2r(punpcklbw, var, reg) -#define punpcklbw_r2r(regs, regd) mmx_r2r(punpcklbw, regs, regd) -#define punpcklbw(vars, vard) mmx_m2m(punpcklbw, vars, vard) - - -/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High - * (interleaves high half of dest with high half of source - * as padding in each result field) - */ -#define punpckhdq_m2r(var, reg) mmx_m2r(punpckhdq, var, reg) -#define punpckhdq_r2r(regs, regd) mmx_r2r(punpckhdq, regs, regd) -#define punpckhdq(vars, vard) mmx_m2m(punpckhdq, vars, vard) - -#define punpckhwd_m2r(var, reg) mmx_m2r(punpckhwd, var, reg) -#define punpckhwd_r2r(regs, regd) mmx_r2r(punpckhwd, regs, regd) -#define punpckhwd(vars, vard) mmx_m2m(punpckhwd, vars, vard) - -#define punpckhbw_m2r(var, reg) mmx_m2r(punpckhbw, var, reg) -#define punpckhbw_r2r(regs, regd) mmx_r2r(punpckhbw, regs, regd) -#define punpckhbw(vars, vard) mmx_m2m(punpckhbw, vars, vard) - - -/* Empty MMx State - * (used to clean-up when going from mmx to float use - * of the registers that are shared by both; note that - * there is no float-to-mmx operation needed, because - * only the float tag word info is corruptible) - */ -#ifdef MMX_TRACE - -#define emms() \ - { \ - printf("emms()\n"); \ - __asm__ __volatile__ ("emms"); \ - } - -#else - -#define emms() __asm__ __volatile__ ("emms") - -#endif - - -#endif /* _MMX_H_ */