|
|
|
@ -206,6 +206,7 @@ static void EndThread( vdec_thread_t *p_vdec ) |
|
|
|
/*******************************************************************************
|
|
|
|
 * AddBlock : add a block of 16-bit coefficients to the picture data, saturating each result to a byte
|
|
|
*******************************************************************************/ |
|
|
|
#ifndef HAVE_MMX |
|
|
|
static __inline__ void AddBlock( vdec_thread_t * p_vdec, dctelem_t * p_block, |
|
|
|
yuv_data_t * p_data, int i_incr ) |
|
|
|
{ |
|
|
|
@ -221,10 +222,102 @@ static __inline__ void AddBlock( vdec_thread_t * p_vdec, dctelem_t * p_block, |
|
|
|
p_data += i_incr; |
|
|
|
} |
|
|
|
} |
|
|
|
#else |
|
|
|
static __inline__ void AddBlock( vdec_thread_t * p_vdec, dctelem_t * p_block, |
|
|
|
yuv_data_t * p_data, int i_incr ) |
|
|
|
{ |
|
|
|
asm __volatile__ ( |
|
|
|
"pxor %%mm7,%%mm7\n\t" |
|
|
|
|
|
|
|
"movq (%0),%%mm1\n\t" |
|
|
|
"movq %%mm1,%%mm2\n\t" |
|
|
|
"punpckhbw %%mm7,%%mm1\n\t" |
|
|
|
"punpcklbw %%mm7,%%mm2\n\t" |
|
|
|
"paddw (%1),%%mm2\n\t" |
|
|
|
"paddw 8(%1),%%mm1\n\t" |
|
|
|
"packuswb %%mm1,%%mm2\n\t" |
|
|
|
"movq %%mm2,(%0)\n\t" |
|
|
|
"addl %2,%0\n\t" |
|
|
|
|
|
|
|
"movq (%0),%%mm1\n\t" |
|
|
|
"movq %%mm1,%%mm2\n\t" |
|
|
|
"punpckhbw %%mm7,%%mm1\n\t" |
|
|
|
"punpcklbw %%mm7,%%mm2\n\t" |
|
|
|
"paddw 16(%1),%%mm2\n\t" |
|
|
|
"paddw 24(%1),%%mm1\n\t" |
|
|
|
"packuswb %%mm1,%%mm2\n\t" |
|
|
|
"movq %%mm2,(%0)\n\t" |
|
|
|
"addl %2,%0\n\t" |
|
|
|
|
|
|
|
"movq (%0),%%mm1\n\t" |
|
|
|
"movq %%mm1,%%mm2\n\t" |
|
|
|
"punpckhbw %%mm7,%%mm1\n\t" |
|
|
|
"punpcklbw %%mm7,%%mm2\n\t" |
|
|
|
"paddw 32(%1),%%mm2\n\t" |
|
|
|
"paddw 40(%1),%%mm1\n\t" |
|
|
|
"packuswb %%mm1,%%mm2\n\t" |
|
|
|
"movq %%mm2,(%0)\n\t" |
|
|
|
"addl %2,%0\n\t" |
|
|
|
|
|
|
|
"movq (%0),%%mm1\n\t" |
|
|
|
"movq %%mm1,%%mm2\n\t" |
|
|
|
"punpckhbw %%mm7,%%mm1\n\t" |
|
|
|
"punpcklbw %%mm7,%%mm2\n\t" |
|
|
|
"paddw 48(%1),%%mm2\n\t" |
|
|
|
"paddw 56(%1),%%mm1\n\t" |
|
|
|
"packuswb %%mm1,%%mm2\n\t" |
|
|
|
"movq %%mm2,(%0)\n\t" |
|
|
|
"addl %2,%0\n\t" |
|
|
|
|
|
|
|
"movq (%0),%%mm1\n\t" |
|
|
|
"movq %%mm1,%%mm2\n\t" |
|
|
|
"punpckhbw %%mm7,%%mm1\n\t" |
|
|
|
"punpcklbw %%mm7,%%mm2\n\t" |
|
|
|
"paddw 64(%1),%%mm2\n\t" |
|
|
|
"paddw 72(%1),%%mm1\n\t" |
|
|
|
"packuswb %%mm1,%%mm2\n\t" |
|
|
|
"movq %%mm2,(%0)\n\t" |
|
|
|
"addl %2,%0\n\t" |
|
|
|
|
|
|
|
"movq (%0),%%mm1\n\t" |
|
|
|
"movq %%mm1,%%mm2\n\t" |
|
|
|
"punpckhbw %%mm7,%%mm1\n\t" |
|
|
|
"punpcklbw %%mm7,%%mm2\n\t" |
|
|
|
"paddw 80(%1),%%mm2\n\t" |
|
|
|
"paddw 88(%1),%%mm1\n\t" |
|
|
|
"packuswb %%mm1,%%mm2\n\t" |
|
|
|
"movq %%mm2,(%0)\n\t" |
|
|
|
"addl %2,%0\n\t" |
|
|
|
|
|
|
|
"movq (%0),%%mm1\n\t" |
|
|
|
"movq %%mm1,%%mm2\n\t" |
|
|
|
"punpckhbw %%mm7,%%mm1\n\t" |
|
|
|
"punpcklbw %%mm7,%%mm2\n\t" |
|
|
|
"paddw 96(%1),%%mm2\n\t" |
|
|
|
"paddw 104(%1),%%mm1\n\t" |
|
|
|
"packuswb %%mm1,%%mm2\n\t" |
|
|
|
"movq %%mm2,(%0)\n\t" |
|
|
|
"addl %2,%0\n\t" |
|
|
|
|
|
|
|
"movq (%0),%%mm1\n\t" |
|
|
|
"movq %%mm1,%%mm2\n\t" |
|
|
|
"punpckhbw %%mm7,%%mm1\n\t" |
|
|
|
"punpcklbw %%mm7,%%mm2\n\t" |
|
|
|
"paddw 112(%1),%%mm2\n\t" |
|
|
|
"paddw 120(%1),%%mm1\n\t" |
|
|
|
"packuswb %%mm1,%%mm2\n\t" |
|
|
|
"movq %%mm2,(%0)\n\t" |
|
|
|
|
|
|
|
"emms" |
|
|
|
:"+r" (p_data): "r" (p_block),"r" (i_incr+8)); |
|
|
|
} |
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
/*******************************************************************************
|
|
|
|
 * CopyBlock : copy a block of 16-bit coefficients into the picture data, saturating each value to a byte
|
|
|
*******************************************************************************/ |
|
|
|
#ifndef HAVE_MMX |
|
|
|
static __inline__ void CopyBlock( vdec_thread_t * p_vdec, dctelem_t * p_block, |
|
|
|
yuv_data_t * p_data, int i_incr ) |
|
|
|
{ |
|
|
|
@ -239,6 +332,54 @@ static __inline__ void CopyBlock( vdec_thread_t * p_vdec, dctelem_t * p_block, |
|
|
|
p_data += i_incr; |
|
|
|
} |
|
|
|
} |
|
|
|
#else |
|
|
|
static __inline__ void CopyBlock( vdec_thread_t * p_vdec, dctelem_t * p_block, |
|
|
|
yuv_data_t * p_data, int i_incr ) |
|
|
|
{ |
|
|
|
asm __volatile__ ( |
|
|
|
"movq (%1),%%mm0\n\t" |
|
|
|
"packuswb 8(%1),%%mm0\n\t" |
|
|
|
"movq %%mm0,(%0)\n\t" |
|
|
|
"addl %2,%0\n\t" |
|
|
|
|
|
|
|
"movq 16(%1),%%mm0\n\t" |
|
|
|
"packuswb 24(%1),%%mm0\n\t" |
|
|
|
"movq %%mm0,(%0)\n\t" |
|
|
|
"addl %2,%0\n\t" |
|
|
|
|
|
|
|
"movq 32(%1),%%mm0\n\t" |
|
|
|
"packuswb 40(%1),%%mm0\n\t" |
|
|
|
"movq %%mm0,(%0)\n\t" |
|
|
|
"addl %2,%0\n\t" |
|
|
|
|
|
|
|
"movq 48(%1),%%mm0\n\t" |
|
|
|
"packuswb 56(%1),%%mm0\n\t" |
|
|
|
"movq %%mm0,(%0)\n\t" |
|
|
|
"addl %2,%0\n\t" |
|
|
|
|
|
|
|
"movq 64(%1),%%mm0\n\t" |
|
|
|
"packuswb 72(%1),%%mm0\n\t" |
|
|
|
"movq %%mm0,(%0)\n\t" |
|
|
|
"addl %2,%0\n\t" |
|
|
|
|
|
|
|
"movq 80(%1),%%mm0\n\t" |
|
|
|
"packuswb 88(%1),%%mm0\n\t" |
|
|
|
"movq %%mm0,(%0)\n\t" |
|
|
|
"addl %2,%0\n\t" |
|
|
|
|
|
|
|
"movq 96(%1),%%mm0\n\t" |
|
|
|
"packuswb 104(%1),%%mm0\n\t" |
|
|
|
"movq %%mm0,(%0)\n\t" |
|
|
|
"addl %2,%0\n\t" |
|
|
|
|
|
|
|
"movq 112(%1),%%mm0\n\t" |
|
|
|
"packuswb 120(%1),%%mm0\n\t" |
|
|
|
"movq %%mm0,(%0)\n\t" |
|
|
|
"emms" |
|
|
|
:"+r" (p_data): "r" (p_block),"r" (i_incr+8)); |
|
|
|
} |
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
/*******************************************************************************
|
|
|
|
* vdec_DecodeMacroblock : decode a macroblock of a picture |
|
|
|
|