Browse Source
gradfun.h is a stripped copy of mplayer vf_gradfun.c (a function has been renamed).pull/2/head
3 changed files with 527 additions and 0 deletions
@ -0,0 +1,262 @@ |
|||||
|
/*****************************************************************************
|
||||
|
* gradfun.c: wrapper for the gradfun filter from mplayer |
||||
|
***************************************************************************** |
||||
|
* Copyright (C) 2010 Laurent Aimar |
||||
|
* $Id$ |
||||
|
* |
||||
|
* Authors: Laurent Aimar <fenrir _AT_ videolan _DOT_ org> |
||||
|
* |
||||
|
* This program is free software; you can redistribute it and/or modify |
||||
|
* it under the terms of the GNU General Public License as published by |
||||
|
* the Free Software Foundation; either version 2 of the License, or |
||||
|
* (at your option) any later version. |
||||
|
* |
||||
|
* This program is distributed in the hope that it will be useful, |
||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
|
* GNU General Public License for more details. |
||||
|
* |
||||
|
* You should have received a copy of the GNU General Public License |
||||
|
* along with this program; if not, write to the Free Software |
||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. |
||||
|
*****************************************************************************/ |
||||
|
|
||||
|
/*****************************************************************************
|
||||
|
* Preamble |
||||
|
*****************************************************************************/ |
||||
|
|
||||
|
#ifdef HAVE_CONFIG_H |
||||
|
# include "config.h" |
||||
|
#endif |
||||
|
|
||||
|
#include <vlc_common.h> |
||||
|
#include <vlc_plugin.h> |
||||
|
#include <vlc_cpu.h> |
||||
|
#include <vlc_filter.h> |
||||
|
|
||||
|
/*****************************************************************************
|
||||
|
* Module descriptor |
||||
|
*****************************************************************************/ |
||||
|
static int Open (vlc_object_t *); |
||||
|
static void Close(vlc_object_t *); |
||||
|
|
||||
|
#define CFG_PREFIX "gradfun-" |
||||
|
|
||||
|
#define RADIUS_MIN (4) |
||||
|
#define RADIUS_MAX (32) |
||||
|
#define RADIUS_TEXT N_("Radius") |
||||
|
#define RADIUS_LONGTEXT N_("Radius in pixels") |
||||
|
|
||||
|
#define STRENGTH_MIN (0.51) |
||||
|
#define STRENGTH_MAX (255) |
||||
|
#define STRENGTH_TEXT N_("Strength") |
||||
|
#define STRENGTH_LONGTEXT N_("Strength used to modify the value of a pixel") |
||||
|
|
||||
|
vlc_module_begin() |
||||
|
set_description(N_("Gradfun video filter")) |
||||
|
set_shortname(N_("Gradfun")) |
||||
|
set_help("Debanding algorithm") |
||||
|
set_capability("video filter2", 0) |
||||
|
set_category(CAT_VIDEO) |
||||
|
set_subcategory(SUBCAT_VIDEO_VFILTER) |
||||
|
add_integer_with_range(CFG_PREFIX "radius", 16, RADIUS_MIN, RADIUS_MAX, |
||||
|
NULL, RADIUS_TEXT, RADIUS_LONGTEXT, false) |
||||
|
add_float_with_range(CFG_PREFIX "strength", 1.2, STRENGTH_MIN, STRENGTH_MAX, |
||||
|
NULL, STRENGTH_TEXT, STRENGTH_LONGTEXT, false) |
||||
|
|
||||
|
set_callbacks(Open, Close) |
||||
|
vlc_module_end() |
||||
|
|
||||
|
/*****************************************************************************
|
||||
|
* Local prototypes |
||||
|
*****************************************************************************/ |
||||
|
/*
 * Compatibility layer for the code imported in gradfun.h: it expects the
 * FFmpeg/MPlayer helper names and 0/1 valued HAVE_* feature macros.
 */
#define FFMAX(a,b) __MAX(a,b)

#if defined(CAN_COMPILE_MMXEXT)
#   define HAVE_MMX2 1
#else
#   define HAVE_MMX2 0
#endif

#if defined(CAN_COMPILE_SSE2)
#   define HAVE_SSE2 1
#else
#   define HAVE_SSE2 0
#endif

#if defined(CAN_COMPILE_SSSE3)
#   define HAVE_SSSE3 1
#else
#   define HAVE_SSSE3 0
#endif

// FIXME too restrictive
#if defined(__x86_64__)
#   define HAVE_6REGS 1
#else
#   define HAVE_6REGS 0
#endif

#define av_clip_uint8 clip_uint8_vlc
||||
|
#include "gradfun.h" |
||||
|
|
||||
|
static picture_t *Filter(filter_t *, picture_t *); |
||||
|
static int Callback(vlc_object_t *, char const *, vlc_value_t, vlc_value_t, void *); |
||||
|
|
||||
|
struct filter_sys_t { |
||||
|
vlc_mutex_t lock; |
||||
|
float strength; |
||||
|
int radius; |
||||
|
int h_shift; |
||||
|
int v_shift; |
||||
|
void *base_buf; |
||||
|
struct vf_priv_s cfg; |
||||
|
}; |
||||
|
|
||||
|
/**
 * Module activation: checks the input chroma, allocates the private state
 * and installs Filter() as the video filter callback.
 *
 * Returns VLC_SUCCESS, VLC_EGENERIC when the input chroma is not one of the
 * planar YUV formats handled below, or VLC_ENOMEM when the private state
 * cannot be allocated.
 */
static int Open(vlc_object_t *object)
{
    filter_t *filter = (filter_t *)object;

    /* Shifts applied to the dimensions of every plane but the first */
    int h_shift;
    int v_shift;
    switch (filter->fmt_in.video.i_chroma) {
    case VLC_CODEC_I410:
    case VLC_CODEC_YV9:
        h_shift = 2; v_shift = 2;
        break;
    case VLC_CODEC_I411:
        h_shift = 2; v_shift = 0;
        break;
    case VLC_CODEC_I420:
    case VLC_CODEC_J420:
    case VLC_CODEC_YV12:
        h_shift = 1; v_shift = 1;
        break;
    case VLC_CODEC_I422:
    case VLC_CODEC_J422:
        h_shift = 1; v_shift = 0;
        break;
    case VLC_CODEC_I444:
    case VLC_CODEC_J444:
    case VLC_CODEC_YUVA:
        h_shift = 0; v_shift = 0;
        break;
    case VLC_CODEC_I440:
    case VLC_CODEC_J440:
        h_shift = 0; v_shift = 1;
        break;
    default:
        return VLC_EGENERIC;
    }

    filter_sys_t *sys = malloc(sizeof(*sys));
    if (!sys)
        return VLC_ENOMEM;

    vlc_mutex_init(&sys->lock);
    sys->h_shift  = h_shift;
    sys->v_shift  = v_shift;
    sys->strength = var_CreateGetFloatCommand(filter, CFG_PREFIX "strength");
    sys->radius   = var_CreateGetIntegerCommand(filter, CFG_PREFIX "radius");
    sys->base_buf = NULL;

    /* radius == 0 never matches a clamped radius, so the first call to
     * Filter() allocates the working buffer. */
    struct vf_priv_s *cfg = &sys->cfg;
    cfg->thresh      = 0.0;
    cfg->radius      = 0;
    cfg->buf         = NULL;
    cfg->filter_line = filter_line_c;
    cfg->blur_line   = blur_line_c;

    /* Replace the C line functions by the optimized ones when both the
     * build and the running CPU support them. */
#if HAVE_SSE2 && HAVE_6REGS
    if (vlc_CPU() & CPU_CAPABILITY_SSE2)
        cfg->blur_line = blur_line_sse2;
#endif
#if HAVE_MMX2
    if (vlc_CPU() & CPU_CAPABILITY_MMXEXT)
        cfg->filter_line = filter_line_mmx2;
#endif
#if HAVE_SSSE3
    if (vlc_CPU() & CPU_CAPABILITY_SSSE3)
        cfg->filter_line = filter_line_ssse3;
#endif

    filter->p_sys           = sys;
    filter->pf_video_filter = Filter;

    /* Callback() dereferences filter->p_sys, so the callbacks must only be
     * registered once the private state has been published. */
    var_AddCallback(filter, CFG_PREFIX "strength", Callback, NULL);
    var_AddCallback(filter, CFG_PREFIX "radius",   Callback, NULL);

    return VLC_SUCCESS;
}
||||
|
|
||||
|
/**
 * Module deactivation: unregisters the variable callbacks and releases
 * everything allocated by Open() and Filter().
 */
static void Close(vlc_object_t *object)
{
    filter_t     *filter = (filter_t *)object;
    filter_sys_t *sys    = filter->p_sys;

    /* Callback() uses sys: remove the callbacks installed by Open() before
     * the state they access is destroyed. */
    var_DelCallback(filter, CFG_PREFIX "radius",   Callback, NULL);
    var_DelCallback(filter, CFG_PREFIX "strength", Callback, NULL);

    free(sys->base_buf);
    vlc_mutex_destroy(&sys->lock);
    free(sys);
}
||||
|
|
||||
|
/**
 * Filters one picture.
 *
 * A new output picture is requested from the filter owner; each plane is
 * either processed by filter_plane() or copied unchanged when it is too small
 * for the radius or when no working buffer is available. The input picture
 * is always released.
 *
 * Returns the output picture, or NULL if none could be obtained.
 */
static picture_t *Filter(filter_t *filter, picture_t *src)
{
    filter_sys_t *sys = filter->p_sys;

    picture_t *dst = filter_NewPicture(filter);
    if (!dst) {
        picture_Release(src);
        return NULL;
    }

    /* Snapshot the user settings, clamped to their valid range; the radius
     * is rounded up to an even value. */
    vlc_mutex_lock(&sys->lock);
    float strength = __MIN(__MAX(sys->strength, STRENGTH_MIN), STRENGTH_MAX);
    int   radius   = __MIN(__MAX((sys->radius + 1) & ~1, RADIUS_MIN), RADIUS_MAX);
    vlc_mutex_unlock(&sys->lock);

    const video_format_t *fmt = &filter->fmt_in.video;
    struct vf_priv_s *cfg = &sys->cfg;

    cfg->thresh = (1 << 15) / strength;
    if (cfg->radius != radius) {
        /* The working buffer size depends on the radius. Release the
         * previous allocation first, otherwise it is leaked each time the
         * radius changes. */
        free(sys->base_buf);
        sys->base_buf = NULL;

        cfg->radius = radius;
        cfg->buf    = vlc_memalign(&sys->base_buf, 16,
                                   (((fmt->i_width + 15) & ~15) * (cfg->radius + 1) / 2 + 32) * sizeof(*cfg->buf));
    }

    for (int i = 0; i < dst->i_planes; i++) {
        const plane_t *srcp = &src->p[i];
        plane_t       *dstp = &dst->p[i];

        int w = fmt->i_width;
        int h = fmt->i_height;
        int r = cfg->radius;
        if (i > 0) {
            /* Scale the dimensions and the radius for the other planes */
            w >>= sys->h_shift;
            h >>= sys->v_shift;
            r = ((r >> sys->h_shift) + (r >> sys->v_shift)) / 2;
            r = __MIN(__MAX((r + 1) & ~1, RADIUS_MIN), RADIUS_MAX);
        }
        if (__MIN(w, h) > 2 * r && cfg->buf) {
            filter_plane(cfg, dstp->p_pixels, srcp->p_pixels,
                         w, h, dstp->i_pitch, srcp->i_pitch, r);
        } else {
            /* Plane too small for this radius, or no buffer: pass through */
            plane_CopyPixels(dstp, srcp);
        }
    }

    picture_CopyProperties(dst, src);
    picture_Release(src);
    return dst;
}
||||
|
|
||||
|
/**
 * Variable callback: stores the new value of the variable named cmd into
 * the private state, under the lock. Anything other than
 * "gradfun-strength" is handled as the radius. Always returns VLC_SUCCESS.
 */
static int Callback(vlc_object_t *object, char const *cmd,
                    vlc_value_t oldval, vlc_value_t newval, void *data)
{
    VLC_UNUSED(oldval);
    VLC_UNUSED(data);

    filter_t     *p_filter = (filter_t *)object;
    filter_sys_t *p_sys    = p_filter->p_sys;
    const int     strength_changed = strcmp(cmd, CFG_PREFIX "strength") == 0;

    vlc_mutex_lock(&p_sys->lock);
    if (strength_changed) {
        p_sys->strength = newval.f_float;
    } else {
        p_sys->radius = newval.i_int;
    }
    vlc_mutex_unlock(&p_sys->lock);

    return VLC_SUCCESS;
}
||||
|
|
||||
@ -0,0 +1,263 @@ |
|||||
|
/*
|
||||
|
 * Copyright (C) 2009 Loren Merritt <lorenm@u.washington.edu> |
||||
|
* |
||||
|
* This file is part of MPlayer. |
||||
|
* |
||||
|
* MPlayer is free software; you can redistribute it and/or modify |
||||
|
* it under the terms of the GNU General Public License as published by |
||||
|
* the Free Software Foundation; either version 2 of the License, or |
||||
|
* (at your option) any later version. |
||||
|
* |
||||
|
* MPlayer is distributed in the hope that it will be useful, |
||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
|
* GNU General Public License for more details. |
||||
|
* |
||||
|
* You should have received a copy of the GNU General Public License along |
||||
|
* with MPlayer; if not, write to the Free Software Foundation, Inc., |
||||
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
||||
|
*/ |
||||
|
|
||||
|
/*
|
||||
|
* Debanding algorithm (from gradfun2db by prunedtree): |
||||
|
* Boxblur. |
||||
|
* Foreach pixel, if it's within threshold of the blurred value, make it closer. |
||||
|
* So now we have a smoothed and higher bitdepth version of all the shallow |
||||
|
* gradients, while leaving detailed areas untouched. |
||||
|
* Dither it back to 8bit. |
||||
|
*/ |
||||
|
|
||||
|
/* Context shared by filter_plane() and the per-line functions. */
struct vf_priv_s {
    int thresh;     /* passed as the thresh argument of filter_line */
    int radius;     /* radius the working buffer was set up for */
    uint16_t *buf;  /* working buffer used by filter_plane() */

    /* Writes one output line from a source line and the blurred values. */
    void (*filter_line)(uint8_t *dst, uint8_t *src, uint16_t *dc,
                        int width, int thresh, const uint16_t *dithers);

    /* Accumulates a pair of source lines into the blur buffers. */
    void (*blur_line)(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
                      uint8_t *src, int sstride, int width);
};
||||
|
|
||||
|
/* 16-byte aligned constant vectors read by the inline assembly below. */
static const uint16_t __attribute__((aligned(16))) pw_7f[8] = {
    127, 127, 127, 127, 127, 127, 127, 127
};
static const uint16_t __attribute__((aligned(16))) pw_ff[8] = {
    255, 255, 255, 255, 255, 255, 255, 255
};

/* 8x8 dither matrix: filter_plane() selects the row with (y & 7) and the
 * line functions add its values to the pixels. */
static const uint16_t __attribute__((aligned(16))) dither[8][8] = {
    {   0,  96,  24, 120,   6, 102,  30, 126 },
    {  64,  32,  88,  56,  70,  38,  94,  62 },
    {  16, 112,   8, 104,  22, 118,  14, 110 },
    {  80,  48,  72,  40,  86,  54,  78,  46 },
    {   4, 100,  28, 124,   2,  98,  26, 122 },
    {  68,  36,  92,  60,  66,  34,  90,  58 },
    {  20, 116,  12, 108,  18, 114,  10, 106 },
    {  84,  52,  76,  44,  82,  50,  74,  42 },
};
||||
|
|
||||
|
/*
 * Plain C line filter.
 *
 * For each of the width pixels, the source value (scaled by 128) is moved
 * towards the current dc value by an amount that decreases as their
 * difference, weighted by thresh, grows; the dither value for the column is
 * added and the result is scaled back and clipped into dst.
 */
static void filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc,
                          int width, int thresh, const uint16_t *dithers)
{
    for (int col = 0; col < width; col++) {
        const int scaled = src[col] << 7;
        const int delta  = dc[0] - scaled;

        int weight = abs(delta) * thresh >> 16;
        weight = FFMAX(0, 127 - weight);

        const int correction = weight * weight * delta >> 14;

        dst[col] = av_clip_uint8((scaled + (correction + dithers[col & 7])) >> 7);

        /* dc moves to its next entry each time the next column is odd */
        dc += (col + 1) & 1;
    }
}
||||
|
|
||||
|
/*
 * Plain C blur step.
 *
 * For each of the width output columns, sums a 2x2 block of source pixels
 * (two columns on the line at src and on the line at src + sstride) on top
 * of buf1[], stores that sum in buf[] and writes the difference between the
 * new sum and the previous content of buf[] into dc[].
 */
static void blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
                        uint8_t *src, int sstride, int width)
{
    for (int i = 0; i < width; i++) {
        const int left  = 2 * i;
        const int right = 2 * i + 1;
        const int sum   = buf1[i]
                        + src[left] + src[right]
                        + src[left + sstride] + src[right + sstride];
        const int previous = buf[i];

        buf[i] = sum;
        dc[i]  = sum - previous;
    }
}
||||
|
|
||||
|
#if HAVE_MMX2 |
||||
|
static void filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, |
||||
|
int width, int thresh, const uint16_t *dithers) |
||||
|
{ |
||||
|
intptr_t x; |
||||
|
if (width&3) { |
||||
|
x = width&~3; |
||||
|
filter_line_c(dst+x, src+x, dc+x/2, width-x, thresh, dithers); |
||||
|
width = x; |
||||
|
} |
||||
|
x = -width; |
||||
|
__asm__ volatile( |
||||
|
"movd %4, %%mm5 \n" |
||||
|
"pxor %%mm7, %%mm7 \n" |
||||
|
"pshufw $0, %%mm5, %%mm5 \n" |
||||
|
"movq %6, %%mm6 \n" |
||||
|
"movq %5, %%mm4 \n" |
||||
|
"1: \n" |
||||
|
"movd (%2,%0), %%mm0 \n" |
||||
|
"movd (%3,%0), %%mm1 \n" |
||||
|
"punpcklbw %%mm7, %%mm0 \n" |
||||
|
"punpcklwd %%mm1, %%mm1 \n" |
||||
|
"psllw $7, %%mm0 \n" |
||||
|
"pxor %%mm2, %%mm2 \n" |
||||
|
"psubw %%mm0, %%mm1 \n" // delta = dc - pix
|
||||
|
"psubw %%mm1, %%mm2 \n" |
||||
|
"pmaxsw %%mm1, %%mm2 \n" |
||||
|
"pmulhuw %%mm5, %%mm2 \n" // m = abs(delta) * thresh >> 16
|
||||
|
"psubw %%mm6, %%mm2 \n" |
||||
|
"pminsw %%mm7, %%mm2 \n" // m = -max(0, 127-m)
|
||||
|
"pmullw %%mm2, %%mm2 \n" |
||||
|
"paddw %%mm4, %%mm0 \n" // pix += dither
|
||||
|
"pmulhw %%mm2, %%mm1 \n" |
||||
|
"psllw $2, %%mm1 \n" // m = m*m*delta >> 14
|
||||
|
"paddw %%mm1, %%mm0 \n" // pix += m
|
||||
|
"psraw $7, %%mm0 \n" |
||||
|
"packuswb %%mm0, %%mm0 \n" |
||||
|
"movd %%mm0, (%1,%0) \n" // dst = clip(pix>>7)
|
||||
|
"add $4, %0 \n" |
||||
|
"jl 1b \n" |
||||
|
"emms \n" |
||||
|
:"+r"(x) |
||||
|
:"r"(dst+width), "r"(src+width), "r"(dc+width/2), |
||||
|
"rm"(thresh), "m"(*dithers), "m"(*pw_7f) |
||||
|
:"memory" |
||||
|
); |
||||
|
} |
||||
|
#endif |
||||
|
|
||||
|
#if HAVE_SSSE3 |
||||
|
static void filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, |
||||
|
int width, int thresh, const uint16_t *dithers) |
||||
|
{ |
||||
|
intptr_t x; |
||||
|
if (width&7) { |
||||
|
// could be 10% faster if I somehow eliminated this
|
||||
|
x = width&~7; |
||||
|
filter_line_c(dst+x, src+x, dc+x/2, width-x, thresh, dithers); |
||||
|
width = x; |
||||
|
} |
||||
|
x = -width; |
||||
|
__asm__ volatile( |
||||
|
"movd %4, %%xmm5 \n" |
||||
|
"pxor %%xmm7, %%xmm7 \n" |
||||
|
"pshuflw $0,%%xmm5, %%xmm5 \n" |
||||
|
"movdqa %6, %%xmm6 \n" |
||||
|
"punpcklqdq %%xmm5, %%xmm5 \n" |
||||
|
"movdqa %5, %%xmm4 \n" |
||||
|
"1: \n" |
||||
|
"movq (%2,%0), %%xmm0 \n" |
||||
|
"movq (%3,%0), %%xmm1 \n" |
||||
|
"punpcklbw %%xmm7, %%xmm0 \n" |
||||
|
"punpcklwd %%xmm1, %%xmm1 \n" |
||||
|
"psllw $7, %%xmm0 \n" |
||||
|
"psubw %%xmm0, %%xmm1 \n" // delta = dc - pix
|
||||
|
"pabsw %%xmm1, %%xmm2 \n" |
||||
|
"pmulhuw %%xmm5, %%xmm2 \n" // m = abs(delta) * thresh >> 16
|
||||
|
"psubw %%xmm6, %%xmm2 \n" |
||||
|
"pminsw %%xmm7, %%xmm2 \n" // m = -max(0, 127-m)
|
||||
|
"pmullw %%xmm2, %%xmm2 \n" |
||||
|
"psllw $1, %%xmm2 \n" |
||||
|
"paddw %%xmm4, %%xmm0 \n" // pix += dither
|
||||
|
"pmulhrsw %%xmm2, %%xmm1 \n" // m = m*m*delta >> 14
|
||||
|
"paddw %%xmm1, %%xmm0 \n" // pix += m
|
||||
|
"psraw $7, %%xmm0 \n" |
||||
|
"packuswb %%xmm0, %%xmm0 \n" |
||||
|
"movq %%xmm0, (%1,%0) \n" // dst = clip(pix>>7)
|
||||
|
"add $8, %0 \n" |
||||
|
"jl 1b \n" |
||||
|
:"+&r"(x) |
||||
|
:"r"(dst+width), "r"(src+width), "r"(dc+width/2), |
||||
|
"rm"(thresh), "m"(*dithers), "m"(*pw_7f) |
||||
|
:"memory" |
||||
|
); |
||||
|
} |
||||
|
#endif // HAVE_SSSE3
|
||||
|
|
||||
|
#if HAVE_SSE2 && HAVE_6REGS |
||||
|
#define BLURV(load)\ |
||||
|
intptr_t x = -2*width;\ |
||||
|
__asm__ volatile(\ |
||||
|
"movdqa %6, %%xmm7 \n"\ |
||||
|
"1: \n"\ |
||||
|
load" (%4,%0), %%xmm0 \n"\ |
||||
|
load" (%5,%0), %%xmm1 \n"\ |
||||
|
"movdqa %%xmm0, %%xmm2 \n"\ |
||||
|
"movdqa %%xmm1, %%xmm3 \n"\ |
||||
|
"psrlw $8, %%xmm0 \n"\ |
||||
|
"psrlw $8, %%xmm1 \n"\ |
||||
|
"pand %%xmm7, %%xmm2 \n"\ |
||||
|
"pand %%xmm7, %%xmm3 \n"\ |
||||
|
"paddw %%xmm1, %%xmm0 \n"\ |
||||
|
"paddw %%xmm3, %%xmm2 \n"\ |
||||
|
"paddw %%xmm2, %%xmm0 \n"\ |
||||
|
"paddw (%2,%0), %%xmm0 \n"\ |
||||
|
"movdqa (%1,%0), %%xmm1 \n"\ |
||||
|
"movdqa %%xmm0, (%1,%0) \n"\ |
||||
|
"psubw %%xmm1, %%xmm0 \n"\ |
||||
|
"movdqa %%xmm0, (%3,%0) \n"\ |
||||
|
"add $16, %0 \n"\ |
||||
|
"jl 1b \n"\ |
||||
|
:"+&r"(x)\ |
||||
|
:"r"(buf+width),\ |
||||
|
"r"(buf1+width),\ |
||||
|
"r"(dc+width),\ |
||||
|
"r"(src+width*2),\ |
||||
|
"r"(src+width*2+sstride),\ |
||||
|
"m"(*pw_ff)\ |
||||
|
:"memory"\ |
||||
|
); |
||||
|
|
||||
|
static void blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, |
||||
|
uint8_t *src, int sstride, int width) |
||||
|
{ |
||||
|
if (((intptr_t)src|sstride)&15) { |
||||
|
BLURV("movdqu"); |
||||
|
} else { |
||||
|
BLURV("movdqa"); |
||||
|
} |
||||
|
} |
||||
|
#endif // HAVE_6REGS && HAVE_SSE2
|
||||
|
|
||||
|
static void filter_plane(struct vf_priv_s *ctx, uint8_t *dst, uint8_t *src, |
||||
|
int width, int height, int dstride, int sstride, int r) |
||||
|
{ |
||||
|
int bstride = ((width+15)&~15)/2; |
||||
|
int y; |
||||
|
uint32_t dc_factor = (1<<21)/(r*r); |
||||
|
uint16_t *dc = ctx->buf+16; |
||||
|
uint16_t *buf = ctx->buf+bstride+32; |
||||
|
int thresh = ctx->thresh; |
||||
|
|
||||
|
memset(dc, 0, (bstride+16)*sizeof(*buf)); |
||||
|
for (y=0; y<r; y++) |
||||
|
ctx->blur_line(dc, buf+y*bstride, buf+(y-1)*bstride, src+2*y*sstride, sstride, width/2); |
||||
|
for (;;) { |
||||
|
if (y < height-r) { |
||||
|
int mod = ((y+r)/2)%r; |
||||
|
uint16_t *buf0 = buf+mod*bstride; |
||||
|
uint16_t *buf1 = buf+(mod?mod-1:r-1)*bstride; |
||||
|
int x, v; |
||||
|
ctx->blur_line(dc, buf0, buf1, src+(y+r)*sstride, sstride, width/2); |
||||
|
for (x=v=0; x<r; x++) |
||||
|
v += dc[x]; |
||||
|
for (; x<width/2; x++) { |
||||
|
v += dc[x] - dc[x-r]; |
||||
|
dc[x-r] = v * dc_factor >> 16; |
||||
|
} |
||||
|
for (; x<(width+r+1)/2; x++) |
||||
|
dc[x-r] = v * dc_factor >> 16; |
||||
|
for (x=-r/2; x<0; x++) |
||||
|
dc[x] = dc[0]; |
||||
|
} |
||||
|
if (y == r) { |
||||
|
for (y=0; y<r; y++) |
||||
|
ctx->filter_line(dst+y*dstride, src+y*sstride, dc-r/2, width, thresh, dither[y&7]); |
||||
|
} |
||||
|
ctx->filter_line(dst+y*dstride, src+y*sstride, dc-r/2, width, thresh, dither[y&7]); |
||||
|
if (++y >= height) break; |
||||
|
ctx->filter_line(dst+y*dstride, src+y*sstride, dc-r/2, width, thresh, dither[y&7]); |
||||
|
if (++y >= height) break; |
||||
|
} |
||||
|
} |
||||
|
|
||||
Loading…
Reference in new issue