diff --git a/modules/video_filter/Modules.am b/modules/video_filter/Modules.am index b477c69d0e..b6c13773de 100644 --- a/modules/video_filter/Modules.am +++ b/modules/video_filter/Modules.am @@ -72,6 +72,7 @@ SOURCES_atmo = atmo/atmo.cpp \ atmo/AtmoMultiConnection.cpp atmo/AtmoMultiConnection.h \ atmo/MoMoConnection.cpp atmo/MoMoConnection.h \ atmo/AtmoPacketQueue.cpp atmo/AtmoPacketQueue.h +SOURCES_gradfun = gradfun.c gradfun.h noinst_HEADERS = filter_picture.h libvlc_LTLIBRARIES += \ @@ -113,4 +114,5 @@ libvlc_LTLIBRARIES += \ libsharpen_plugin.la \ libwall_plugin.la \ libwave_plugin.la \ + libgradfun_plugin.la \ libyuvp_plugin.la diff --git a/modules/video_filter/gradfun.c b/modules/video_filter/gradfun.c new file mode 100644 index 0000000000..7c0f47aa8c --- /dev/null +++ b/modules/video_filter/gradfun.c @@ -0,0 +1,262 @@ +/***************************************************************************** + * gradfun.c: wrapper for the gradfun filter from mplayer + ***************************************************************************** + * Copyright (C) 2010 Laurent Aimar + * $Id$ + * + * Authors: Laurent Aimar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. 
+ *****************************************************************************/ + +/***************************************************************************** + * Preamble + *****************************************************************************/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include /* NOTE(review): the four include directives above carry no header name; the names seem to have been lost before this text reached review and must be restored from the original commit - TODO confirm */ + +/***************************************************************************** + * Module descriptor + *****************************************************************************/ +static int Open (vlc_object_t *); +static void Close(vlc_object_t *); /* entry points handed to set_callbacks() in the module descriptor below */ + +#define CFG_PREFIX "gradfun-" /* prepended to the name of both configuration variables */ + +#define RADIUS_MIN (4) +#define RADIUS_MAX (32) /* range given to add_integer_with_range() for the radius variable */ +#define RADIUS_TEXT N_("Radius") +#define RADIUS_LONGTEXT N_("Radius in pixels") + +#define STRENGTH_MIN (0.51) +#define STRENGTH_MAX (255) /* range given to add_float_with_range() for the strength variable */ +#define STRENGTH_TEXT N_("Strength") +#define STRENGTH_LONGTEXT N_("Strength used to modify the value of a pixel") + +vlc_module_begin() /* declares a module with the video filter2 capability and two ranged options: radius defaults to 16, strength to 1.2 */ + set_description(N_("Gradfun video filter")) + set_shortname(N_("Gradfun")) + set_help("Debanding algorithm") + set_capability("video filter2", 0) + set_category(CAT_VIDEO) + set_subcategory(SUBCAT_VIDEO_VFILTER) + add_integer_with_range(CFG_PREFIX "radius", 16, RADIUS_MIN, RADIUS_MAX, + NULL, RADIUS_TEXT, RADIUS_LONGTEXT, false) + add_float_with_range(CFG_PREFIX "strength", 1.2, STRENGTH_MIN, STRENGTH_MAX, + NULL, STRENGTH_TEXT, STRENGTH_LONGTEXT, false) + + set_callbacks(Open, Close) +vlc_module_end() + +/***************************************************************************** + * Local prototypes + *****************************************************************************/ +#define FFMAX(a,b) __MAX(a,b) /* gradfun.h is written against FFMAX and the HAVE_ switches; they are supplied here before it is included */ +#ifdef CAN_COMPILE_MMXEXT +# define HAVE_MMX2 1 +#else +# define HAVE_MMX2 0 +#endif +#ifdef CAN_COMPILE_SSE2 +# define HAVE_SSE2 1 +#else +# define HAVE_SSE2 0 +#endif +#ifdef CAN_COMPILE_SSSE3 +# define HAVE_SSSE3 1 +#else +# define HAVE_SSSE3 0 +#endif /* each HAVE_ switch above is 1 only when the matching CAN_COMPILE_ macro is defined; HAVE_6REGS, set next, depends on __x86_64__ instead */ +// FIXME too restrictive +#ifdef __x86_64__ 
+# define HAVE_6REGS 1 +#else +# define HAVE_6REGS 0 +#endif +#define av_clip_uint8 clip_uint8_vlc +#include "gradfun.h" + +static picture_t *Filter(filter_t *, picture_t *); +static int Callback(vlc_object_t *, char const *, vlc_value_t, vlc_value_t, void *); + +struct filter_sys_t { + vlc_mutex_t lock; + float strength; + int radius; + int h_shift; + int v_shift; + void *base_buf; + struct vf_priv_s cfg; +}; + +static int Open(vlc_object_t *object) +{ + filter_t *filter = (filter_t *)object; + + int h_shift; + int v_shift; + switch (filter->fmt_in.video.i_chroma) { + case VLC_CODEC_I410: + case VLC_CODEC_YV9: + h_shift = 2; v_shift = 2; + break; + case VLC_CODEC_I411: + h_shift = 2; v_shift = 0; + break; + case VLC_CODEC_I420: + case VLC_CODEC_J420: + case VLC_CODEC_YV12: + h_shift = 1; v_shift = 1; + break; + case VLC_CODEC_I422: + case VLC_CODEC_J422: + h_shift = 1; v_shift = 0; + break; + case VLC_CODEC_I444: + case VLC_CODEC_J444: + case VLC_CODEC_YUVA: + h_shift = 0; v_shift = 0; + break; + case VLC_CODEC_I440: + case VLC_CODEC_J440: + h_shift = 0; v_shift = 1; + break; + default: + return VLC_EGENERIC; + } + + filter_sys_t *sys = malloc(sizeof(*sys)); + if (!sys) + return VLC_ENOMEM; + + vlc_mutex_init(&sys->lock); + sys->h_shift = h_shift; + sys->v_shift = v_shift; + sys->strength = var_CreateGetFloatCommand(filter, CFG_PREFIX "strength"); + sys->radius = var_CreateGetIntegerCommand(filter, CFG_PREFIX "radius"); + var_AddCallback(filter, CFG_PREFIX "strength", Callback, NULL); + var_AddCallback(filter, CFG_PREFIX "radius", Callback, NULL); + sys->base_buf = NULL; + + struct vf_priv_s *cfg = &sys->cfg; + cfg->thresh = 0.0; + cfg->radius = 0; + cfg->buf = NULL; + cfg->filter_line = filter_line_c; + cfg->blur_line = blur_line_c; + +#if HAVE_SSE2 && HAVE_6REGS + if (vlc_CPU() & CPU_CAPABILITY_SSE2) + cfg->blur_line = blur_line_sse2; +#endif +#if HAVE_MMX2 + if (vlc_CPU() & CPU_CAPABILITY_MMXEXT) + cfg->filter_line = filter_line_mmx2; +#endif +#if HAVE_SSSE3 + 
if (vlc_CPU() & CPU_CAPABILITY_SSSE3) + cfg->filter_line = filter_line_ssse3; +#endif + + filter->p_sys = sys; + filter->pf_video_filter = Filter; + return VLC_SUCCESS; +} + +static void Close(vlc_object_t *object) +{ + filter_t *filter = (filter_t *)object; + filter_sys_t *sys = filter->p_sys; + + free(sys->base_buf); + vlc_mutex_destroy(&sys->lock); + free(sys); +} + +static picture_t *Filter(filter_t *filter, picture_t *src) +{ + filter_sys_t *sys = filter->p_sys; + + picture_t *dst = filter_NewPicture(filter); + if (!dst) { + picture_Release(src); + return NULL; + } + + vlc_mutex_lock(&sys->lock); + float strength = __MIN(__MAX(sys->strength, STRENGTH_MIN), STRENGTH_MAX); + int radius = __MIN(__MAX((sys->radius + 1) & ~1, RADIUS_MIN), RADIUS_MAX); + vlc_mutex_unlock(&sys->lock); + + const video_format_t *fmt = &filter->fmt_in.video; + struct vf_priv_s *cfg = &sys->cfg; + + cfg->thresh = (1 << 15) / strength; + if (cfg->radius != radius) { + cfg->radius = radius; + cfg->buf = vlc_memalign(&sys->base_buf, 16, + (((fmt->i_width + 15) & ~15) * (cfg->radius + 1) / 2 + 32) * sizeof(*cfg->buf)); + } + + for (int i = 0; i < dst->i_planes; i++) { + const plane_t *srcp = &src->p[i]; + plane_t *dstp = &dst->p[i]; + + int w = fmt->i_width; + int h = fmt->i_height; + int r = cfg->radius; + if (i > 0) { + w >>= sys->h_shift; + h >>= sys->v_shift; + r = ((r >> sys->h_shift) + (r >> sys->v_shift)) / 2; + r = __MIN(__MAX((r + 1) & ~1, RADIUS_MIN), RADIUS_MAX); + } + if (__MIN(w, h) > 2 * r && cfg->buf) { + filter_plane(cfg, dstp->p_pixels, srcp->p_pixels, + w, h, dstp->i_pitch, srcp->i_pitch, r); + } else { + plane_CopyPixels(dstp, srcp); + } + } + + picture_CopyProperties(dst, src); + picture_Release(src); + return dst; +} + +static int Callback(vlc_object_t *object, char const *cmd, + vlc_value_t oldval, vlc_value_t newval, void *data) +{ + filter_t *filter = (filter_t *)object; + filter_sys_t *sys = filter->p_sys; + VLC_UNUSED(oldval); VLC_UNUSED(data); + + 
vlc_mutex_lock(&sys->lock); + if (!strcmp(cmd, CFG_PREFIX "strength")) + sys->strength = newval.f_float; + else + sys->radius = newval.i_int; + vlc_mutex_unlock(&sys->lock); + + return VLC_SUCCESS; +} + diff --git a/modules/video_filter/gradfun.h b/modules/video_filter/gradfun.h new file mode 100644 index 0000000000..4b30748b51 --- /dev/null +++ b/modules/video_filter/gradfun.h @@ -0,0 +1,263 @@ +/* + * Copyright (C) 2009 Loren Merritt + * + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* + * Debanding algorithm (from gradfun2db by prunedtree): + * Boxblur. + * Foreach pixel, if it's within threshold of the blurred value, make it closer. + * So now we have a smoothed and higher bitdepth version of all the shallow + * gradients, while leaving detailed areas untouched. + * Dither it back to 8bit. 
+ */ + +struct vf_priv_s { /* state shared by filter_plane() and the line kernels it calls through the two function pointers */ + int thresh; /* read once per plane by filter_plane() and forwarded to filter_line */ + int radius; + uint16_t *buf; /* scratch memory; filter_plane() derives its dc and buf pointers from it */ + void (*filter_line)(uint8_t *dst, uint8_t *src, uint16_t *dc, + int width, int thresh, const uint16_t *dithers); + void (*blur_line)(uint16_t *dc, uint16_t *buf, uint16_t *buf1, + uint8_t *src, int sstride, int width); +}; + +static const uint16_t __attribute__((aligned(16))) pw_7f[8] = {127,127,127,127,127,127,127,127}; +static const uint16_t __attribute__((aligned(16))) pw_ff[8] = {255,255,255,255,255,255,255,255}; /* memory operands of the inline assembly: pw_7f for the filter_line kernels, pw_ff for BLURV */ +static const uint16_t __attribute__((aligned(16))) dither[8][8] = { + { 0, 96, 24,120, 6,102, 30,126 }, + { 64, 32, 88, 56, 70, 38, 94, 62 }, + { 16,112, 8,104, 22,118, 14,110 }, + { 80, 48, 72, 40, 86, 54, 78, 46 }, + { 4,100, 28,124, 2, 98, 26,122 }, + { 68, 36, 92, 60, 66, 34, 90, 58 }, + { 20,116, 12,108, 18,114, 10,106 }, + { 84, 52, 76, 44, 82, 50, 74, 42 }, +}; /* dither table: filter_plane() selects the row with y&7, filter_line_c() the entry with x&7 */ + +static void filter_line_c(uint8_t *dst, uint8_t *src, uint16_t *dc, + int width, int thresh, const uint16_t *dithers) +{ + int x; /* NOTE(review): the for statement below is incomplete in this text; everything between the loop variable and the shift by 16 is missing, so the definitions of pix, delta and m are not visible - restore from the original file before building */ + for (x=0; x> 16; + m = FFMAX(0, 127-m); + m = m*m*delta >> 14; + pix += m + dithers[x&7]; + dst[x] = av_clip_uint8(pix>>7); + } +} + +static void blur_line_c(uint16_t *dc, uint16_t *buf, uint16_t *buf1, + uint8_t *src, int sstride, int width) +{ + int x, v, old; /* NOTE(review): the text jumps from the loop header below straight into the tail of an MMX assembly block; the body of blur_line_c and the opening of the following kernel, presumably filter_line_mmx2 with its preprocessor guard, are not visible here - TODO restore */ + for (x=0; x> 16 + "psubw %%mm6, %%mm2 \n" + "pminsw %%mm7, %%mm2 \n" // m = -max(0, 127-m) + "pmullw %%mm2, %%mm2 \n" + "paddw %%mm4, %%mm0 \n" // pix += dither + "pmulhw %%mm2, %%mm1 \n" + "psllw $2, %%mm1 \n" // m = m*m*delta >> 14 + "paddw %%mm1, %%mm0 \n" // pix += m + "psraw $7, %%mm0 \n" + "packuswb %%mm0, %%mm0 \n" + "movd %%mm0, (%1,%0) \n" // dst = clip(pix>>7) + "add $4, %0 \n" + "jl 1b \n" + "emms \n" + :"+r"(x) + :"r"(dst+width), "r"(src+width), "r"(dc+width/2), + "rm"(thresh), "m"(*dithers), "m"(*pw_7f) + :"memory" + ); +} +#endif + +#if HAVE_SSSE3 +static void filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc, + int width, int thresh, const uint16_t *dithers) +{ + intptr_t x; + if (width&7) { + // 
could be 10% faster if I somehow eliminated this + x = width&~7; + filter_line_c(dst+x, src+x, dc+x/2, width-x, thresh, dithers); /* the last width&7 pixels go through the C kernel so the assembly loop only sees a multiple of 8 */ + width = x; + } + x = -width; /* x runs from -width up to 0 in steps of 8; the pointer operands are pre-advanced by width to match */ + __asm__ volatile( + "movd %4, %%xmm5 \n" + "pxor %%xmm7, %%xmm7 \n" + "pshuflw $0,%%xmm5, %%xmm5 \n" + "movdqa %6, %%xmm6 \n" + "punpcklqdq %%xmm5, %%xmm5 \n" + "movdqa %5, %%xmm4 \n" + "1: \n" + "movq (%2,%0), %%xmm0 \n" + "movq (%3,%0), %%xmm1 \n" + "punpcklbw %%xmm7, %%xmm0 \n" + "punpcklwd %%xmm1, %%xmm1 \n" + "psllw $7, %%xmm0 \n" + "psubw %%xmm0, %%xmm1 \n" // delta = dc - pix + "pabsw %%xmm1, %%xmm2 \n" + "pmulhuw %%xmm5, %%xmm2 \n" // m = abs(delta) * thresh >> 16 + "psubw %%xmm6, %%xmm2 \n" + "pminsw %%xmm7, %%xmm2 \n" // m = -max(0, 127-m) + "pmullw %%xmm2, %%xmm2 \n" + "psllw $1, %%xmm2 \n" + "paddw %%xmm4, %%xmm0 \n" // pix += dither + "pmulhrsw %%xmm2, %%xmm1 \n" // m = m*m*delta >> 14 + "paddw %%xmm1, %%xmm0 \n" // pix += m + "psraw $7, %%xmm0 \n" + "packuswb %%xmm0, %%xmm0 \n" + "movq %%xmm0, (%1,%0) \n" // dst = clip(pix>>7) + "add $8, %0 \n" + "jl 1b \n" + :"+&r"(x) + :"r"(dst+width), "r"(src+width), "r"(dc+width/2), + "rm"(thresh), "m"(*dithers), "m"(*pw_7f) + :"memory" + ); +} +#endif // HAVE_SSSE3 + +#if HAVE_SSE2 && HAVE_6REGS +#define BLURV(load)\ + intptr_t x = -2*width;\ + __asm__ volatile(\ + "movdqa %6, %%xmm7 \n"\ + "1: \n"\ + load" (%4,%0), %%xmm0 \n"\ + load" (%5,%0), %%xmm1 \n"\ + "movdqa %%xmm0, %%xmm2 \n"\ + "movdqa %%xmm1, %%xmm3 \n"\ + "psrlw $8, %%xmm0 \n"\ + "psrlw $8, %%xmm1 \n"\ + "pand %%xmm7, %%xmm2 \n"\ + "pand %%xmm7, %%xmm3 \n"\ + "paddw %%xmm1, %%xmm0 \n"\ + "paddw %%xmm3, %%xmm2 \n"\ + "paddw %%xmm2, %%xmm0 \n"\ + "paddw (%2,%0), %%xmm0 \n"\ + "movdqa (%1,%0), %%xmm1 \n"\ + "movdqa %%xmm0, (%1,%0) \n"\ + "psubw %%xmm1, %%xmm0 \n"\ + "movdqa %%xmm0, (%3,%0) \n"\ + "add $16, %0 \n"\ + "jl 1b \n"\ + :"+&r"(x)\ + :"r"(buf+width),\ + "r"(buf1+width),\ + "r"(dc+width),\ + "r"(src+width*2),\ + "r"(src+width*2+sstride),\ + "m"(*pw_ff)\ + :"memory"\ + ); + +static void 
blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1, + uint8_t *src, int sstride, int width) +{ + if (((intptr_t)src|sstride)&15) { /* src or sstride is not a multiple of 16: expand BLURV with unaligned loads, otherwise with aligned ones */ + BLURV("movdqu"); + } else { + BLURV("movdqa"); + } +} +#endif // HAVE_6REGS && HAVE_SSE2 + +static void filter_plane(struct vf_priv_s *ctx, uint8_t *dst, uint8_t *src, + int width, int height, int dstride, int sstride, int r) +{ /* Filters one plane from src into dst through ctx->blur_line and ctx->filter_line, using ctx->buf as scratch; r must be non-zero because dc_factor divides by r*r */ + int bstride = ((width+15)&~15)/2; + int y; + uint32_t dc_factor = (1<<21)/(r*r); + uint16_t *dc = ctx->buf+16; + uint16_t *buf = ctx->buf+bstride+32; + int thresh = ctx->thresh; + + memset(dc, 0, (bstride+16)*sizeof(*buf)); /* NOTE(review): three for statements in this function (the one right below, the x=v=0 loop and the y loop under y == r) stop right after the loop variable; their conditions and part of their bodies are missing from this text - restore from the original file before building */ + for (y=0; yblur_line(dc, buf+y*bstride, buf+(y-1)*bstride, src+2*y*sstride, sstride, width/2); + for (;;) { + if (y < height-r) { + int mod = ((y+r)/2)%r; + uint16_t *buf0 = buf+mod*bstride; + uint16_t *buf1 = buf+(mod?mod-1:r-1)*bstride; + int x, v; + ctx->blur_line(dc, buf0, buf1, src+(y+r)*sstride, sstride, width/2); + for (x=v=0; x> 16; + } + for (; x<(width+r+1)/2; x++) + dc[x-r] = v * dc_factor >> 16; + for (x=-r/2; x<0; x++) + dc[x] = dc[0]; + } + if (y == r) { + for (y=0; yfilter_line(dst+y*dstride, src+y*sstride, dc-r/2, width, thresh, dither[y&7]); + } + ctx->filter_line(dst+y*dstride, src+y*sstride, dc-r/2, width, thresh, dither[y&7]); /* two output rows are filtered per pass of the endless loop, which is left as soon as y reaches height */ + if (++y >= height) break; + ctx->filter_line(dst+y*dstride, src+y*sstride, dc-r/2, width, thresh, dither[y&7]); + if (++y >= height) break; + } +} +