You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
431 lines
14 KiB
431 lines
14 KiB
/*****************************************************************************
|
|
* substx3gsub.c : MP4 tx3g subtitles decoder
|
|
*****************************************************************************
|
|
* Copyright (C) 2014 VLC authors and VideoLAN
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
* under the terms of the GNU Lesser General Public License as published by
|
|
* the Free Software Foundation; either version 2.1 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public License
|
|
* along with this program; if not, write to the Free Software Foundation, Inc.,
|
|
* Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
|
|
*****************************************************************************/
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
# include "config.h"
|
|
#endif
|
|
|
|
#include <vlc_common.h>
|
|
#include <vlc_plugin.h>
|
|
#include <vlc_codec.h>
|
|
#include <vlc_sout.h>
|
|
#include <vlc_charset.h>
|
|
|
|
#include "substext.h"
|
|
|
|
/*****************************************************************************
|
|
* Module descriptor.
|
|
*****************************************************************************/
|
|
static int Open ( vlc_object_t * );
|
|
static subpicture_t *Decode( decoder_t *, block_t ** );
|
|
|
|
vlc_module_begin ()
|
|
set_description( N_("tx3g subtitles decoder") )
|
|
set_shortname( N_("tx3g subtitles") )
|
|
set_capability( "decoder", 100 )
|
|
set_category( CAT_INPUT )
|
|
set_subcategory( SUBCAT_INPUT_SCODEC )
|
|
set_callbacks( Open, NULL )
|
|
vlc_module_end ()
|
|
|
|
/****************************************************************************
|
|
* Local structs
|
|
****************************************************************************/
|
|
|
|
/*****************************************************************************
|
|
* Open: probe the decoder and return score
|
|
*****************************************************************************/
|
|
static int Open( vlc_object_t *p_this )
|
|
{
|
|
decoder_t *p_dec = (decoder_t *) p_this;
|
|
|
|
if( p_dec->fmt_in.i_codec != VLC_CODEC_TX3G )
|
|
return VLC_EGENERIC;
|
|
|
|
p_dec->pf_decode_sub = Decode;
|
|
|
|
p_dec->fmt_out.i_cat = SPU_ES;
|
|
p_dec->fmt_out.i_codec = 0;
|
|
|
|
return VLC_SUCCESS;
|
|
}
|
|
|
|
/*****************************************************************************
|
|
* Local:
|
|
*****************************************************************************/
|
|
|
|
#define FONT_FACE_BOLD 0x1
|
|
#define FONT_FACE_ITALIC 0x2
|
|
#define FONT_FACE_UNDERLINE 0x4
|
|
|
|
static int ConvertFlags( int i_atomflags )
|
|
{
|
|
int i_vlcstyles_flags = 0;
|
|
if ( i_atomflags & FONT_FACE_BOLD )
|
|
i_vlcstyles_flags |= STYLE_BOLD;
|
|
if ( i_atomflags & FONT_FACE_ITALIC )
|
|
i_vlcstyles_flags |= STYLE_ITALIC;
|
|
if ( i_atomflags & FONT_FACE_UNDERLINE )
|
|
i_vlcstyles_flags |= STYLE_UNDERLINE;
|
|
return i_vlcstyles_flags;
|
|
}
|
|
|
|
static size_t str8len( const char *psz_string )
|
|
{
|
|
const char *psz_tmp = psz_string;
|
|
size_t i=0;
|
|
while ( *psz_tmp )
|
|
{
|
|
if ( (*psz_tmp & 0xC0) != 0x80 ) i++;
|
|
psz_tmp++;
|
|
}
|
|
return i;
|
|
}
|
|
|
|
static char * str8indup( const char *psz_string, size_t i_skip, size_t n )
|
|
{
|
|
while( i_skip && *psz_string )
|
|
{
|
|
if ( (*psz_string & 0xC0) != 0x80 ) i_skip--;
|
|
psz_string++;
|
|
}
|
|
if ( ! *psz_string || i_skip ) return NULL;
|
|
|
|
const char *psz_tmp = psz_string;
|
|
while( n && *psz_tmp )
|
|
{
|
|
if ( (*psz_tmp & 0xC0) != 0x80 ) n--;
|
|
psz_tmp++;
|
|
}
|
|
return strndup( psz_string, psz_tmp - psz_string );
|
|
}
|
|
|
|
typedef struct tx3g_segment_t tx3g_segment_t;
|
|
|
|
struct tx3g_segment_t
|
|
{
|
|
text_segment_t *s;
|
|
size_t i_size;
|
|
tx3g_segment_t *p_next3g;
|
|
};
|
|
|
|
static tx3g_segment_t * tx3g_segment_New( const char *psz_string )
|
|
{
|
|
tx3g_segment_t *p_seg = malloc( sizeof(tx3g_segment_t) );
|
|
if( p_seg )
|
|
{
|
|
p_seg->i_size = 0;
|
|
p_seg->p_next3g = NULL;
|
|
p_seg->s = text_segment_New( psz_string );
|
|
if( !p_seg->s )
|
|
{
|
|
free( p_seg );
|
|
p_seg = NULL;
|
|
}
|
|
}
|
|
return p_seg;
|
|
}
|
|
|
|
static void SegmentDoSplit( tx3g_segment_t *p_segment, uint16_t i_start, uint16_t i_end,
|
|
tx3g_segment_t **pp_segment_left,
|
|
tx3g_segment_t **pp_segment_middle,
|
|
tx3g_segment_t **pp_segment_right )
|
|
{
|
|
tx3g_segment_t *p_segment_left = *pp_segment_left;
|
|
tx3g_segment_t *p_segment_right = *pp_segment_right;
|
|
tx3g_segment_t *p_segment_middle = *pp_segment_middle;
|
|
p_segment_left = p_segment_middle = p_segment_right = NULL;
|
|
|
|
if ( (p_segment->i_size - i_start < 1) || (p_segment->i_size - i_end < 1) )
|
|
return;
|
|
|
|
if ( i_start > 0 )
|
|
{
|
|
char* psz_text = str8indup( p_segment->s->psz_text, 0, i_start );
|
|
p_segment_left = tx3g_segment_New( psz_text );
|
|
free( psz_text );
|
|
if ( !p_segment_left ) goto error;
|
|
p_segment_left->s->style = text_style_Duplicate( p_segment->s->style );
|
|
p_segment_left->i_size = str8len( p_segment_left->s->psz_text );
|
|
}
|
|
|
|
char* psz_text = str8indup( p_segment->s->psz_text, i_start, i_end - i_start + 1 );
|
|
p_segment_middle = tx3g_segment_New( psz_text );
|
|
free( psz_text );
|
|
if ( !p_segment_middle ) goto error;
|
|
p_segment_middle->s->style = text_style_Duplicate( p_segment->s->style );
|
|
p_segment_middle->i_size = str8len( p_segment_middle->s->psz_text );
|
|
|
|
if ( i_end < (p_segment->i_size - 1) )
|
|
{
|
|
char* psz_text = str8indup( p_segment->s->psz_text, i_end + 1, p_segment->i_size - i_end - 1 );
|
|
p_segment_right = tx3g_segment_New( psz_text );
|
|
free( psz_text );
|
|
if ( !p_segment_right ) goto error;
|
|
p_segment_right->s->style = text_style_Duplicate( p_segment->s->style );
|
|
p_segment_right->i_size = str8len( p_segment_right->s->psz_text );
|
|
}
|
|
|
|
if ( p_segment_left ) p_segment_left->p_next3g = p_segment_middle;
|
|
if ( p_segment_right ) p_segment_middle->p_next3g = p_segment_right;
|
|
|
|
*pp_segment_left = p_segment_left;
|
|
*pp_segment_middle = p_segment_middle;
|
|
*pp_segment_right = p_segment_right;
|
|
|
|
return;
|
|
|
|
error:
|
|
text_segment_Delete( p_segment_left->s );
|
|
free( p_segment_left );
|
|
text_segment_Delete( p_segment_middle->s );
|
|
free( p_segment_middle );
|
|
text_segment_Delete( p_segment_right->s );
|
|
free( p_segment_right );
|
|
}
|
|
|
|
static bool SegmentSplit( tx3g_segment_t *p_prev, tx3g_segment_t **pp_segment,
|
|
const uint16_t i_start, const uint16_t i_end,
|
|
const text_style_t *p_styles )
|
|
{
|
|
tx3g_segment_t *p_segment_left = NULL, *p_segment_middle = NULL, *p_segment_right = NULL;
|
|
|
|
if ( (*pp_segment)->i_size == 0 ) return false;
|
|
if ( i_start > i_end ) return false;
|
|
if ( (size_t)(i_end - i_start) > (*pp_segment)->i_size - 1 ) return false;
|
|
if ( i_end > (*pp_segment)->i_size - 1 ) return false;
|
|
|
|
SegmentDoSplit( *pp_segment, i_start, i_end, &p_segment_left, &p_segment_middle, &p_segment_right );
|
|
if ( !p_segment_middle )
|
|
{
|
|
/* Failed */
|
|
text_segment_Delete( p_segment_left->s );
|
|
free( p_segment_left );
|
|
text_segment_Delete( p_segment_right->s );
|
|
free( p_segment_right );
|
|
return false;
|
|
}
|
|
|
|
tx3g_segment_t *p_next3g = (*pp_segment)->p_next3g;
|
|
text_segment_Delete( (*pp_segment)->s );
|
|
free( *pp_segment );
|
|
*pp_segment = ( p_segment_left ) ? p_segment_left : p_segment_middle ;
|
|
if ( p_prev ) p_prev->p_next3g = *pp_segment;
|
|
|
|
if ( p_segment_right )
|
|
p_segment_right->p_next3g = p_next3g;
|
|
else
|
|
p_segment_middle->p_next3g = p_next3g;
|
|
|
|
text_style_Delete( p_segment_middle->s->style );
|
|
p_segment_middle->s->style = text_style_Duplicate( p_styles );
|
|
|
|
return true;
|
|
}
|
|
|
|
/* Creates a new segment using the given style and split existing ones according
|
|
to the start & end offsets */
|
|
static void ApplySegmentStyle( tx3g_segment_t **pp_segment, const uint16_t i_absstart,
|
|
const uint16_t i_absend, const text_style_t *p_styles )
|
|
{
|
|
/* find the matching segment */
|
|
uint16_t i_curstart = 0;
|
|
tx3g_segment_t *p_prev = NULL;
|
|
tx3g_segment_t *p_cur = *pp_segment;
|
|
while ( p_cur )
|
|
{
|
|
uint16_t i_curend = i_curstart + p_cur->i_size - 1;
|
|
if ( (i_absstart >= i_curstart) && (i_absend <= i_curend) )
|
|
{
|
|
/* segment found */
|
|
if ( !SegmentSplit( p_prev, &p_cur, i_absstart - i_curstart,
|
|
i_absend - i_curstart, p_styles ) )
|
|
return;
|
|
if ( !p_prev ) *pp_segment = p_cur;
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
i_curstart += p_cur->i_size;
|
|
p_prev = p_cur;
|
|
p_cur = p_cur->p_next3g;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*****************************************************************************
|
|
* Decode:
|
|
*****************************************************************************/
|
|
static subpicture_t *Decode( decoder_t *p_dec, block_t **pp_block )
|
|
{
|
|
block_t *p_block;
|
|
subpicture_t *p_spu = NULL;
|
|
|
|
if( ( pp_block == NULL ) || ( *pp_block == NULL ) ) return NULL;
|
|
p_block = *pp_block;
|
|
*pp_block = NULL;
|
|
|
|
if( ( p_block->i_flags & (BLOCK_FLAG_DISCONTINUITY|BLOCK_FLAG_CORRUPTED) ) ||
|
|
p_block->i_buffer < sizeof(uint16_t) )
|
|
{
|
|
block_Release( p_block );
|
|
return NULL;
|
|
}
|
|
|
|
uint8_t *p_buf = p_block->p_buffer;
|
|
|
|
/* Read our raw string and create the styled segment for HTML */
|
|
uint16_t i_psz_bytelength = GetWBE( p_buf );
|
|
const uint8_t *p_pszstart = p_block->p_buffer + sizeof(uint16_t);
|
|
char *psz_subtitle;
|
|
if ( i_psz_bytelength > 2 &&
|
|
( !memcmp( p_pszstart, "\xFE\xFF", 2 ) || !memcmp( p_pszstart, "\xFF\xFE", 2 ) )
|
|
)
|
|
{
|
|
psz_subtitle = FromCharset( "UTF-16", p_pszstart, i_psz_bytelength );
|
|
if ( !psz_subtitle ) return NULL;
|
|
}
|
|
else
|
|
{
|
|
psz_subtitle = malloc( i_psz_bytelength + 1 );
|
|
if ( !psz_subtitle ) return NULL;
|
|
memcpy( psz_subtitle, p_pszstart, i_psz_bytelength );
|
|
psz_subtitle[ i_psz_bytelength ] = '\0';
|
|
}
|
|
p_buf += i_psz_bytelength + sizeof(uint16_t);
|
|
|
|
for( uint16_t i=0; i < i_psz_bytelength; i++ )
|
|
if ( psz_subtitle[i] == '\r' ) psz_subtitle[i] = '\n';
|
|
|
|
tx3g_segment_t *p_segment3g = tx3g_segment_New( psz_subtitle );
|
|
p_segment3g->i_size = str8len( psz_subtitle );
|
|
if ( p_dec->fmt_in.subs.p_style )
|
|
p_segment3g->s->style = text_style_Duplicate( p_dec->fmt_in.subs.p_style );
|
|
|
|
if ( !p_segment3g->s->psz_text )
|
|
{
|
|
text_segment_Delete( p_segment3g->s );
|
|
free( p_segment3g );
|
|
free( psz_subtitle );
|
|
return NULL;
|
|
}
|
|
|
|
/* Create the subpicture unit */
|
|
p_spu = decoder_NewSubpictureText( p_dec );
|
|
if( !p_spu )
|
|
{
|
|
free( psz_subtitle );
|
|
text_segment_Delete( p_segment3g->s );
|
|
free( p_segment3g );
|
|
return NULL;
|
|
}
|
|
subpicture_updater_sys_t *p_spu_sys = p_spu->updater.p_sys;
|
|
|
|
/* Parse our styles */
|
|
while( (size_t)(p_buf - p_block->p_buffer) + 8 < p_block->i_buffer )
|
|
{
|
|
uint32_t i_atomsize = GetDWBE( p_buf );
|
|
vlc_fourcc_t i_atomtype = VLC_FOURCC(p_buf[4],p_buf[5],p_buf[6],p_buf[7]);
|
|
p_buf += 8;
|
|
switch( i_atomtype )
|
|
{
|
|
|
|
case VLC_FOURCC('s','t','y','l'):
|
|
{
|
|
if ( (size_t)(p_buf - p_block->p_buffer) < 14 ) break;
|
|
uint16_t i_nbrecords = GetWBE(p_buf);
|
|
uint16_t i_cur_record = 0;
|
|
p_buf += 2;
|
|
while( i_cur_record++ < i_nbrecords )
|
|
{
|
|
if ( (size_t)(p_buf - p_block->p_buffer) < 12 ) break;
|
|
uint16_t i_start = __MIN( GetWBE(p_buf), i_psz_bytelength - 1 );
|
|
uint16_t i_end = __MIN( GetWBE(p_buf + 2), i_psz_bytelength - 1 );
|
|
|
|
text_style_t style;
|
|
memset( &style, 0, sizeof(text_style_t) );
|
|
style.i_style_flags = ConvertFlags( p_buf[6] );
|
|
style.f_font_relsize = p_buf[7] * 5 / 100; /* in % units of 0.05 height */
|
|
style.i_font_color = GetDWBE(p_buf+8) >> 8;// RGBA -> RGB
|
|
style.i_font_alpha = GetDWBE(p_buf+8) & 0xFF;
|
|
style.i_features = STYLE_HAS_FONT_COLOR | STYLE_HAS_FONT_ALPHA;
|
|
ApplySegmentStyle( &p_segment3g, i_start, i_end, &style );
|
|
|
|
if ( i_nbrecords == 1 )
|
|
{
|
|
if ( p_buf[6] )
|
|
{
|
|
if( (p_spu_sys->p_default_style->i_style_flags = ConvertFlags( p_buf[6] )) )
|
|
p_spu_sys->p_default_style->i_features |= STYLE_HAS_FLAGS;
|
|
}
|
|
p_spu_sys->p_default_style->f_font_relsize = p_buf[7] * 5 / 100;
|
|
p_spu_sys->p_default_style->i_font_color = GetDWBE(p_buf+8) >> 8;// RGBA -> ARGB
|
|
p_spu_sys->p_default_style->i_font_alpha = (GetDWBE(p_buf+8) & 0xFF) << 24;
|
|
p_spu_sys->p_default_style->i_features |= (STYLE_HAS_FONT_COLOR | STYLE_HAS_FONT_ALPHA);
|
|
}
|
|
|
|
p_buf += 12;
|
|
}
|
|
} break;
|
|
|
|
case VLC_FOURCC('d','r','p','o'):
|
|
if ( (size_t)(p_buf - p_block->p_buffer) < 4 ) break;
|
|
p_spu_sys->p_default_style->i_shadow_width = __MAX( GetWBE(p_buf), GetWBE(p_buf+2) );
|
|
break;
|
|
|
|
case VLC_FOURCC('d','r','p','t'):
|
|
if ( (size_t)(p_buf - p_block->p_buffer) < 2 ) break;
|
|
p_spu_sys->p_default_style->i_shadow_alpha = GetWBE(p_buf);
|
|
p_spu_sys->p_default_style->i_features |= STYLE_HAS_SHADOW_ALPHA;
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
|
|
}
|
|
p_buf += i_atomsize;
|
|
}
|
|
|
|
p_spu->i_start = p_block->i_pts;
|
|
p_spu->i_stop = p_block->i_pts + p_block->i_length;
|
|
p_spu->b_ephemer = (p_block->i_length == 0);
|
|
p_spu->b_absolute = false;
|
|
|
|
p_spu_sys->align = SUBPICTURE_ALIGN_BOTTOM;
|
|
|
|
/* Unwrap */
|
|
text_segment_t *p_text_segments = p_segment3g->s;
|
|
text_segment_t *p_cur = p_text_segments;
|
|
while( p_segment3g )
|
|
{
|
|
tx3g_segment_t * p_old = p_segment3g;
|
|
p_segment3g = p_segment3g->p_next3g;
|
|
free( p_old );
|
|
if( p_segment3g )
|
|
p_cur->p_next = p_segment3g->s;
|
|
p_cur = p_cur->p_next;
|
|
}
|
|
|
|
p_spu_sys->p_segments = p_text_segments;
|
|
|
|
block_Release( p_block );
|
|
|
|
return p_spu;
|
|
}
|
|
|