diff --git a/modules/codec/faad.c b/modules/codec/faad.c index 7da3c97954..e38c0021f4 100644 --- a/modules/codec/faad.c +++ b/modules/codec/faad.c @@ -157,7 +157,7 @@ static int Open( vlc_object_t *p_this ) p_dec->fmt_out.audio.i_rate = i_rate; p_dec->fmt_out.audio.i_channels = i_channels; p_dec->fmt_out.audio.i_physical_channels - = mpeg4_asc_channelsbyindex[i_channels]; + = MPEG4_asc_channelsbyindex[i_channels]; date_Init( &p_sys->date, i_rate, 1 ); } else @@ -328,7 +328,7 @@ static int DecodeBlock( decoder_t *p_dec, block_t *p_block ) p_dec->fmt_out.audio.i_rate = i_rate; p_dec->fmt_out.audio.i_channels = i_channels; p_dec->fmt_out.audio.i_physical_channels - = mpeg4_asc_channelsbyindex[i_channels]; + = MPEG4_asc_channelsbyindex[i_channels]; date_Init( &p_sys->date, i_rate, 1 ); } @@ -399,7 +399,7 @@ static int DecodeBlock( decoder_t *p_dec, block_t *p_block ) p_dec->fmt_out.audio.i_rate = i_rate; p_dec->fmt_out.audio.i_channels = i_channels; p_dec->fmt_out.audio.i_physical_channels - = mpeg4_asc_channelsbyindex[i_channels]; + = MPEG4_asc_channelsbyindex[i_channels]; date_Init( &p_sys->date, i_rate, 1 ); } } diff --git a/modules/packetizer/mpeg4audio.c b/modules/packetizer/mpeg4audio.c index f778fc996f..c08db19ff8 100644 --- a/modules/packetizer/mpeg4audio.c +++ b/modules/packetizer/mpeg4audio.c @@ -57,62 +57,6 @@ /***************************************************************************** * decoder_sys_t : decoder descriptor *****************************************************************************/ -typedef struct -{ - enum mpeg4_audioObjectType i_object_type; - unsigned i_samplerate; - uint8_t i_channel_configuration; - int8_t i_sbr; // 0: no sbr, 1: sbr, -1: unknown - int8_t i_ps; // 0: no ps, 1: ps, -1: unknown - - struct - { - enum mpeg4_audioObjectType i_object_type; - unsigned i_samplerate; - uint8_t i_channel_configuration; - } extension; - - /* GASpecific */ - unsigned i_frame_length; // 1024 or 960 - -} mpeg4_asc_t; - -#define LATM_MAX_EXTRA_SIZE 64 -typedef struct -{ - uint8_t i_program; - uint8_t i_layer; - - unsigned i_frame_length; // type 1 - uint8_t i_frame_length_type; - uint8_t i_frame_length_index; // type 3 4 5 6 7 - - mpeg4_asc_t cfg; - - /* Raw configuration */ - size_t i_extra; - uint8_t extra[LATM_MAX_EXTRA_SIZE]; - -} latm_stream_t; - -#define LATM_MAX_LAYER (8) -#define LATM_MAX_PROGRAM (16) -typedef struct -{ - bool b_same_time_framing; - uint8_t i_sub_frames; - uint8_t i_programs; - - uint8_t pi_layers[LATM_MAX_PROGRAM]; - - uint8_t pi_stream[LATM_MAX_PROGRAM][LATM_MAX_LAYER]; - - uint8_t i_streams; - latm_stream_t stream[LATM_MAX_PROGRAM*LATM_MAX_LAYER]; - - uint32_t i_other_data; - int16_t i_crc; /* -1 if not set */ -} latm_mux_t; typedef struct { @@ -140,7 +84,7 @@ typedef struct /* LOAS */ bool b_latm_cfg; - latm_mux_t latm; + MPEG4_streammux_config_t latm; int i_warnings; } decoder_sys_t; @@ -167,13 +111,6 @@ enum { TYPE_LOAS }; -static const int pi_sample_rates[16] = -{ - 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, - 16000, 12000, 11025, 8000, 7350, 0, 0, 0 -}; - - static int ChannelConfigurationToVLC(uint8_t i_channel) { if (i_channel == 7) @@ -187,17 +124,17 @@ static int AOTtoAACProfile(uint8_t i_object_type) { switch(i_object_type) { - case AOT_AAC_MAIN: - case AOT_AAC_LC: - case AOT_AAC_SSR: - case AOT_AAC_LTP: - case AOT_AAC_SBR: - case AOT_AAC_SC: - case AOT_ER_AAC_LD: - case AOT_AAC_PS: - case AOT_ER_AAC_ELD: + case MPEG4_AOT_AAC_MAIN: + case MPEG4_AOT_AAC_LC: + case MPEG4_AOT_AAC_SSR: + case MPEG4_AOT_AAC_LTP: + case MPEG4_AOT_AAC_SBR: + case MPEG4_AOT_AAC_SC: + case MPEG4_AOT_ER_AAC_LD: + case MPEG4_AOT_AAC_PS: + case MPEG4_AOT_ER_AAC_ELD: { - static_assert(AOT_AAC_MAIN == AAC_PROFILE_MAIN + 1, + static_assert(MPEG4_AOT_AAC_MAIN == AAC_PROFILE_MAIN + 1, "invalid profile to object mapping"); return i_object_type - 1; } @@ -218,8 +155,6 @@ static void ClosePacketizer(vlc_object_t *); static block_t *Packetize (decoder_t *, block_t **); static void Flush( decoder_t * ); -static int Mpeg4ReadAudioSpecificConfig(bs_t *s, mpeg4_asc_t *p_cfg, bool); - /***************************************************************************** * Module descriptor *****************************************************************************/ @@ -299,10 +234,10 @@ static int OpenPacketizer(vlc_object_t *p_this) if(p_dec->fmt_in->i_extra) { - mpeg4_asc_t asc; + MPEG4_asc_t asc; bs_t s; bs_init(&s, p_dec->fmt_in->p_extra, p_dec->fmt_in->i_extra); - if(Mpeg4ReadAudioSpecificConfig(&s, &asc, true) == VLC_SUCCESS) + if(MPEG4_read_AudioSpecificConfig(&s, &asc, true) == VLC_SUCCESS) { p_dec->fmt_out.audio.i_rate = asc.i_samplerate; p_dec->fmt_out.audio.i_frame_length = asc.i_frame_length; @@ -493,443 +428,6 @@ static int LOASSyncInfo(uint8_t p_header[LOAS_HEADER_SIZE], unsigned int *pi_hea return ((p_header[1] & 0x1f) << 8) + p_header[2]; } -static int Mpeg4GAProgramConfigElement(bs_t *s) -{ - /* TODO compute channels count ? */ - int i_tag = bs_read(s, 4); - if (i_tag != 0x05) - return -1; - bs_skip(s, 2 + 4); // object type + sampling index - int i_num_front = bs_read(s, 4); - int i_num_side = bs_read(s, 4); - int i_num_back = bs_read(s, 4); - int i_num_lfe = bs_read(s, 2); - int i_num_assoc_data = bs_read(s, 3); - int i_num_valid_cc = bs_read(s, 4); - - if (bs_read1(s)) - bs_skip(s, 4); // mono downmix - if (bs_read1(s)) - bs_skip(s, 4); // stereo downmix - if (bs_read1(s)) - bs_skip(s, 2+1); // matrix downmix + pseudo_surround - - bs_skip(s, i_num_front * (1+4)); - bs_skip(s, i_num_side * (1+4)); - bs_skip(s, i_num_back * (1+4)); - bs_skip(s, i_num_lfe * (4)); - bs_skip(s, i_num_assoc_data * (4)); - bs_skip(s, i_num_valid_cc * (5)); - bs_align(s); - int i_comment = bs_read(s, 8); - bs_skip(s, i_comment * 8); - return 0; -} - -static int Mpeg4GASpecificConfig(mpeg4_asc_t *p_cfg, bs_t *s) -{ - p_cfg->i_frame_length = bs_read1(s) ? 960 : 1024; - if(p_cfg->i_object_type == AOT_ER_AAC_LD) /* 14496-3 4.5.1.1 */ - p_cfg->i_frame_length >>= 1; - else if(p_cfg->i_object_type == AOT_AAC_SSR) - p_cfg->i_frame_length = 256; - - if (bs_read1(s)) // depend on core coder - bs_skip(s, 14); // core coder delay - - int i_extension_flag = bs_read1(s); - if (p_cfg->i_channel_configuration == 0) - Mpeg4GAProgramConfigElement(s); - if (p_cfg->i_object_type == AOT_AAC_SC || - p_cfg->i_object_type == AOT_ER_AAC_SC) - bs_skip(s, 3); // layer - - if (i_extension_flag) { - if (p_cfg->i_object_type == AOT_ER_BSAC) - bs_skip(s, 5 + 11); // numOfSubFrame + layer length - if (p_cfg->i_object_type == AOT_ER_AAC_LC || - p_cfg->i_object_type == AOT_ER_AAC_LTP || - p_cfg->i_object_type == AOT_ER_AAC_SC || - p_cfg->i_object_type == AOT_ER_AAC_LD) - bs_skip(s, 1+1+1); // ER data : section scale spectral */ - if (bs_read1(s)) // extension 3 - fprintf(stderr, "Mpeg4GASpecificConfig: error 1\n"); - } - return 0; -} - -static int Mpeg4ELDSpecificConfig(mpeg4_asc_t *p_cfg, bs_t *s) -{ - p_cfg->i_frame_length = bs_read1(s) ? 480 : 512; - - /* ELDSpecificConfig Table 4.180 */ - - bs_skip(s, 3); - if(bs_read1(s)) /* ldSbrPresentFlag */ - { - bs_skip(s, 2); - /* ld_sbr_header(channelConfiguration) Table 4.181 */ - unsigned numSbrHeader; - switch(p_cfg->i_channel_configuration) - { - case 1: case 2: - numSbrHeader = 1; - break; - case 3: - numSbrHeader = 2; - break; - case 4: case 5: case 6: - numSbrHeader = 3; - break; - case 7: - numSbrHeader = 4; - break; - default: - numSbrHeader = 0; - break; - } - for( ; numSbrHeader; numSbrHeader-- ) - { - /* sbr_header() Table 4.63 */ - bs_read(s, 14); - bool header_extra_1 = bs_read1(s); - bool header_extra_2 = bs_read1(s); - if(header_extra_1) - bs_read(s, 5); - if(header_extra_2) - bs_read(s, 6); - } - } - - for(unsigned eldExtType = bs_read(s, 4); - eldExtType != 0x0 /* ELDEXT_TERM */; - eldExtType = bs_read(s, 4)) - { - unsigned eldExtLen = bs_read(s, 4); - unsigned eldExtLenAdd = 0; - if(eldExtLen == 15) - { - eldExtLenAdd = bs_read(s, 8); - eldExtLen += eldExtLenAdd; - } - if(eldExtLenAdd == 255) - eldExtLen += bs_read(s, 16); - /* reserved extensions */ - for(; eldExtLen; eldExtLen--) - bs_skip(s, 8); - } - - return 0; -} - -static enum mpeg4_audioObjectType Mpeg4ReadAudioObjectType(bs_t *s) -{ - int i_type = bs_read(s, 5); - if (i_type == 31) - i_type = 32 + bs_read(s, 6); - return i_type; -} - -static unsigned Mpeg4ReadAudioSamplerate(bs_t *s) -{ - int i_index = bs_read(s, 4); - if (i_index != 0x0f) - return pi_sample_rates[i_index]; - return bs_read(s, 24); -} - -static int Mpeg4ReadAudioSpecificConfig(bs_t *s, mpeg4_asc_t *p_cfg, bool b_withext) -{ - p_cfg->i_object_type = Mpeg4ReadAudioObjectType(s); - p_cfg->i_samplerate = Mpeg4ReadAudioSamplerate(s); - p_cfg->i_channel_configuration = bs_read(s, 4); - - p_cfg->i_sbr = -1; - p_cfg->i_ps = -1; - p_cfg->extension.i_object_type = 0; - p_cfg->extension.i_samplerate = 0; - p_cfg->extension.i_channel_configuration = 0; - p_cfg->i_frame_length = 0; - - if (p_cfg->i_object_type == AOT_AAC_SBR || - p_cfg->i_object_type == AOT_AAC_PS) { - p_cfg->i_sbr = 1; - if (p_cfg->i_object_type == AOT_AAC_PS) - p_cfg->i_ps = 1; - p_cfg->extension.i_object_type = AOT_AAC_SBR; - p_cfg->extension.i_samplerate = Mpeg4ReadAudioSamplerate(s); - - p_cfg->i_object_type = Mpeg4ReadAudioObjectType(s); - if(p_cfg->i_object_type == AOT_ER_BSAC) - p_cfg->extension.i_channel_configuration = bs_read(s, 4); - } - - switch(p_cfg->i_object_type) - { - case AOT_AAC_MAIN: - case AOT_AAC_LC: - case AOT_AAC_SSR: - case AOT_AAC_LTP: - case AOT_AAC_SC: - case AOT_TWINVQ: - case AOT_ER_AAC_LC: - case AOT_ER_AAC_LTP: - case AOT_ER_AAC_SC: - case AOT_ER_TWINVQ: - case AOT_ER_BSAC: - case AOT_ER_AAC_LD: - Mpeg4GASpecificConfig(p_cfg, s); - break; - case AOT_CELP: - // CelpSpecificConfig(); - case AOT_HVXC: - // HvxcSpecificConfig(); - case AOT_TTSI: - // TTSSSpecificConfig(); - case AOT_MAIN_SYNTHETIC: - case AOT_WAVETABLES: - case AOT_GENERAL_MIDI: - case AOT_ALGORITHMIC: - // StructuredAudioSpecificConfig(); - case AOT_ER_CELP: - // ERCelpSpecificConfig(); - case AOT_ER_HXVC: - // ERHvxcSpecificConfig(); - case AOT_ER_HILN: - case AOT_ER_Parametric: - // ParametricSpecificConfig(); - case AOT_SSC: - // SSCSpecificConfig(); - case AOT_LAYER1: - case AOT_LAYER2: - case AOT_LAYER3: - // MPEG_1_2_SpecificConfig(); - case AOT_DST: - // DSTSpecificConfig(); - case AOT_ALS: - // ALSSpecificConfig(); - case AOT_SLS: - case AOT_SLS_NON_CORE: - // SLSSpecificConfig(); - case AOT_ER_AAC_ELD: - Mpeg4ELDSpecificConfig(p_cfg, s); - break; - case AOT_SMR_SIMPLE: - case AOT_SMR_MAIN: - // SymbolicMusicSpecificConfig(); - default: - // error - return VLC_EGENERIC; - } - - switch(p_cfg->i_object_type) - { - case AOT_ER_AAC_LC: - case AOT_ER_AAC_LTP: - case AOT_ER_AAC_SC: - case AOT_ER_TWINVQ: - case AOT_ER_BSAC: - case AOT_ER_AAC_LD: - case AOT_ER_CELP: - case AOT_ER_HXVC: - case AOT_ER_HILN: - case AOT_ER_Parametric: - case AOT_ER_AAC_ELD: - { - int epConfig = bs_read(s, 2); - if (epConfig == 2 || epConfig == 3) - //ErrorProtectionSpecificConfig(); - if (epConfig == 3) - if (bs_read1(s)) { - // TODO : directMapping - } - break; - } - default: - break; - } - - if (b_withext && p_cfg->extension.i_object_type != AOT_AAC_SBR && - !bs_eof(s) && bs_read(s, 11) == 0x2b7) - { - p_cfg->extension.i_object_type = Mpeg4ReadAudioObjectType(s); - if (p_cfg->extension.i_object_type == AOT_AAC_SBR) - { - p_cfg->i_sbr = bs_read1(s); - if (p_cfg->i_sbr == 1) { - p_cfg->extension.i_samplerate = Mpeg4ReadAudioSamplerate(s); - if (bs_read(s, 11) == 0x548) - p_cfg->i_ps = bs_read1(s); - } - } - else if (p_cfg->extension.i_object_type == AOT_ER_BSAC) - { - p_cfg->i_sbr = bs_read1(s); - if(p_cfg->i_sbr) - p_cfg->extension.i_samplerate = Mpeg4ReadAudioSamplerate(s); - p_cfg->extension.i_channel_configuration = bs_read(s, 4); - } - } - -#if 0 - static const char *ppsz_otype[] = { - "NULL", - "AAC Main", "AAC LC", "AAC SSR", "AAC LTP", "SBR", "AAC Scalable", - "TwinVQ", - "CELP", "HVXC", - "Reserved", "Reserved", - "TTSI", - "Main Synthetic", "Wavetables Synthesis", "General MIDI", - "Algorithmic Synthesis and Audio FX", - "ER AAC LC", - "Reserved", - "ER AAC LTP", "ER AAC Scalable", "ER TwinVQ", "ER BSAC", "ER AAC LD", - "ER CELP", "ER HVXC", "ER HILN", "ER Parametric", - "SSC", - "PS", "MPEG Surround", "Escape", - "Layer 1", "Layer 2", "Layer 3", - "DST", "ALS", "SLS", "SLS non-core", "ELD", - "SMR Simple", "SMR Main", - }; - - fprintf(stderr, "Mpeg4ReadAudioSpecificInfo: t=%s(%d)f=%d c=%d sbr=%d\n", - ppsz_otype[p_cfg->i_object_type], p_cfg->i_object_type, - p_cfg->i_samplerate, p_cfg->i_channel, p_cfg->i_sbr); -#endif - return bs_error(s) ? VLC_EGENERIC : VLC_SUCCESS; -} - -static uint32_t LatmGetValue(bs_t *s) -{ - uint32_t v = 0; - for (int i = 1 + bs_read(s, 2); i > 0; i--) - v = (v << 8) + bs_read(s, 8); - return v; -} - -static size_t AudioSpecificConfigBitsToBytes(bs_t *s, uint32_t i_bits, uint8_t *p_data) -{ - size_t i_extra = __MIN((i_bits + 7) / 8, LATM_MAX_EXTRA_SIZE); - for (size_t i = 0; i < i_extra; i++) { - const uint32_t i_read = __MIN(8, i_bits - 8*i); - p_data[i] = bs_read(s, i_read) << (8-i_read); - } - return i_extra; -} - -static int LatmReadStreamMuxConfiguration(latm_mux_t *m, bs_t *s) -{ - int i_mux_version; - int i_mux_versionA; - - i_mux_version = bs_read(s, 1); - i_mux_versionA = 0; - if (i_mux_version) - i_mux_versionA = bs_read(s, 1); - - if (i_mux_versionA != 0) /* support only A=0 */ - return -1; - - memset(m, 0, sizeof(*m)); - - if (i_mux_versionA == 0) - if (i_mux_version == 1) - LatmGetValue(s); /* taraBufferFullness */ - - if(bs_eof(s)) - return -1; - - m->b_same_time_framing = bs_read1(s); - m->i_sub_frames = 1 + bs_read(s, 6); - m->i_programs = 1 + bs_read(s, 4); - - for (uint8_t i_program = 0; i_program < m->i_programs; i_program++) { - if(bs_eof(s)) - return -1; - m->pi_layers[i_program] = 1+bs_read(s, 3); - - for (uint8_t i_layer = 0; i_layer < m->pi_layers[i_program]; i_layer++) { - latm_stream_t *st = &m->stream[m->i_streams]; - bool b_previous_cfg; - - m->pi_stream[i_program][i_layer] = m->i_streams; - st->i_program = i_program; - st->i_layer = i_layer; - - b_previous_cfg = false; - if (i_program != 0 || i_layer != 0) - b_previous_cfg = bs_read1(s); - - if (b_previous_cfg) { - assert(m->i_streams > 0); - st->cfg = m->stream[m->i_streams-1].cfg; - } else { - uint32_t asc_size = 0; - if(i_mux_version > 0) - asc_size = LatmGetValue(s); - bs_t asc_bs = *s; - Mpeg4ReadAudioSpecificConfig(&asc_bs, &st->cfg, i_mux_version > 0); - if (i_mux_version == 0) - asc_size = bs_pos(&asc_bs) - bs_pos(s); - asc_bs = *s; - st->i_extra = AudioSpecificConfigBitsToBytes(&asc_bs, asc_size, st->extra); - bs_skip(s, asc_size); - } - - st->i_frame_length_type = bs_read(s, 3); - switch(st->i_frame_length_type) - { - case 0: - { - bs_skip(s, 8); /* latmBufferFullnes */ - if (!m->b_same_time_framing) - if (st->cfg.i_object_type == AOT_AAC_SC || - st->cfg.i_object_type == AOT_CELP || - st->cfg.i_object_type == AOT_ER_AAC_SC || - st->cfg.i_object_type == AOT_ER_CELP) - bs_skip(s, 6); /* eFrameOffset */ - break; - } - case 1: - st->i_frame_length = bs_read(s, 9); - break; - case 3: case 4: case 5: - st->i_frame_length_index = bs_read(s, 6); // celp - break; - case 6: case 7: - st->i_frame_length_index = bs_read(s, 1); // hvxc - default: - break; - } - /* Next stream */ - m->i_streams++; - } - } - - if(bs_error(s) || bs_eof(s)) - return -1; - - /* other data */ - if (bs_read1(s)) { - if (i_mux_version == 1) - m->i_other_data = LatmGetValue(s); - else { - int b_continue; - do { - b_continue = bs_read1(s); - m->i_other_data = (m->i_other_data << 8) + bs_read(s, 8); - } while (b_continue); - } - } - - /* crc */ - m->i_crc = -1; - if (bs_read1(s)) - m->i_crc = bs_read(s, 8); - - return bs_error(s) ? -1 : 0; -} - static int LOASParse(decoder_t *p_dec, uint8_t *p_buffer, int i_buffer) { decoder_sys_t *p_sys = p_dec->p_sys; @@ -939,9 +437,9 @@ static int LOASParse(decoder_t *p_dec, uint8_t *p_buffer, int i_buffer) bs_init(&s, p_buffer, i_buffer); /* Read the stream mux configuration if present */ - if (!bs_read1(&s) && !LatmReadStreamMuxConfiguration(&p_sys->latm, &s) && + if (!bs_read1(&s) && !MPEG4_parse_StreamMuxConfig(&s, &p_sys->latm) && p_sys->latm.i_streams > 0) { - const latm_stream_t *st = &p_sys->latm.stream[0]; + const MPEG4_audio_stream_t *st = &p_sys->latm.stream[0]; if(st->cfg.i_samplerate == 0 || st->cfg.i_frame_length == 0 || ChannelConfigurationToVLC(st->cfg.i_channel_configuration) == 0) @@ -996,12 +494,12 @@ static int LOASParse(decoder_t *p_dec, uint8_t *p_buffer, int i_buffer) msg_Err(p_dec, "latm sub frames not yet supported, please send a sample"); for (uint8_t i_sub = 0; i_sub < p_sys->latm.i_sub_frames; i_sub++) { - unsigned pi_payload[LATM_MAX_PROGRAM][LATM_MAX_LAYER]; + unsigned pi_payload[MPEG4_STREAMMUX_MAX_PROGRAM][MPEG4_STREAMMUX_MAX_LAYER]; if (p_sys->latm.b_same_time_framing) { /* Payload length */ for (uint8_t i_program = 0; i_program < p_sys->latm.i_programs; i_program++) { for (uint8_t i_layer = 0; i_layer < p_sys->latm.pi_layers[i_program]; i_layer++) { - latm_stream_t *st = &p_sys->latm.stream[p_sys->latm.pi_stream[i_program][i_layer]]; + MPEG4_audio_stream_t *st = &p_sys->latm.stream[p_sys->latm.pi_stream[i_program][i_layer]]; if (st->i_frame_length_type == 0) { unsigned i_payload = 0; for (;;) { @@ -1055,7 +553,7 @@ static int LOASParse(decoder_t *p_dec, uint8_t *p_buffer, int i_buffer) for (int i_chunk = 0; i_chunk < i_chunks; i_chunk++) { const int streamIndex = bs_read(&s, 4); - latm_stream_t *st = &p_sys->latm.stream[streamIndex]; + MPEG4_audio_stream_t *st = &p_sys->latm.stream[streamIndex]; const int i_program = st->i_program; const int i_layer = st->i_layer; diff --git a/modules/packetizer/mpeg4audio.h b/modules/packetizer/mpeg4audio.h index fc44fd2aca..4ac4f76d06 100644 --- a/modules/packetizer/mpeg4audio.h +++ b/modules/packetizer/mpeg4audio.h @@ -1,7 +1,7 @@ /***************************************************************************** - * mpeg4audio.h: MPEG 4 audio definitions + * mpeg4audio.h: ISO/IEC 14496-3 audio definitions ***************************************************************************** - * Copyright (C) 2001-2017 VLC authors and VideoLAN + * Copyright (C) 2001-2024 VLC authors, VideoLAN and VideoLabs * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by @@ -17,68 +17,71 @@ * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. *****************************************************************************/ -enum mpeg4_audioObjectType /* ISO/IEC 14496-3:2009 1.5.1 */ +#include + +enum MPEG4_audioObjectType /* ISO/IEC 14496-3:2009 1.5.1 */ { - AOT_AAC_MAIN = 1, - AOT_AAC_LC = 2, - AOT_AAC_SSR = 3, - AOT_AAC_LTP = 4, - AOT_AAC_SBR = 5, - AOT_AAC_SC = 6, - AOT_TWINVQ = 7, - AOT_CELP = 8, - AOT_HVXC = 9, - AOT_RESERVED10 = 10, - AOT_RESERVED11 = 11, - AOT_TTSI = 12, - AOT_MAIN_SYNTHETIC = 13, - AOT_WAVETABLES = 14, - AOT_GENERAL_MIDI = 15, - AOT_ALGORITHMIC = 16, - AOT_ER_AAC_LC = 17, - AOT_RESERVED18 = 18, - AOT_ER_AAC_LTP = 19, - AOT_ER_AAC_SC = 20, - AOT_ER_TWINVQ = 21, - AOT_ER_BSAC = 22, - AOT_ER_AAC_LD = 23, - AOT_ER_CELP = 24, - AOT_ER_HXVC = 25, - AOT_ER_HILN = 26, - AOT_ER_Parametric = 27, - AOT_SSC = 28, - AOT_AAC_PS = 29, - AOT_MPEG_SURROUND = 30, - AOT_ESCAPE = 31, - AOT_LAYER1 = 32, - AOT_LAYER2 = 33, - AOT_LAYER3 = 34, - AOT_DST = 35, - AOT_ALS = 36, - AOT_SLS = 37, - AOT_SLS_NON_CORE = 38, - AOT_ER_AAC_ELD = 39, - AOT_SMR_SIMPLE = 40, - AOT_SMR_MAIN = 41, + MPEG4_AOT_NULL = 0, + MPEG4_AOT_AAC_MAIN = 1, + MPEG4_AOT_AAC_LC = 2, + MPEG4_AOT_AAC_SSR = 3, + MPEG4_AOT_AAC_LTP = 4, + MPEG4_AOT_AAC_SBR = 5, + MPEG4_AOT_AAC_SC = 6, + MPEG4_AOT_TWINVQ = 7, + MPEG4_AOT_CELP = 8, + MPEG4_AOT_HVXC = 9, + MPEG4_AOT_RESERVED10 = 10, + MPEG4_AOT_RESERVED11 = 11, + MPEG4_AOT_TTSI = 12, + MPEG4_AOT_MAIN_SYNTHETIC = 13, + MPEG4_AOT_WAVETABLES = 14, + MPEG4_AOT_GENERAL_MIDI = 15, + MPEG4_AOT_ALGORITHMIC = 16, + MPEG4_AOT_ER_AAC_LC = 17, + MPEG4_AOT_RESERVED18 = 18, + MPEG4_AOT_ER_AAC_LTP = 19, + MPEG4_AOT_ER_AAC_SC = 20, + MPEG4_AOT_ER_TWINVQ = 21, + MPEG4_AOT_ER_BSAC = 22, + MPEG4_AOT_ER_AAC_LD = 23, + MPEG4_AOT_ER_CELP = 24, + MPEG4_AOT_ER_HXVC = 25, + MPEG4_AOT_ER_HILN = 26, + MPEG4_AOT_ER_Parametric = 27, + MPEG4_AOT_SSC = 28, + MPEG4_AOT_AAC_PS = 29, + MPEG4_AOT_MPEG_SURROUND = 30, + MPEG4_AOT_ESCAPE = 31, + MPEG4_AOT_LAYER1 = 32, + MPEG4_AOT_LAYER2 = 33, + MPEG4_AOT_LAYER3 = 34, + MPEG4_AOT_DST = 35, + MPEG4_AOT_ALS = 36, + MPEG4_AOT_SLS = 37, + MPEG4_AOT_SLS_NON_CORE = 38, + MPEG4_AOT_ER_AAC_ELD = 39, + MPEG4_AOT_SMR_SIMPLE = 40, + MPEG4_AOT_SMR_MAIN = 41, }; enum { - AAC_PROFILE_MAIN = AOT_AAC_MAIN - 1, + AAC_PROFILE_MAIN = MPEG4_AOT_AAC_MAIN - 1, AAC_PROFILE_LC, AAC_PROFILE_SSR, AAC_PROFILE_LTP, AAC_PROFILE_HE, - AAC_PROFILE_LD = AOT_ER_AAC_LD - 1, - AAC_PROFILE_HEv2 = AOT_AAC_PS - 1, - AAC_PROFILE_ELD = AOT_ER_AAC_ELD - 1, + AAC_PROFILE_LD = MPEG4_AOT_ER_AAC_LD - 1, + AAC_PROFILE_HEv2 = MPEG4_AOT_AAC_PS - 1, + AAC_PROFILE_ELD = MPEG4_AOT_ER_AAC_ELD - 1, /* Similar shift signaling as avcodec, as signaling should have been done in ADTS header. Values defaults to MPEG4 */ AAC_PROFILE_MPEG2_LC = AAC_PROFILE_LC + 128, AAC_PROFILE_MPEG2_HE = AAC_PROFILE_HE + 128, }; -static const uint32_t mpeg4_asc_channelsbyindex[] = +static const uint32_t MPEG4_asc_channelsbyindex[] = { [0] = 0, /* Set later */ @@ -110,4 +113,505 @@ static const uint32_t mpeg4_asc_channelsbyindex[] = [8] = 0, }; -#define MPEG4_ASC_MAX_INDEXEDPOS ARRAY_SIZE(mpeg4_asc_channelsbyindex) +#define MPEG4_ASC_MAX_INDEXEDPOS ARRAY_SIZE(MPEG4_asc_channelsbyindex) + +typedef struct +{ + enum MPEG4_audioObjectType i_object_type; + unsigned i_samplerate; + uint8_t i_channel_configuration; + int8_t i_sbr; // 0: no sbr, 1: sbr, -1: unknown + int8_t i_ps; // 0: no ps, 1: ps, -1: unknown + + struct + { + enum MPEG4_audioObjectType i_object_type; + unsigned i_samplerate; + uint8_t i_channel_configuration; + } extension; + + /* GASpecific */ + unsigned i_frame_length; // 1024 or 960 + +} MPEG4_asc_t; + +static inline int MPEG4_read_GAProgramConfigElement(bs_t *s) +{ + /* TODO compute channels count ? */ + int i_tag = bs_read(s, 4); + if (i_tag != 0x05) + return -1; + bs_skip(s, 2 + 4); // object type + sampling index + int i_num_front = bs_read(s, 4); + int i_num_side = bs_read(s, 4); + int i_num_back = bs_read(s, 4); + int i_num_lfe = bs_read(s, 2); + int i_num_assoc_data = bs_read(s, 3); + int i_num_valid_cc = bs_read(s, 4); + + if (bs_read1(s)) + bs_skip(s, 4); // mono downmix + if (bs_read1(s)) + bs_skip(s, 4); // stereo downmix + if (bs_read1(s)) + bs_skip(s, 2+1); // matrix downmix + pseudo_surround + + bs_skip(s, i_num_front * (1+4)); + bs_skip(s, i_num_side * (1+4)); + bs_skip(s, i_num_back * (1+4)); + bs_skip(s, i_num_lfe * (4)); + bs_skip(s, i_num_assoc_data * (4)); + bs_skip(s, i_num_valid_cc * (5)); + bs_align(s); + int i_comment = bs_read(s, 8); + bs_skip(s, i_comment * 8); + return 0; +} + +static inline int MPEG4_read_GASpecificConfig(MPEG4_asc_t *p_cfg, bs_t *s) +{ + p_cfg->i_frame_length = bs_read1(s) ? 960 : 1024; + if(p_cfg->i_object_type == MPEG4_AOT_ER_AAC_LD) /* 14496-3 4.5.1.1 */ + p_cfg->i_frame_length >>= 1; + else if(p_cfg->i_object_type == MPEG4_AOT_AAC_SSR) + p_cfg->i_frame_length = 256; + + if (bs_read1(s)) // depend on core coder + bs_skip(s, 14); // core coder delay + + int i_extension_flag = bs_read1(s); + if (p_cfg->i_channel_configuration == 0) + MPEG4_read_GAProgramConfigElement(s); + if (p_cfg->i_object_type == MPEG4_AOT_AAC_SC || + p_cfg->i_object_type == MPEG4_AOT_ER_AAC_SC) + bs_skip(s, 3); // layer + + if (i_extension_flag) { + if (p_cfg->i_object_type == MPEG4_AOT_ER_BSAC) + bs_skip(s, 5 + 11); // numOfSubFrame + layer length + if (p_cfg->i_object_type == MPEG4_AOT_ER_AAC_LC || + p_cfg->i_object_type == MPEG4_AOT_ER_AAC_LTP || + p_cfg->i_object_type == MPEG4_AOT_ER_AAC_SC || + p_cfg->i_object_type == MPEG4_AOT_ER_AAC_LD) + bs_skip(s, 1+1+1); // ER data : section scale spectral */ + if (bs_read1(s)) // extension 3 + fprintf(stderr, "MPEG4GASpecificConfig: error 1\n"); + } + return 0; +} + +static inline int MPEG4_read_ELDSpecificConfig(MPEG4_asc_t *p_cfg, bs_t *s) +{ + p_cfg->i_frame_length = bs_read1(s) ? 480 : 512; + + /* ELDSpecificConfig Table 4.180 */ + + bs_skip(s, 3); + if(bs_read1(s)) /* ldSbrPresentFlag */ + { + bs_skip(s, 2); + /* ld_sbr_header(channelConfiguration) Table 4.181 */ + unsigned numSbrHeader; + switch(p_cfg->i_channel_configuration) + { + case 1: case 2: + numSbrHeader = 1; + break; + case 3: + numSbrHeader = 2; + break; + case 4: case 5: case 6: + numSbrHeader = 3; + break; + case 7: + numSbrHeader = 4; + break; + default: + numSbrHeader = 0; + break; + } + for( ; numSbrHeader; numSbrHeader-- ) + { + /* sbr_header() Table 4.63 */ + bs_read(s, 14); + bool header_extra_1 = bs_read1(s); + bool header_extra_2 = bs_read1(s); + if(header_extra_1) + bs_read(s, 5); + if(header_extra_2) + bs_read(s, 6); + } + } + + for(unsigned eldExtType = bs_read(s, 4); + eldExtType != 0x0 /* ELDEXT_TERM */; + eldExtType = bs_read(s, 4)) + { + unsigned eldExtLen = bs_read(s, 4); + unsigned eldExtLenAdd = 0; + if(eldExtLen == 15) + { + eldExtLenAdd = bs_read(s, 8); + eldExtLen += eldExtLenAdd; + } + if(eldExtLenAdd == 255) + eldExtLen += bs_read(s, 16); + /* reserved extensions */ + for(; eldExtLen; eldExtLen--) + bs_skip(s, 8); + } + + return 0; +} + +static inline enum MPEG4_audioObjectType MPEG4_read_AudioObjectType(bs_t *s) +{ + int i_type = bs_read(s, 5); + if (i_type == 31) + i_type = 32 + bs_read(s, 6); + return (enum MPEG4_audioObjectType) i_type; +} + +static const int pi_sample_rates[16] = + { + 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, + 16000, 12000, 11025, 8000, 7350, 0, 0, 0 +}; + +static inline unsigned MPEG4_read_AudioSamplerate(bs_t *s) +{ + int i_index = bs_read(s, 4); + if (i_index != 0x0f) + return pi_sample_rates[i_index]; + return bs_read(s, 24); +} + +static inline int MPEG4_read_AudioSpecificConfig(bs_t *s, MPEG4_asc_t *p_cfg, bool b_withext) +{ + p_cfg->i_object_type = MPEG4_read_AudioObjectType(s); + p_cfg->i_samplerate = MPEG4_read_AudioSamplerate(s); + p_cfg->i_channel_configuration = bs_read(s, 4); + + p_cfg->i_sbr = -1; + p_cfg->i_ps = -1; + p_cfg->extension.i_object_type = MPEG4_AOT_NULL; + p_cfg->extension.i_samplerate = 0; + p_cfg->extension.i_channel_configuration = 0; + p_cfg->i_frame_length = 0; + + if (p_cfg->i_object_type == MPEG4_AOT_AAC_SBR || + p_cfg->i_object_type == MPEG4_AOT_AAC_PS) { + p_cfg->i_sbr = 1; + if (p_cfg->i_object_type == MPEG4_AOT_AAC_PS) + p_cfg->i_ps = 1; + p_cfg->extension.i_object_type = MPEG4_AOT_AAC_SBR; + p_cfg->extension.i_samplerate = MPEG4_read_AudioSamplerate(s); + + p_cfg->i_object_type = MPEG4_read_AudioObjectType(s); + if(p_cfg->i_object_type == MPEG4_AOT_ER_BSAC) + p_cfg->extension.i_channel_configuration = bs_read(s, 4); + } + + switch(p_cfg->i_object_type) + { + case MPEG4_AOT_AAC_MAIN: + case MPEG4_AOT_AAC_LC: + case MPEG4_AOT_AAC_SSR: + case MPEG4_AOT_AAC_LTP: + case MPEG4_AOT_AAC_SC: + case MPEG4_AOT_TWINVQ: + case MPEG4_AOT_ER_AAC_LC: + case MPEG4_AOT_ER_AAC_LTP: + case MPEG4_AOT_ER_AAC_SC: + case MPEG4_AOT_ER_TWINVQ: + case MPEG4_AOT_ER_BSAC: + case MPEG4_AOT_ER_AAC_LD: + MPEG4_read_GASpecificConfig(p_cfg, s); + break; + case MPEG4_AOT_CELP: + // CelpSpecificConfig(); + case MPEG4_AOT_HVXC: + // HvxcSpecificConfig(); + case MPEG4_AOT_TTSI: + // TTSSSpecificConfig(); + case MPEG4_AOT_MAIN_SYNTHETIC: + case MPEG4_AOT_WAVETABLES: + case MPEG4_AOT_GENERAL_MIDI: + case MPEG4_AOT_ALGORITHMIC: + // StructuredAudioSpecificConfig(); + case MPEG4_AOT_ER_CELP: + // ERCelpSpecificConfig(); + case MPEG4_AOT_ER_HXVC: + // ERHvxcSpecificConfig(); + case MPEG4_AOT_ER_HILN: + case MPEG4_AOT_ER_Parametric: + // ParametricSpecificConfig(); + case MPEG4_AOT_SSC: + // SSCSpecificConfig(); + case MPEG4_AOT_LAYER1: + case MPEG4_AOT_LAYER2: + case MPEG4_AOT_LAYER3: + // MPEG_1_2_SpecificConfig(); + case MPEG4_AOT_DST: + // DSTSpecificConfig(); + case MPEG4_AOT_ALS: + // ALSSpecificConfig(); + case MPEG4_AOT_SLS: + case MPEG4_AOT_SLS_NON_CORE: + // SLSSpecificConfig(); + case MPEG4_AOT_ER_AAC_ELD: + MPEG4_read_ELDSpecificConfig(p_cfg, s); + break; + case MPEG4_AOT_SMR_SIMPLE: + case MPEG4_AOT_SMR_MAIN: + // SymbolicMusicSpecificConfig(); + default: + // error + return VLC_EGENERIC; + } + + switch(p_cfg->i_object_type) + { + case MPEG4_AOT_ER_AAC_LC: + case MPEG4_AOT_ER_AAC_LTP: + case MPEG4_AOT_ER_AAC_SC: + case MPEG4_AOT_ER_TWINVQ: + case MPEG4_AOT_ER_BSAC: + case MPEG4_AOT_ER_AAC_LD: + case MPEG4_AOT_ER_CELP: + case MPEG4_AOT_ER_HXVC: + case MPEG4_AOT_ER_HILN: + case MPEG4_AOT_ER_Parametric: + case MPEG4_AOT_ER_AAC_ELD: + { + int epConfig = bs_read(s, 2); + if (epConfig == 2 || epConfig == 3) + //ErrorProtectionSpecificConfig(); + if (epConfig == 3) + if (bs_read1(s)) { + // TODO : directMapping + } + break; + } + default: + break; + } + + if (b_withext && p_cfg->extension.i_object_type != MPEG4_AOT_AAC_SBR && + !bs_eof(s) && bs_read(s, 11) == 0x2b7) + { + p_cfg->extension.i_object_type = MPEG4_read_AudioObjectType(s); + if (p_cfg->extension.i_object_type == MPEG4_AOT_AAC_SBR) + { + p_cfg->i_sbr = bs_read1(s); + if (p_cfg->i_sbr == 1) { + p_cfg->extension.i_samplerate = MPEG4_read_AudioSamplerate(s); + if (bs_read(s, 11) == 0x548) + p_cfg->i_ps = bs_read1(s); + } + } + else if (p_cfg->extension.i_object_type == MPEG4_AOT_ER_BSAC) + { + p_cfg->i_sbr = bs_read1(s); + if(p_cfg->i_sbr) + p_cfg->extension.i_samplerate = MPEG4_read_AudioSamplerate(s); + p_cfg->extension.i_channel_configuration = bs_read(s, 4); + } + } + +#if 0 + static const char *ppsz_otype[] = { + "NULL", + "AAC Main", "AAC LC", "AAC SSR", "AAC LTP", "SBR", "AAC Scalable", + "TwinVQ", + "CELP", "HVXC", + "Reserved", "Reserved", + "TTSI", + "Main Synthetic", "Wavetables Synthesis", "General MIDI", + "Algorithmic Synthesis and Audio FX", + "ER AAC LC", + "Reserved", + "ER AAC LTP", "ER AAC Scalable", "ER TwinVQ", "ER BSAC", "ER AAC LD", + "ER CELP", "ER HVXC", "ER HILN", "ER Parametric", + "SSC", + "PS", "MPEG Surround", "Escape", + "Layer 1", "Layer 2", "Layer 3", + "DST", "ALS", "SLS", "SLS non-core", "ELD", + "SMR Simple", "SMR Main", + }; + + fprintf(stderr, "MPEG4ReadAudioSpecificInfo: t=%s(%d)f=%d c=%d sbr=%d\n", + ppsz_otype[p_cfg->i_object_type], p_cfg->i_object_type, + p_cfg->i_samplerate, p_cfg->i_channel, p_cfg->i_sbr); +#endif + return bs_error(s) ? VLC_EGENERIC : VLC_SUCCESS; +} + +#define MPEG4_STREAM_MAX_EXTRADATA 64 +typedef struct +{ + uint8_t i_program; + uint8_t i_layer; + + unsigned i_frame_length; // type 1 + uint8_t i_frame_length_type; + uint8_t i_frame_length_index; // type 3 4 5 6 7 + + MPEG4_asc_t cfg; + + /* Raw configuration */ + size_t i_extra; + uint8_t extra[MPEG4_STREAM_MAX_EXTRADATA]; + +} MPEG4_audio_stream_t; + +#define MPEG4_STREAMMUX_MAX_LAYER 8 +#define MPEG4_STREAMMUX_MAX_PROGRAM 16 +typedef struct +{ + bool b_same_time_framing; + uint8_t i_sub_frames; + uint8_t i_programs; + + uint8_t pi_layers[MPEG4_STREAMMUX_MAX_PROGRAM]; + + uint8_t pi_stream[MPEG4_STREAMMUX_MAX_PROGRAM][MPEG4_STREAMMUX_MAX_LAYER]; + + uint8_t i_streams; + MPEG4_audio_stream_t stream[MPEG4_STREAMMUX_MAX_PROGRAM*MPEG4_STREAMMUX_MAX_LAYER]; + + uint32_t i_other_data; + int16_t i_crc; /* -1 if not set */ +} MPEG4_streammux_config_t; + +static inline uint32_t MPEG4_LatmGetValue(bs_t *s) +{ + uint32_t v = 0; + for (int i = 1 + bs_read(s, 2); i > 0; i--) + v = (v << 8) + bs_read(s, 8); + return v; +} + +static inline size_t AudioSpecificConfigBitsToBytes(bs_t *s, uint32_t i_bits, uint8_t *p_data) +{ + size_t i_extra = __MIN((i_bits + 7) / 8, MPEG4_STREAM_MAX_EXTRADATA); + for (size_t i = 0; i < i_extra; i++) { + const uint32_t i_read = __MIN(8, i_bits - 8*i); + p_data[i] = bs_read(s, i_read) << (8-i_read); + } + return i_extra; +} + +static inline int MPEG4_parse_StreamMuxConfig(bs_t *s, MPEG4_streammux_config_t *m) +{ + int i_mux_version; + int i_mux_versionA; + + i_mux_version = bs_read(s, 1); + i_mux_versionA = 0; + if (i_mux_version) + i_mux_versionA = bs_read(s, 1); + + if (i_mux_versionA != 0) /* support only A=0 */ + return -1; + + memset(m, 0, sizeof(*m)); + + if (i_mux_versionA == 0) + if (i_mux_version == 1) + MPEG4_LatmGetValue(s); /* taraBufferFullness */ + + if(bs_eof(s)) + return -1; + + m->b_same_time_framing = bs_read1(s); + m->i_sub_frames = 1 + bs_read(s, 6); + m->i_programs = 1 + bs_read(s, 4); + + for (uint8_t i_program = 0; i_program < m->i_programs; i_program++) { + if(bs_eof(s)) + return -1; + m->pi_layers[i_program] = 1+bs_read(s, 3); + + for (uint8_t i_layer = 0; i_layer < m->pi_layers[i_program]; i_layer++) { + MPEG4_audio_stream_t *st = &m->stream[m->i_streams]; + bool b_previous_cfg; + + m->pi_stream[i_program][i_layer] = m->i_streams; + st->i_program = i_program; + st->i_layer = i_layer; + + b_previous_cfg = false; + if (i_program != 0 || i_layer != 0) + b_previous_cfg = bs_read1(s); + + if (b_previous_cfg) { + assert(m->i_streams > 0); + st->cfg = m->stream[m->i_streams-1].cfg; + } else { + uint32_t asc_size = 0; + if(i_mux_version > 0) + asc_size = MPEG4_LatmGetValue(s); + bs_t asc_bs = *s; + MPEG4_read_AudioSpecificConfig(&asc_bs, &st->cfg, i_mux_version > 0); + if (i_mux_version == 0) + asc_size = bs_pos(&asc_bs) - bs_pos(s); + asc_bs = *s; + st->i_extra = AudioSpecificConfigBitsToBytes(&asc_bs, asc_size, st->extra); + bs_skip(s, asc_size); + } + + st->i_frame_length_type = bs_read(s, 3); + switch(st->i_frame_length_type) + { + case 0: + { + bs_skip(s, 8); /* latmBufferFullnes */ + if (!m->b_same_time_framing) + if (st->cfg.i_object_type == MPEG4_AOT_AAC_SC || + st->cfg.i_object_type == MPEG4_AOT_CELP || + st->cfg.i_object_type == MPEG4_AOT_ER_AAC_SC || + st->cfg.i_object_type == MPEG4_AOT_ER_CELP) + bs_skip(s, 6); /* eFrameOffset */ + break; + } + case 1: + st->i_frame_length = bs_read(s, 9); + break; + case 3: case 4: case 5: + st->i_frame_length_index = bs_read(s, 6); // celp + break; + case 6: case 7: + st->i_frame_length_index = bs_read(s, 1); // hvxc + default: + break; + } + /* Next stream */ + m->i_streams++; + } + } + + if(bs_error(s) || bs_eof(s)) + return -1; + + /* other data */ + if (bs_read1(s)) { + if (i_mux_version == 1) + m->i_other_data = MPEG4_LatmGetValue(s); + else { + int b_continue; + do { + b_continue = bs_read1(s); + m->i_other_data = (m->i_other_data << 8) + bs_read(s, 8); + } while (b_continue); + } + } + + /* crc */ + m->i_crc = -1; + if (bs_read1(s)) + m->i_crc = bs_read(s, 8); + + return bs_error(s) ? -1 : 0; +} +