FFMPEG library implements encapsulation and separation of mp4/flv files (H264+AAC)

FFmpeg 4.4 (personally tested and working)

1. Use the FFmpeg library to mux H.264 video and AAC audio data into mp4/flv files

Encapsulation process

1. Use avformat_open_input to open the video and audio files respectively, initialize their AVFormatContext, and use avformat_find_stream_info to obtain basic encoder information.

2. Use avformat_alloc_output_context2 to initialize the output AVFormatContext structure

3. Use the function avformat_new_stream to create audio and video streams for the output AVFormatContext structure, and use the avcodec_parameters_copy method to copy the audio and video encoding parameters to the codecpar structure of the newly created corresponding stream.

4. Use avio_open to open the output file and initialize the IO context structure in the output AVFormatContext structure.

5. Use avformat_write_header to write the header information of the stream to the output file

6. Interleave the audio and video data according to the timestamp synchronization principle, and set and calibrate the timestamp information.

7. Use av_write_trailer to write the stream trailer (index information, e.g. the moov box for mp4) to the output file

8. Free up space and close the file


#include <stdio.h>
#ifdef _WIN32
// Windows
extern "C"
{
#include "libavformat/avformat.h"
}
#else
// Linux and other platforms
#ifdef __cplusplus
extern "C"
{
#endif
#include <libavformat/avformat.h>
#ifdef __cplusplus
}
#endif
#endif

int main(int argc, char* argv[]) {
    const AVOutputFormat* ofmt = NULL;
    //Input AVFormatContext and Output AVFormatContext
    AVFormatContext* ifmt_ctx_v = NULL, * ifmt_ctx_a = NULL, * ofmt_ctx = NULL;
    AVPacket pkt;
    int ret;
    unsigned int i;
    int videoindex_v = -1, videoindex_out = -1;
    int audioindex_a = -1, audioindex_out = -1;
    int frame_index = 0;
    int64_t cur_pts_v = 0, cur_pts_a = 0;
    int writing_v = 1, writing_a = 1;
    const char* in_filename_v = "test.h264";
    const char* in_filename_a = "audio_chn0.aac";
    const char* out_filename = "test.mp4";//Output file URL
    if ((ret = avformat_open_input( & amp;ifmt_ctx_v, in_filename_v, 0, 0)) < 0) {
        printf("Could not open input file.");
        goto end;
    if ((ret = avformat_find_stream_info(ifmt_ctx_v, 0)) < 0) {
        printf("Failed to retrieve input stream information");
        goto end;
    if ((ret = avformat_open_input( & amp;ifmt_ctx_a, in_filename_a, 0, 0)) < 0) {
        printf("Could not open input file.");
        goto end;
    if ((ret = avformat_find_stream_info(ifmt_ctx_a, 0)) < 0) {
        printf("Failed to retrieve input stream information");
        goto end;
    avformat_alloc_output_context2( & amp;ofmt_ctx, NULL, NULL, out_filename);
    if (!ofmt_ctx) {
        printf("Could not create output context\
        ret = AVERROR_UNKNOWN;
        goto end;
    ofmt = ofmt_ctx->oformat;
    for (i = 0; i < ifmt_ctx_v->nb_streams; i + + ) {
        //Create output AVStream according to input AVStream
        if (ifmt_ctx_v->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
            AVStream* out_stream = avformat_new_stream(ofmt_ctx, nullptr);
            videoindex_v = i;
            if (!out_stream) {
                printf("Failed allocating output stream\
                ret = AVERROR_UNKNOWN;
                goto end;
            videoindex_out = out_stream->index;
            //Copy the settings of AVCodecContext
            if (avcodec_parameters_copy(out_stream->codecpar, ifmt_ctx_v->streams[i]->codecpar) < 0) {
                printf("Failed to copy context from input to output stream codec context\
                goto end;
    for (i = 0; i < ifmt_ctx_a->nb_streams; i + + ) {
        //Create output AVStream according to input AVStream
        if (ifmt_ctx_a->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            AVStream* out_stream = avformat_new_stream(ofmt_ctx, nullptr);
            audioindex_a = i;
            if (!out_stream) {
                printf("Failed allocating output stream\
                ret = AVERROR_UNKNOWN;
                goto end;
            audioindex_out = out_stream->index;
            //Copy the settings of AVCodecContext
            if (avcodec_parameters_copy(out_stream->codecpar, ifmt_ctx_a->streams[i]->codecpar) < 0) {
                printf("Failed to copy context from input to output stream codec context\
                goto end;
            out_stream->codecpar->codec_tag = 0;
            if (ofmt_ctx->oformat->flags & amp; AVFMT_GLOBALHEADER)
                ofmt_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    /* open the output file, if needed */
    if (!(ofmt->flags & amp; AVFMT_NOFILE)) {
        if (avio_open( & amp;ofmt_ctx->pb, out_filename, AVIO_FLAG_WRITE)) {
            fprintf(stderr, "Could not open '%s': %d\
", out_filename,
            goto end;
    //Write file header
    if (avformat_write_header(ofmt_ctx, NULL) < 0) {
        fprintf(stderr, "Error occurred when opening output file: %d\
        goto end;
    //data input
    while (writing_v || writing_a)
        AVFormatContext* ifmt_ctx;
        int stream_index = 0;
        AVStream* in_stream, * out_stream;
        int av_type = 0;

        if (writing_v & amp; & amp;
            (!writing_a || av_compare_ts(cur_pts_v, ifmt_ctx_v->streams[videoindex_v]->time_base,
                cur_pts_a, ifmt_ctx_a->streams[audioindex_a]->time_base) <= 0))
            av_type = 0;
            ifmt_ctx = ifmt_ctx_v;
            stream_index = videoindex_out;
            if (av_read_frame(ifmt_ctx, & amp;pkt) >= 0)
                do {
                    in_stream = ifmt_ctx->streams[pkt.stream_index];
                    out_stream = ofmt_ctx->streams[stream_index];
                    if (pkt.stream_index == videoindex_v)
                        //FIX: No PTS (Example: Raw H.264)
                        //Simple Write PTS
                        if (pkt.pts == AV_NOPTS_VALUE)
                            //Write PTS
                            AVRational time_base1 = in_stream->time_base;
                            //Duration between 2 frames (us)
                            int64_t calc_duration = (double)AV_TIME_BASE / av_q2d(in_stream->r_frame_rate);
                            pkt.pts = (double)(frame_index * calc_duration) / (double)(av_q2d(time_base1) * AV_TIME_BASE);
                            pkt.dts = pkt.pts;
                            pkt.duration = (double)calc_duration / (double)(av_q2d(time_base1) * AV_TIME_BASE);
                            frame_index + + ;
                            printf("frame_index: %d\
", frame_index);
                        cur_pts_v = pkt.pts;
                } while
                    (av_read_frame(ifmt_ctx, & amp;pkt) >= 0);
                writing_v = 0;
            av_type = 1;
            ifmt_ctx = ifmt_ctx_a;
            stream_index = audioindex_out;

            if (av_read_frame(ifmt_ctx, & amp;pkt) >= 0)
                do {
                    in_stream = ifmt_ctx->streams[pkt.stream_index];
                    out_stream = ofmt_ctx->streams[stream_index];
                    if (pkt.stream_index == audioindex_a)
                        //FIX: No PTS
                        //Simple Write PTS
                        if (pkt.pts == AV_NOPTS_VALUE)
                            //Write PTS
                            AVRational time_base1 = in_stream->time_base;
                            //Duration between 2 frames (us)
                            int64_t calc_duration = (double)AV_TIME_BASE / av_q2d(in_stream->r_frame_rate);
                            pkt.pts = (double)(frame_index * calc_duration) /
                                (double)(av_q2d(time_base1) * AV_TIME_BASE);
                            pkt.dts = pkt.pts;
                            pkt.duration = (double)calc_duration / (double)(av_q2d(time_base1) * AV_TIME_BASE);
                            frame_index + + ;
                        cur_pts_a = pkt.pts;
                } while (av_read_frame(ifmt_ctx, & amp;pkt) >= 0);
                writing_a = 0;
        //Convert PTS/DTS
        pkt.pts = av_rescale_q_rnd(pkt.pts, in_stream->time_base, out_stream->time_base,
            (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
        pkt.dts = av_rescale_q_rnd(pkt.dts, in_stream->time_base, out_stream->time_base,
            (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
        pkt.duration = av_rescale_q(pkt.duration, in_stream->time_base, out_stream->time_base);
        pkt.pos = -1;
        pkt.stream_index = stream_index;
        printf("Write 1 Packet. type:%d, size:%d\tpts:%ld\
", av_type, pkt.size, pkt.pts);
        if (av_interleaved_write_frame(ofmt_ctx, & amp;pkt) < 0) {
            printf("Error muxing packet\
        av_packet_unref( & amp;pkt);
    printf("Write file trailer.\
    //Write file trailer
    avformat_close_input( & amp;ifmt_ctx_v);
    avformat_close_input( & amp;ifmt_ctx_a);
    /* close output */
    if (ofmt_ctx & amp; & amp; !(ofmt->flags & amp; AVFMT_NOFILE))
    if (ret < 0 & amp; & amp; ret != AVERROR_EOF) {
        printf("Error occurred.\
        return -1;
    return 0;


# Cross-build of the mp4 muxer example against the FFmpeg libraries in mp4Lib.
# NOTE(review): the CC/STRIP definitions and the rule headers were missing from
# the published listing and have been reconstructed here - confirm them against
# your toolchain. Recipe lines must start with a TAB character.
CROSS_COMPILE = aarch64-himix200-linux-

# The sources are .cpp files, so the C++ driver is used to compile and link.
CC    = $(CROSS_COMPILE)g++
STRIP = $(CROSS_COMPILE)strip

CFLAGS = -Wall -O2 -I../../source/mp4Lib/include
LIBS += -L../../source/mp4Lib/lib -lpthread
LIBS += -lavformat -lavcodec -lavdevice -lavutil -lavfilter -lswscale -lswresample -lz

SRCS = $(wildcard *.cpp)
OBJS = $(SRCS:%.cpp=%.o)
DEPS = $(SRCS:%.cpp=%.d)
TARGET = mp4muxer

all: $(TARGET)

# Pull in the generated header dependencies; the leading '-' ignores missing files.
-include $(DEPS)

%.o: %.cpp
	$(CC) $(CFLAGS) -c -o $@ $<

# Generate foo.d so that both foo.o and foo.d depend on the headers foo.cpp uses.
%.d: %.cpp
	@set -e; rm -f $@; \
	$(CC) -MM $(CFLAGS) $< > $@.$$$$; \
	sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \
	rm -f $@.$$$$

$(TARGET): $(OBJS)
	$(CC) -o $@ $^ $(LIBS)
	$(STRIP) $@

.PHONY: all clean

clean:
	rm -fr $(TARGET) $(OBJS) $(DEPS)

2. Use FFmpeg to demux (separate) the H.264 video and AAC audio in mp4/flv files

Separation process

1. Use avformat_open_input function to open the file and initialize the structure AVFormatContext

2. Find whether audio and video information exists

3. Construct an h264_mp4toannexb bitstream filter to add header information (start codes, SPS/PPS) to the video AVPacket packets

4. Open 2 output files (audio, video)

5. Read the video file in a loop and write the audio and video to the file respectively

Note: The ADTS header has to be added to each audio packet manually; there is no bitstream filter that adds the AAC ADTS header automatically.


#include <stdio.h>
extern "C"
{
#include <libavformat/avformat.h>
}
/* Print the encoder supports this sampling rate and find the specified sampling rate subscript */
static int find_sample_rate_index(const AVCodec* codec, int sample_rate)
        const int* p = codec->supported_samplerates;
        int sample_rate_index = -1; //Supported resolution subscripts
        int count = 0;
        while (*p != 0) {// 0 as exit condition, such as aac_sample_rates of libfdk-aacenc.c
                printf("%s supports sampling rate: %dhz corresponding subscript: %d\
", codec->name, *p, count);
                if (*p == sample_rate)
                        sample_rate_index = count;
                p + + ;
                count + + ;
        return sample_rate_index;
/// <summary>
/// Build the 7-byte ADTS header (no CRC) that precedes one raw AAC frame.
/// </summary>
/// <param name="header">Output buffer; exactly 7 bytes are written</param>
/// <param name="sample_rate">Sampling rate in Hz; an unlisted rate falls back to the 44100 Hz index</param>
/// <param name="channals">Number of channels (1-6, or 8); any other value falls back to stereo</param>
/// <param name="prfile">Audio encoder profile (FF_PROFILE_AAC_LOW defined in avcodec.h); values outside 0-3 fall back to AAC LC</param>
/// <param name="len">Length of the raw AAC packet in bytes, without the header</param>
void addHeader(char header[], int sample_rate, int channals, int prfile, int len)
{
    uint8_t sampleIndex;
    uint8_t profileBits;
    uint8_t channelConfig;
    unsigned int frameLen;

    switch (sample_rate) {
    case 96000: sampleIndex = 0; break;
    case 88200: sampleIndex = 1; break;
    case 64000: sampleIndex = 2; break;
    case 48000: sampleIndex = 3; break;
    case 44100: sampleIndex = 4; break;
    case 32000: sampleIndex = 5; break;
    case 24000: sampleIndex = 6; break;
    case 22050: sampleIndex = 7; break;
    case 16000: sampleIndex = 8; break;
    case 12000: sampleIndex = 9; break;
    case 11025: sampleIndex = 10; break;
    case 8000: sampleIndex = 11; break;
    case 7350: sampleIndex = 12; break;
    default: sampleIndex = 4; break;
    }

    //The 2-bit ADTS profile is "audio object type - 1". FFmpeg's
    //FF_PROFILE_AAC_MAIN/LOW/SSR/LTP are 0..3 and can be stored directly;
    //anything else (unknown, HE-AAC, ...) is signalled as AAC LC.
    profileBits = (prfile >= 0 && prfile <= 3) ? (uint8_t)prfile : 1;

    //Channel configuration equals the channel count for 1..6 channels;
    //configuration 7 means 8 channels. Other counts keep the stereo default.
    if (channals >= 1 && channals <= 6)
        channelConfig = (uint8_t)channals;
    else if (channals == 8)
        channelConfig = 7;
    else
        channelConfig = 2;

    //The frame length field counts header + payload and is 13 bits wide.
    if (len < 0)
        len = 0;
    frameLen = ((unsigned int)len + 7u) & 0x1fffu;

    //0,1 are fixed
    //syncword 0xfff (12 bits), MPEG Version:0 for MPEG-4,1 for MPEG-2 (1 bit),
    //Layer:0 (2 bits), protection absent:1 (1 bit)
    header[0] = (char)0xff;
    header[1] = (char)0xf1;
    //profile (2 bits), sampling frequency index (4 bits), private bit:0 (1 bit),
    //channel configuration high 1 bit
    header[2] = (char)((profileBits << 6) | ((sampleIndex & 0x0f) << 2) | ((channelConfig & 0x04) >> 2));
    //channel configuration low 2 bits, original/home/copyright id bit/
    //copyright id start: all 0 (4 bits), frame length high 2 bits
    header[3] = (char)(((channelConfig & 0x03) << 6) | ((frameLen & 0x1800) >> 11));
    //frame length middle 8 bits
    header[4] = (char)((frameLen & 0x7f8) >> 3);
    //frame length low 3 bits, buffer fullness 0x7ff high 5 bits
    header[5] = (char)(((frameLen & 0x7) << 5) | 0x1f);
    //buffer fullness low 6 bits, number of raw data blocks - 1 = 0 (2 bits)
    header[6] = (char)0xfc;
}
int main() {
        AVFormatContext* ifmt_ctx = NULL;
        AVPacket pkt;
        int ret;
    unsigned int i;
        int videoindex = -1, audioindex = -1;
        const char* in_filename = "test.mp4";
        const char* out_filename_v = "test1.h264";
        const char* out_filename_a = "test1.aac";
        if ((ret = avformat_open_input( & amp;ifmt_ctx, in_filename, 0, 0)) < 0) {
                printf("Could not open input file.");
                return -1;
        if ((ret = avformat_find_stream_info(ifmt_ctx, 0)) < 0) {
                printf("Failed to retrieve input stream information");
                return -1;
        videoindex = -1;
        for (i = 0; i < ifmt_ctx->nb_streams; i + + ) { //nb_streams: the number of video and audio streams
                if (ifmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
                        videoindex = i;
                else if (ifmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
                        audioindex = i;
Input Video============================\
        av_dump_format(ifmt_ctx, 0, in_filename, 0); // Print information
        FILE* fp_audio = fopen(out_filename_a, "wb + ");
        FILE* fp_video = fopen(out_filename_v, "wb + ");
        AVBSFContext* bsf_ctx = NULL;
        const AVBitStreamFilter* pfilter = av_bsf_get_by_name("h264_mp4toannexb");
        if (pfilter == NULL) {
                printf("Get bsf failed!\
        if ((ret = av_bsf_alloc(pfilter, & amp;bsf_ctx)) != 0) {
                printf("Alloc bsf failed!\
        ret = avcodec_parameters_copy(bsf_ctx->par_in, ifmt_ctx->streams[videoindex]->codecpar);
        if (ret < 0) {
                printf("Set Codec failed!\
        ret = av_bsf_init(bsf_ctx);
        if (ret < 0) {
                printf("Init bsf failed!\
        //Here, traverse the audio encoder to print the supported sampling rates, and find the table below where the current audio sampling rate is located, which is used to add the adts header later.
    //This program is not used, it is just a test. If you want the robustness of the program, you can use this method.
        const AVCodec* codec = nullptr;
        codec = avcodec_find_encoder(ifmt_ctx->streams[audioindex]->codecpar->codec_id);
        int sample_rate_index = find_sample_rate_index(codec, ifmt_ctx->streams[audioindex]->codecpar->sample_rate);
        printf("Resolution array table below: %d\
", sample_rate_index);
        while (av_read_frame(ifmt_ctx, & amp;pkt) >= 0) {
                if (pkt.stream_index == videoindex) {
                        av_bsf_send_packet(bsf_ctx, & amp;pkt);
                                ret = av_bsf_receive_packet(bsf_ctx, & amp;pkt);
                                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
                                else if (ret < 0) {
                                        printf("Receive Pkt failed!\
                                printf("Write Video Packet. size:%d\tpts:%ld\
", pkt.size, pkt.pts);
                                fwrite(pkt.data, 1, pkt.size, fp_video);

                else if (pkt.stream_index == audioindex) {
                        printf("Write Audio Packet. size:%d\tpts:%ld\
", pkt.size, pkt.pts);
                        char adts[7] = { 0 };
                        addHeader(adts, ifmt_ctx->streams[audioindex]->codecpar->sample_rate,
                        fwrite(adts, 1, 7, fp_audio);
                        fwrite(pkt.data, 1, pkt.size, fp_audio);
                av_packet_unref( & amp;pkt);
        av_bsf_free( & amp;bsf_ctx);
        avformat_close_input( & amp;ifmt_ctx);
        return 0;
        if (ifmt_ctx)
                avformat_close_input( & amp;ifmt_ctx);
        if (fp_video)
        if (bsf_ctx)
                av_bsf_free( & amp;bsf_ctx);
        return -1;


# Cross-build of the mp4 demuxer example against the FFmpeg libraries in mp4Lib.
# NOTE(review): the CC/STRIP definitions and the rule headers were missing from
# the published listing and have been reconstructed here - confirm them against
# your toolchain. Recipe lines must start with a TAB character.
CROSS_COMPILE = aarch64-himix200-linux-

# The sources are .cpp files, so the C++ driver is used to compile and link.
CC    = $(CROSS_COMPILE)g++
STRIP = $(CROSS_COMPILE)strip

CFLAGS = -Wall -O2 -I../../source/mp4Lib/include
LIBS += -L../../source/mp4Lib/lib -lpthread
LIBS += -lavformat -lavcodec -lavdevice -lavutil -lavfilter -lswscale -lswresample -lz

SRCS = $(wildcard *.cpp)
OBJS = $(SRCS:%.cpp=%.o)
DEPS = $(SRCS:%.cpp=%.d)
TARGET = mp4demuxer

all: $(TARGET)

# Pull in the generated header dependencies; the leading '-' ignores missing files.
-include $(DEPS)

%.o: %.cpp
	$(CC) $(CFLAGS) -c -o $@ $<

# Generate foo.d so that both foo.o and foo.d depend on the headers foo.cpp uses.
%.d: %.cpp
	@set -e; rm -f $@; \
	$(CC) -MM $(CFLAGS) $< > $@.$$$$; \
	sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \
	rm -f $@.$$$$

$(TARGET): $(OBJS)
	$(CC) -o $@ $^ $(LIBS)
	$(STRIP) $@

.PHONY: all clean

clean:
	rm -fr $(TARGET) $(OBJS) $(DEPS)

The knowledge points of the article match the official knowledge files, and you can further learn relevant knowledge. CS entry skill treeLinux entryFirst introduction to Linux 38064 people are learning the system