FFmpeg+SDL2 implements a simple audio and video synchronization player

Threading model

This is the threading model of the simple player we are implementing. Together with the material from previous posts in this series, this diagram conveys the player's overall control flow; the code below is structured to follow it directly.

Important structures

VideoState

The most important structure in the entire player, demultiplexing, decoding, audio and video synchronization, and rendering related parameters are all in this structure, which runs through the entire playback process.

typedef struct VideoState {
    
    char filename[1024]; // file name
    AVFormatContext *pFormatCtx; //Context
    int videoStream, audioStream; //audio and video stream index
 
 
     Synchronization related
    double audio_clock;
    double frame_timer;
    double frame_last_pts;
    double frame_last_delay;
 
    double video_clock;
    double video_current_pts;
    int64_t video_current_pts_time;
 
    //audio related
    AVStream *audio_st; // audio stream
    AVCodecContext *audio_ctx; // Audio decoding context
    PacketQueue audioq; // audio queue
    uint8_t audio_buf[(MAX_AUDIO_FRAME_SIZE * 3) / 2]; // Audio cache
    unsigned int audio_buf_size;
    unsigned int audio_buf_index;
    AVFrame audio_frame; // audio frame
    AVPacket audio_pkt; // audio packet
    uint8_t *audio_pkt_data;
    int audio_pkt_size;
    struct SwrContext *audio_swr_ctx; // Audio resampling
 
 
    //video
    AVStream *video_st; // video stream
    AVCodecContext *video_ctx; //Video stream decoding context
    PacketQueue videoq; // Video stream queue
 
 
    VideoPicture pictq[VIDEO_PICTURE_QUEUE_SIZE]; //Decoded video frame array
    int pictq_size, pictq_rindex, pictq_windex;
    SDL_mutex *pictq_mutex;
    SDL_cond *pictq_cond;
 
    SDL_Thread *parse_tid; // Demultiplex thread
    SDL_Thread *video_tid;//Video decoding thread
 
    int quit; //Exit flag bit
} VideoState;

PacketQueue

 Audio and video packet storage queue after demultiplexing
// Thread-safe FIFO of demuxed AVPackets; one queue per elementary stream.
typedef struct PacketQueue {
    AVPacketList *first_pkt, *last_pkt; // head and tail of the linked list
    int nb_packets; // number of packets currently queued
    int size; // total payload bytes queued
    SDL_mutex *mutex; // protects all fields above
    SDL_cond *cond; // signaled when a packet is enqueued
}PacketQueue;

PacketQueue

 Audio and video packet storage queue after demultiplexing. (Note: this section repeats the PacketQueue definition shown above.)
// NOTE(review): this is a verbatim duplicate of the PacketQueue definition
// earlier in the article; a single definition would suffice in real code.
typedef struct PacketQueue {
    AVPacketList *first_pkt, *last_pkt; // head and tail of the linked list
    int nb_packets; // number of packets currently queued
    int size; // total payload bytes queued
    SDL_mutex *mutex; // protects all fields above
    SDL_cond *cond; // signaled when a packet is enqueued
}PacketQueue;

VideoPicture

Decoded video frame
// One decoded video frame waiting in the picture queue for display.
typedef struct VideoPicture {
    AVFrame *frame; // decoded frame data
    int width, height; // frame dimensions in pixels
    double pts; // The time the video frame should be played after audio and video synchronization
} VideoPicture;

Specific code

Main

  1. initialization
  2. Create a timer to schedule the refresh of video frames
  3. Create demultiplexing thread
  4. Wait for event
int WinMain(int argc, char *argv[]) {
    char *file = "C:\Users\lenovo\Desktop\IMG_5950.mp4";
    SDL_Event event;
    VideoState *is;
    is = av_mallocz(sizeof(VideoState));
 
    if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {
        fprintf(stderr, "Could not initialize SDL - %s\
", SDL_GetError());
        exit(1);
    }
    //Create SDL Window
    win = SDL_CreateWindow("Media Player",
                           100,
                           100,
                           640, 480,
                           SDL_WINDOW_RESIZABLE);
    if (!win) {
        fprintf(stderr, "SDL_CreateWindow error, exit!", SDL_GetError());
        exit(1);
    }
 
    renderer = SDL_CreateRenderer(win, -1, 0);
    text_mutex = SDL_CreateMutex();
    
    strlcpy(is->filename, file, sizeof(is->filename));
    is->pictq_mutex = SDL_CreateMutex();
    is->pictq_cond = SDL_CreateCond();
 
    // Scheduled refresher, mainly used to control video refresh
    schedule_refresh(is, 40);
 
    //Create demultiplexing thread
    is->parse_tid = SDL_CreateThread(demux_thread, "demux_thread", is);
    if (!is->parse_tid) {
        av_free(is);
        return -1;
    }
 
 
    for (;;) {
        // Wait for SDL events, otherwise block
        SDL_WaitEvent( & amp;event);
        switch (event.type) {
            case FF_QUIT_EVENT:
            case SDL_QUIT: //Exit
                is->quit = 1;
                goto Destroy;
            case SDL_KEYDOWN:// ESC to exit
                if (event.key.keysym.sym == SDLK_ESCAPE) {
                    is->quit = 1;
                    goto Destroy;
                }
                break;
            case FF_REFRESH_EVENT: // timer refresh event
                video_refresh_timer(event.user.data1);
                break;
            default:
                break;
        }
    }
 
    // quit
    Destroy:
    SDL_Quit();
    return 0;
 
}

Demultiplexing

  1. open a file
  2. Find audio and video streams
  3. Open audio and video streams, create video decoding threads, and prepare for decoding
  4. Read the packet, put the audio and video packets into the queue respectively, and wait for the decoding thread to take them out.
int demux_thread(void *arg) {
    
    if ((err_code = avformat_open_input( & amp;pFormatCtx, is->filename, NULL, NULL)) < 0) {
        av_strerror(err_code, errors, 1024);
        return -1;
    }
 
    // Find the first video stream
    for (i = 0; i < pFormatCtx->nb_streams; i + + ) {
        if (pFormatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO & amp; & amp;
            video_index < 0) {
            video_index = i;
        }
        if (pFormatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO & amp; & amp;
            audio_index < 0) {
            audio_index = i;
        }
    }
    //Open the audio stream, create a decoder, and configure parameters
    if (audio_index >= 0) {
        stream_component_open(is, audio_index);
    }
    //Open the video stream, create a decoder, and create a decoding thread
    if (video_index >= 0) {
        stream_component_open(is, video_index);
        // video_tid = SDL_CreateThread(decode_video_thread, "decode_video_thread", is);
    }
 
    for (;;) {
        if (av_read_frame(is->pFormatCtx, packet) < 0) {
            if (is->pFormatCtx->pb->error == 0) {
                SDL_Delay(100); /* no error; wait for user input */
                continue;
            } else {
                break;
            }
        }
        // Store the packet in the queue
        if (packet->stream_index == is->videoStream) {
            packet_queue_put( & amp;is->videoq, packet);
        } else if (packet->stream_index == is->audioStream) {
            packet_queue_put( & amp;is->audioq, packet);
        } else {
            av_packet_unref(packet);
        }
    }
    return 0;
}

Video decoding

  1. Get the video packet from the queue
  2. decode, synchronize
  3. After encoding, the Frame is stored in the array and waits for video rendering.
 Video decoding
int decode_video_thread(void *arg) {
    VideoState *is = (VideoState *) arg;
    AVPacket pkt1, *packet = & amp;pkt1;
    AVFrame *pFrame;
    double pts;
 
    pFrame = av_frame_alloc();
 
    for (;;) {
        // Get the packet from the video queue
        if (packet_queue_get( & amp;is->videoq, packet, 1) < 0) {
            break;
        }
        
        // decode
        avcodec_send_packet(is->video_ctx, packet);
        while (avcodec_receive_frame(is->video_ctx, pFrame) == 0) {
            if ((pts = pFrame->best_effort_timestamp) != AV_NOPTS_VALUE) {
            } else {
                pts = 0;
            }
            pts *= av_q2d(is->video_st->time_base);
 
            // Synchronize
            pts = synchronize_video(is, pFrame, pts);
            if (queue_picture(is, pFrame, pts) < 0) {
                break;
            }
            av_packet_unref(packet);
        }
    }
    av_frame_free( & amp;pFrame);
    return 0;
}

Audio decoding

Audio device callback
/*
 * SDL audio device callback: fill `stream` with `len` bytes of PCM.
 * Decodes new audio frames on demand into is->audio_buf and mixes them
 * into the output buffer; on decode failure, outputs silence.
 */
void audio_callback(void *userdata, Uint8 *stream, int len) {

    VideoState *is = (VideoState *) userdata;
    int len1, audio_size;
    double pts;

    // Start from silence so SDL_MixAudio mixes onto a clean buffer
    SDL_memset(stream, 0, len);

    while (len > 0) {
        if (is->audio_buf_index >= is->audio_buf_size) {
            // Cache exhausted: decode the next audio frame
            audio_size = audio_decode_frame(is, is->audio_buf, sizeof(is->audio_buf), &pts);
            if (audio_size < 0) {
                // Decode error: emit a block of silence instead
                is->audio_buf_size = 1024 * 2 * 2;
                memset(is->audio_buf, 0, is->audio_buf_size);
            } else {
                is->audio_buf_size = audio_size;
            }
            is->audio_buf_index = 0;
        }
        // Copy no more than what the device asked for this round
        len1 = is->audio_buf_size - is->audio_buf_index;
        if (len1 > len)
            len1 = len;
        // Mix into the output at full volume
        SDL_MixAudio(stream, (uint8_t *) is->audio_buf + is->audio_buf_index, len1, SDL_MIX_MAXVOLUME);
        len -= len1;
        stream += len1;
        is->audio_buf_index += len1;
    }
}

Video refresh playback

 The video refreshes and plays, predicts the playback time of the next frame, and sets a new timer
/*
 * Display the next queued video frame, compute when the one after it
 * should appear (synchronized against the audio clock), and re-arm the
 * refresh timer accordingly.
 */
void video_refresh_timer(void *userdata) {

    VideoState *is = (VideoState *) userdata;
    VideoPicture *vp;
    double actual_delay, delay, sync_threshold, ref_clock, diff;

    if (is->video_st) {
        if (is->pictq_size == 0) {
            // Nothing decoded yet: poll again very soon
            schedule_refresh(is, 1);
        } else {
            // Take the next frame from the picture queue
            vp = &is->pictq[is->pictq_rindex];

            is->video_current_pts = vp->pts;
            is->video_current_pts_time = av_gettime();
            // Nominal inter-frame delay = this frame's PTS minus the last's
            delay = vp->pts - is->frame_last_pts;
            if (delay <= 0 || delay >= 1.0) {
                // Nonsensical delay (<=0 or >=1s): reuse the previous delay
                delay = is->frame_last_delay;
            }
            // Remember delay and PTS for the next invocation
            is->frame_last_delay = delay;
            is->frame_last_pts = vp->pts;

            // Reference clock: where audio playback currently is
            ref_clock = get_audio_clock(is);
            // How far this frame's PTS is from the audio clock
            diff = vp->pts - ref_clock;

            // AV_SYNC_THRESHOLD is the minimum refresh interval
            sync_threshold = (delay > AV_SYNC_THRESHOLD) ? delay : AV_SYNC_THRESHOLD;
            // Only correct when the drift is within the no-sync ceiling
            if (fabs(diff) < AV_NOSYNC_THRESHOLD) {
                if (diff <= -sync_threshold) {
                    // Video lags audio: show this frame immediately
                    delay = 0;
                } else if (diff >= sync_threshold) {
                    // Video leads audio: hold the frame twice as long
                    delay = 2 * delay;
                }
            }
            is->frame_timer += delay;
            // Convert the target wall-clock time into an actual delay
            actual_delay = is->frame_timer - (av_gettime() / 1000000.0);
            if (actual_delay < 0.010) {
                // Clamp to a 10 ms minimum so the timer always fires
                actual_delay = 0.010;
            }
            // Re-arm the refresh timer for the next frame
            schedule_refresh(is, (int) (actual_delay * 1000 + 0.5));

            // Render the frame
            video_display(is);

            // Advance the read index, wrapping around the ring buffer
            if (++is->pictq_rindex == VIDEO_PICTURE_QUEUE_SIZE) {
                is->pictq_rindex = 0;
            }
            SDL_LockMutex(is->pictq_mutex);
            // One slot freed: wake the decode thread if it was waiting
            is->pictq_size--;
            SDL_CondSignal(is->pictq_cond);
            SDL_UnlockMutex(is->pictq_mutex);
        }
    } else {
        // No video stream opened yet: retry later
        schedule_refresh(is, 100);
    }
}

That is the overall flow. Compared with the earlier demos, this player is considerably more complex, but every technique it uses was covered in previous posts in this series. Rather than re-explaining each piece here, the best approach is to run the program yourself and read the code alongside it: once the flow is clear, the whole player is not difficult to understand. The result is a simple audio/video-synchronized player built with FFmpeg + SDL2.

Original text FFmpeg + SDL2 implements a simple audio and video synchronization player – Nuggets

On the business card at the end of the article, you can get free audio and video development learning materials, including (FFmpeg, webRTC, rtmp, hls, rtsp, ffplay, srs) and audio and video learning roadmap, etc.

See below! ↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓