A C++ class that encapsulates audio and video playback using FFmpeg and SDL.

1. Include the necessary header files.

#define SDL_MAIN_HANDLED //Avoid main redefinition file

extern "C" {
#include "./ffmpeg/include/libavformat/avformat.h"
#include "./ffmpeg/include/libswscale/swscale.h"
#include "./ffmpeg/include/libavcodec/avcodec.h"
#include "./ffmpeg/include/libavcodec/codec.h"
#include "./ffmpeg/include/libavutil/avutil.h"
#include "./ffmpeg/include/libavutil/time.h"
#include "./ffmpeg/include/libavutil/frame.h"
#include "./ffmpeg/include/libavutil/opt.h"
#include "./ffmpeg/include/libavutil/samplefmt.h"
#include "./ffmpeg/include/libswresample/swresample.h"

#include "./sdl/include/SDL.h"
}

#pragma comment(lib,"./ffmpeg/lib/x64/debug/avcodec.lib")
#pragma comment(lib,"./ffmpeg/lib/x64/debug/avdevice.lib")
#pragma comment(lib,"./ffmpeg/lib/x64/debug/avfilter.lib")
#pragma comment(lib,"./ffmpeg/lib/x64/debug/avformat.lib")
#pragma comment(lib,"./ffmpeg/lib/x64/debug/avutil.lib")
#pragma comment(lib,"./ffmpeg/lib/x64/debug/swresample.lib")
#pragma comment(lib,"./ffmpeg/lib/x64/debug/swscale.lib")

#pragma comment(lib, "Mfuuid.lib") //IID_IMFMediaEventGenerator
#pragma comment(lib, "Strmiids.lib")//IID_ICodecAPI
#pragma comment(lib, "Bcrypt.lib") //BCryptOpenAlgorithmProvider
#pragma comment(lib, "Secur32.lib")//AcquireCredentialsHandleA
#pragma comment(lib, "ws2_32.lib") //Network library

#pragma comment(lib,"./sdl/lib/x64/SDL2.lib")

2. The class implementation.

#pragma once

#include <algorithm>
#include <atomic>
#include <condition_variable>
#include <cstring>
#include <iostream>
#include <mutex>
#include <queue>
#include <string>
#include <thread>

#include <objbase.h>

#define SDL_MAIN_HANDLED //Avoid main redefinition file

extern "C" {
#include "./ffmpeg/include/libavformat/avformat.h"
#include "./ffmpeg/include/libswscale/swscale.h"
#include "./ffmpeg/include/libavcodec/avcodec.h"
#include "./ffmpeg/include/libavcodec/codec.h"
#include "./ffmpeg/include/libavutil/avutil.h"
#include "./ffmpeg/include/libavutil/time.h"
#include "./ffmpeg/include/libavutil/frame.h"
#include "./ffmpeg/include/libavutil/opt.h"
#include "./ffmpeg/include/libavutil/samplefmt.h"
#include "./ffmpeg/include/libswresample/swresample.h"

#include "./sdl/include/SDL.h"
}

#pragma comment(lib,"./ffmpeg/lib/x64/debug/avcodec.lib")
#pragma comment(lib,"./ffmpeg/lib/x64/debug/avdevice.lib")
#pragma comment(lib,"./ffmpeg/lib/x64/debug/avfilter.lib")
#pragma comment(lib,"./ffmpeg/lib/x64/debug/avformat.lib")
#pragma comment(lib,"./ffmpeg/lib/x64/debug/avutil.lib")
#pragma comment(lib,"./ffmpeg/lib/x64/debug/swresample.lib")
#pragma comment(lib,"./ffmpeg/lib/x64/debug/swscale.lib")

#pragma comment(lib, "Mfuuid.lib") //IID_IMFMediaEventGenerator
#pragma comment(lib, "Strmiids.lib")//IID_ICodecAPI
#pragma comment(lib, "Bcrypt.lib") //BCryptOpenAlgorithmProvider
#pragma comment(lib, "Secur32.lib")//AcquireCredentialsHandleA
#pragma comment(lib, "ws2_32.lib") //Network library

#pragma comment(lib,"./sdl/lib/x64/SDL2.lib")

class FFPlayer {
public:
FFPlayer() : quit(false), audioStreamIndex(-1), videoStreamIndex(-1) {
SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER);
}
~FFPlayer() {
//SDL_Quit();
}

bool open(const std::string & filename) {
// Initialize the FFmpeg library
avformat_network_init();

// open the audio and video file
formatCtx = avformat_alloc_context();//The interrupt callback needs to be set here, so you need to apply first
formatCtx->interrupt_callback.callback = &FFPlayer::decode_interrupt_cb;
formatCtx->interrupt_callback. opaque = this;
if (avformat_open_input( & formatCtx, filename. c_str(), nullptr, nullptr) != 0)
return false;

if (avformat_find_stream_info(formatCtx, nullptr) < 0)
return false;

// Find audio and video stream indices
for (int i = 0; i < formatCtx->nb_streams; i ++ ) {
if (formatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO & amp; & amp; audioStreamIndex == -1) {
audioStreamIndex = i;
}
else if (formatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO & amp; & amp; videoStreamIndex == -1) {
videoStreamIndex = i;
}
}

if (audioStreamIndex == -1 || videoStreamIndex == -1)
return false;

// Create an audio decoder context
audioCodecCtx = avcodec_alloc_context3(nullptr);
avcodec_parameters_to_context(audioCodecCtx, formatCtx->streams[audioStreamIndex]->codecpar);

// Create a video decoder context
videoCodecCtx = avcodec_alloc_context3(nullptr);
avcodec_parameters_to_context(videoCodecCtx, formatCtx->streams[videoStreamIndex]->codecpar);

// open the audio codec
const AVCodec* audioCodec = avcodec_find_decoder(audioCodecCtx->codec_id);
if (audioCodec == nullptr || avcodec_open2(audioCodecCtx, audioCodec, nullptr) < 0)
return false;

// open the video codec
const AVCodec* videoCodec = avcodec_find_decoder(videoCodecCtx->codec_id);
if (videoCodec == nullptr || avcodec_open2(videoCodecCtx, videoCodec, nullptr) < 0)
return false;

// Initialize audio related parameters
audioFrameQueue = std::queue<AVFrame*>(); // audio frame queue

// Initialize video related parameters
videoFrameQueue = std::queue<AVFrame*>(); // video frame queue
videoBaseTime = 0;

return true;
}

void play() {
// Create audio and video playback threads
std::thread audioThread( &FFPlayer::audioPlayer, this);
std::thread videoThread( &FFPlayer::videoPlayer, this);

// Loop to read audio and video frames and decode them
AVPacket packet;
int64_t startTime = av_gettime();
while (!quit & amp; & amp; av_read_frame(formatCtx, & amp;packet) >= 0) {
if (packet. stream_index == audioStreamIndex) {
// audio stream processing
decodeAudioPacket(packet);

//Delay (otherwise the file will be played immediately)
AVRational timeBase = { 1, AV_TIME_BASE };
int64_t ptsTime = av_rescale_q(packet.dts, formatCtx->streams[audioStreamIndex]->time_base, timeBase);
int64_t nowTime = av_gettime() - startTime;
if (ptsTime > nowTime) {
unsigned usec = ptsTime - nowTime;
av_usleep(usec);
}
}
else if (packet. stream_index == videoStreamIndex) {
// video stream processing
decodeVideoPacket(packet);

//Delay (otherwise the file will be played immediately)
AVRational timeBase = { 1, AV_TIME_BASE };
int64_t ptsTime = av_rescale_q(packet.dts, formatCtx->streams[videoStreamIndex]->time_base, timeBase);
int64_t nowTime = av_gettime() - startTime;
if (ptsTime > nowTime) {
unsigned usec = ptsTime - nowTime;
av_usleep(usec);
}
}

av_packet_unref( & amp;packet);
}

stop();

// Wait for the audio and video playback threads to end
audioThread. join();
videoThread. join();

// Release resources
avcodec_free_context( & audioCodecCtx);
avcodec_free_context( & videoCodecCtx);
avformat_close_input( & formatCtx);
avformat_free_context(formatCtx);
}

void stop() {
quit = true;
audioFrameCond. notify_one();
videoFrameCond. notify_one();
}

private:
AVFormatContext* formatCtx;
AVCodecContext* audioCodecCtx;
AVCodecContext* videoCodecCtx;
int audioStreamIndex;
int videoStreamIndex;

std::queue<AVFrame*> audioFrameQueue; // audio frame queue
std::mutex audioFrameMutex; // audio frame lock
std::condition_variable audioFrameCond; // audio frame condition variable

std::queue<AVFrame*> videoFrameQueue; // video frame queue
std::mutex videoFrameMutex; // video frame lock
std::condition_variable videoFrameCond; // video frame condition variable

std::queue<AVFrame*> audioPcmFrameQueue; // PCM audio frame queue
std::mutex audioPcmFrameMutex; // PCM audio frame lock
std::condition_variable audioPcmFrameCond; // PCM audio frame condition variable

int64_t videoBaseTime;
bool quit;

void decodeAudioPacket(AVPacket packet) {
int ret = avcodec_send_packet(audioCodecCtx, &packet);
while (ret >= 0) {
AVFrame* frame = av_frame_alloc();
ret = avcodec_receive_frame(audioCodecCtx, frame);
if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN)) {
av_frame_free( & amp; frame);
break;
}

// process audio frames
std::unique_lock<std::mutex> lock(audioFrameMutex);
audioFrameQueue. push(frame);
lock. unlock();
audioFrameCond. notify_one();
}
}

void decodeVideoPacket(AVPacket packet) {
int ret = avcodec_send_packet(videoCodecCtx, &packet);
static int frame_index = 1;
while (ret >= 0) {
AVFrame* frame = av_frame_alloc();
ret = avcodec_receive_frame(videoCodecCtx, frame);
if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN)) {
av_frame_free( & amp; frame);
break;
}

// Calculate the display timestamp of the video frame
double timestamp = frame->best_effort_timestamp * videoBaseTime;

frame->linesize[7] = frame_index++;
// process video frame
std::unique_lock<std::mutex> lock(videoFrameMutex);
videoFrameQueue. push(frame);
lock. unlock();
videoFrameCond. notify_one();
}
}

void audioPlayer() {
CoInitialize(NULL);// Otherwise SDL_OpenAudio fails
//SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER);

SDL_AudioSpec desiredSpec, obtainedSpec;
SDL_zero(desiredSpec);
 desiredSpec.freq = audioCodecCtx->sample_rate;// audio sampling rate 44100
 desiredSpec.format = AUDIO_S16SYS;// audio format
 desiredSpec.channels = audioCodecCtx->ch_layout.nb_channels;// Number of channels
 desiredSpec. silence = 0;
 desiredSpec.samples = FFMAX(512, 2 << av_log2(desiredSpec.freq / 30)); //audioCodecCtx->frame_size;// The number of samples per audio buffer
desiredSpec.callback = nullptr;// nullptr;// audioCallback;// If the callback is empty, call SDL_QueueAudio to write audio data
desiredSpec. userdata = nullptr; // nullptr; // this;
\t\t
SDL_AudioDeviceID audioDevice = 0;
if (desiredSpec. callback) {
if (SDL_OpenAudio( &desiredSpec, &obtainedSpec) < 0) {
std::cerr << "Failed to open audio device: " << SDL_GetError() << std::endl;
return;
}

// start playing audio
SDL_PauseAudio(0);
}
else {
audioDevice = SDL_OpenAudioDevice(nullptr, 0, &desiredSpec, &obtainedSpec, SDL_AUDIO_ALLOW_FREQUENCY_CHANGE);
if (audioDevice < 2) {
std::cerr << "Failed to open audio device: " << SDL_GetError() << std::endl;
return;
}

// start playing audio
SDL_PauseAudioDevice(audioDevice, 0);
}

int in_sample_rate = audioCodecCtx->sample_rate;
AVSampleFormat in_sample_fmt = audioCodecCtx->sample_fmt; // Input data is interleaved, not plannar
int in_channel_layout = AV_CH_LAYOUT_STEREO;
int in_nb_samples = 8192;

int out_sample_rate = desiredSpec.freq;
AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_S16;
int out_channel_layout = AV_CH_LAYOUT_STEREO;
int out_nb_samples = av_rescale_rnd(in_nb_samples, out_sample_rate, in_sample_rate, AV_ROUND_UP);
switch (desiredSpec. format)
{
case AUDIO_S16SYS:
out_sample_fmt = AV_SAMPLE_FMT_S16;
break;
case AUDIO_S32SYS:
out_sample_fmt = AV_SAMPLE_FMT_S32;
break;
case AUDIO_F32SYS:
out_sample_fmt = AV_SAMPLE_FMT_FLT;
break;
default:
printf("audio device format was not supported!\
");
break;
}

// Create a resampling context and set parameters
SwrContext *swrCtx = swr_alloc();
av_opt_set_int(swrCtx, "in_channel_layout", in_channel_layout, 0);
av_opt_set_int(swrCtx, "in_sample_rate", in_sample_rate, 0);
av_opt_set_sample_fmt(swrCtx, "in_sample_fmt", in_sample_fmt, 0);
av_opt_set_int(swrCtx, "out_channel_layout", out_channel_layout, 0); // target audio format is stereo
av_opt_set_int(swrCtx, "out_sample_rate", out_sample_rate, 0); // The target audio sampling rate is 44.1kHz
av_opt_set_sample_fmt(swrCtx, "out_sample_fmt", out_sample_fmt, 0); // The target audio sample format is a 16-bit signed integer
swr_init(swrCtx);

while (!quit) {
// Check if the video frame queue is empty
std::unique_lock<std::mutex> lock(audioFrameMutex);
if (audioFrameQueue. empty()) {
// If the video frame queue is empty, wait for new video frames to arrive
audioFrameCond.wait(lock, [ & amp;] { return !audioFrameQueue.empty() || quit; });
}

if (!audioFrameQueue. empty()) {
// Get a frame of video data from the audio frame queue
AVFrame* frame = audioFrameQueue. front();
audioFrameQueue. pop();
lock. unlock();
\t\t\t\t
int dst_nb_samples = av_rescale_rnd(
swr_get_delay(swrCtx, in_sample_rate) + in_nb_samples, // in_sample_rate in_nb_samples
out_sample_rate,
in_sample_rate,
AV_ROUND_UP);

AVFrame* pcmFrame = av_frame_alloc();
av_samples_alloc(pcmFrame->data, pcmFrame->linesize, audioCodecCtx->ch_layout.nb_channels, out_nb_samples, out_sample_fmt, 1);
if (dst_nb_samples > out_nb_samples) {
av_frame_unref(pcmFrame);
out_nb_samples = dst_nb_samples;
av_samples_alloc(pcmFrame->data, pcmFrame->linesize, audioCodecCtx->ch_layout.nb_channels, out_nb_samples, out_sample_fmt, 1);
}

int out_samples = swr_convert(swrCtx,
&pcmFrame->data[0], out_nb_samples,
(const uint8_t**)frame->data,
frame->nb_samples);
pcmFrame->nb_samples = out_samples;

if (desiredSpec. callback) {
// process audio frames
std::unique_lock<std::mutex> lock(audioPcmFrameMutex);
audioPcmFrameQueue.push(pcmFrame);
lock. unlock();
audioPcmFrameCond. notify_one();
}
else {
// Process the audio frame and send the data to the audio device for playback
//int out_sample_buffer_size = av_samples_get_buffer_size(0, audioCodecCtx->ch_layout.nb_channels, out_samples, out_sample_fmt, 1);
\t\t\t\t\t//or
int out_sample_buffer_size = pcmFrame->nb_samples * audioCodecCtx->ch_layout.nb_channels *
av_get_bytes_per_sample(static_cast<AVSampleFormat>(out_sample_fmt));

//Delay, according to the data length, -1 is to prevent slow writing
//SDL_Delay((out_sample_buffer_size) * 1000.0 / (desiredSpec.freq * av_get_bytes_per_sample(out_sample_fmt) * desiredSpec.channels) - 1);

\t\t\t\t\tdata input
if (0 != SDL_QueueAudio(audioDevice, pcmFrame->data[0], out_sample_buffer_size)) {
printf("SDL_QueueAudio error\
");
}
\t\t\t\t\t
av_freep( & amp;pcmFrame->data[0]);//After calling av_samples_alloc, remember to use av_freep to release memory data, otherwise memory leaks
av_frame_free( &pcmFrame);
}

av_frame_free( & amp; frame);
}

SDL_Event event;
if (SDL_PollEvent( & amp; event)) {
if (event.type == SDL_QUIT) {
quit = true; // window close event, exit the loop
}
}
}

{
std::unique_lock<std::mutex> lock(audioFrameMutex);
while (!audioFrameQueue. empty()) {
AVFrame* frame = audioFrameQueue. front();
audioFrameQueue. pop();
av_frame_free( & amp; frame);
}
lock. unlock();
}

{
std::unique_lock<std::mutex> lock(audioPcmFrameMutex);
while (!audioPcmFrameQueue.empty()) {
AVFrame* frame = audioPcmFrameQueue. front();
audioPcmFrameQueue. pop();
av_freep( & amp;frame->data[0]);//After calling av_samples_alloc, remember to use av_freep to release memory data, otherwise memory leaks
av_frame_free( & amp; frame);
}
lock. unlock();
}

// Release resources
swr_free( &swrCtx);

// close the audio device
if (desiredSpec. callback)
SDL_CloseAudio();
else
SDL_CloseAudioDevice(audioDevice);

CoUninitialize();
}

static int decode_interrupt_cb(void *userdata)
{
FFPlayer* player = static_cast<FFPlayer*>(userdata);
return player->quit;
}

static void audioCallback(void* userdata, Uint8* stream, int len) {
FFPlayer* player = static_cast<FFPlayer*>(userdata);
player->fillAudioBuffer(stream, len);
}

void fillAudioBuffer(Uint8* stream, int len) {
// Calculate the number of samples that need to be filled
int numSamples = len / (audioCodecCtx->ch_layout.nb_channels * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16));
int copy_sample_length = len;

// Create an audio frame buffer for storing decoded audio data
AVFrame* audioFrame = av_frame_alloc();
uint8_t* audioBuffer = new uint8_t[len];
Uint8* current_buffer = audioBuffer;

while (copy_sample_length > 0) {
// Check if the audio frame queue is empty
std::unique_lock<std::mutex> lock(audioPcmFrameMutex);
if (audioPcmFrameQueue. empty()) {
// If the audio frame queue is empty, wait for a new audio frame to arrive
audioPcmFrameCond.wait(lock, [ & amp;] { return !audioPcmFrameQueue.empty() || quit; });
}

if (!audioPcmFrameQueue. empty()) {
// Get a frame of audio data from the audio frame queue
audioFrame = audioPcmFrameQueue. front();
audioPcmFrameQueue. pop();
lock. unlock();

// Calculate the number of samples to replicate
int copySamples = (std::min)(numSamples, audioFrame->nb_samples);

// copy audio data to audio buffer
memcpy(current_buffer, audioFrame->data[0], copySamples * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16) * audioCodecCtx->ch_layout.nb_channels);
//memcpy(stream, audioFrame->data[0], copySamples * audioCodecCtx->ch_layout.nb_channels * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16));

// Update the position and number of remaining samples to fill the audio buffer
current_buffer + = copySamples * audioCodecCtx->ch_layout.nb_channels * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
copy_sample_length -= copySamples * audioCodecCtx->ch_layout.nb_channels * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
numSamples -= copySamples;

// release the audio frame resource
av_freep( & amp;audioFrame->data[0]);//After calling av_samples_alloc, remember to use av_freep to release memory data, otherwise memory leaks
av_frame_free( & audioFrame);
}
}

// Copy audio data to SDL audio buffer
memcpy(stream, audioBuffer, len);

delete[] audioBuffer;
}

void videoPlayer() {
// Create SDL window and renderer
SDL_Window* window = SDL_CreateWindow("FFPlayer", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED,
videoCodecCtx->width, videoCodecCtx->height, SDL_WINDOW_ALLOW_HIGHDPI | SDL_WINDOW_MOUSE_FOCUS | SDL_WINDOW_RESIZABLE | SDL_WINDOW_SHOWN);
SDL_Renderer* renderer = SDL_CreateRenderer(window, -1, SDL_RENDERER_PRESENTVSYNC | SDL_RENDERER_ACCELERATED);
\t\t
// create and initialize SDL texture
SDL_Texture* texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_IYUV, SDL_TEXTUREACCESS_STREAMING,
videoCodecCtx->width, videoCodecCtx->height);

while (!quit) {
// Check if the video frame queue is empty
std::unique_lock<std::mutex> lock(videoFrameMutex);
if (videoFrameQueue. empty()) {
// If the video frame queue is empty, wait for new video frames to arrive
videoFrameCond.wait(lock, [ & amp;] { return !videoFrameQueue.empty() || quit; });
}

if (!videoFrameQueue. empty()) {
// Get a frame of video data from the video frame queue
AVFrame* frame = videoFrameQueue. front();
videoFrameQueue. pop();
lock. unlock();

// Copy video frame data to SDL texture
//SDL_UpdateTexture(texture, nullptr, frame->data[0], frame->linesize[0]);
SDL_UpdateYUVTexture(texture, nullptr,
(Uint8*)frame->data[0], frame->linesize[0],
(Uint8*)frame->data[1], frame->linesize[1],
(Uint8*)frame->data[2], frame->linesize[2]);

// Clear the renderer and render the texture to the window
SDL_RenderClear(renderer);
SDL_RenderCopy(renderer, texture, nullptr, nullptr);
SDL_RenderPresent(renderer);

// Release the video frame resources
av_frame_free( & amp; frame);
}

SDL_Event event;
if (SDL_PollEvent( & amp; event)) {
if (event.type == SDL_QUIT) {
quit = true; // window close event, exit the loop
}
}
}

std::unique_lock<std::mutex> lock(videoFrameMutex);
while (!videoFrameQueue.empty()) {
AVFrame* frame = videoFrameQueue. front();
videoFrameQueue. pop();
av_frame_free( & amp; frame);
}
lock. unlock();

// Release texture resources
SDL_DestroyTexture(texture);

// Destroy the SDL window and renderer
SDL_DestroyRenderer(renderer);
SDL_DestroyWindow(window);
}
};

The example demonstrates two ways of playing audio: via an SDL callback, or by writing audio data directly with SDL_QueueAudio. The class encapsulation is not yet complete, and audio/video synchronization is not handled. If you need proper A/V synchronization, refer to the ffplay.c source code as a starting point for modifications.