ffmpeg call test under Windows (YUV420P encoding)

1 Download static libraries and dynamic libraries

If you are loading dynamically, you can directly use the dynamic library (dlopen, etc.); otherwise, use the dynamic + static method (static loading method of dynamic library); or directly compile the static library into your execution program (pure static components use );

https://www.gyan.dev/ffmpeg/builds/#libraries

Add component

vs2022: Properties->c/c++->Additional include directory; add the header file directory to it

Properties->Linker->Additional library directory, add the static library path; then remember to put the dll in the EXE directory when running

2.YUV420P encoding 264

I chose YUV420P (YV12) because I can get this data by parsing a file with the FFmpeg command line for testing, and in planar format the Y, V, and U data are stored contiguously in separate planes, with no UV interleaving. One frame of YUV data amounts to width*height*1.5 bytes.

Just find an MP4 video and use the FFmpeg command line to get YUV data for testing, ffmpeg -i input.mp4 -vf “format=yuv420p” output.yuv; if you have YUV test data yourself, or the data comes from Real-time recording, screenshots, etc. can be used directly;

The following is the source code of the test demo provided by FFmpeg. It is the YUV generated by myself, and then sets the encoding parameters to output 264. I will make a few changes based on this demo later; but the most important thing is to use this demo.

/*
 * Copyright (c) 2001 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/**
 * @file libavcodec encoding video API usage example
 * @example encode_video.c
 *
 * Generate synthetic video data and encode it to an output file.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <libavcodec/avcodec.h>

#include <libavutil/opt.h>
#include <libavutil/imgutils.h>

/* Push one frame into the encoder and drain every packet it produces.
 * enc_ctx  - opened encoder context
 * frame    - frame to encode, or NULL to flush the encoder at end of stream
 * pkt      - reusable packet for receiving encoded data
 * outfile  - destination for the raw elementary stream
 * Exits the process on any fatal encoding error. */
static void encode(AVCodecContext *enc_ctx, AVFrame *frame, AVPacket *pkt,
                   FILE *outfile)
{
    int ret;

    /* send the frame to the encoder */
    if (frame)
        printf("Send frame %3"PRId64"\n", frame->pts);

    ret = avcodec_send_frame(enc_ctx, frame);
    if (ret < 0) {
        fprintf(stderr, "Error sending a frame for encoding\n");
        exit(1);
    }

    /* one sent frame may yield zero, one or several packets */
    while (ret >= 0) {
        ret = avcodec_receive_packet(enc_ctx, pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            return; /* encoder needs more input, or is fully flushed */
        else if (ret < 0) {
            fprintf(stderr, "Error during encoding\n");
            exit(1);
        }

        printf("Write packet %3"PRId64" (size=%5d)\n", pkt->pts, pkt->size);
        fwrite(pkt->data, 1, pkt->size, outfile);
        av_packet_unref(pkt); /* drop our reference so pkt can be reused */
    }
}

int main(int argc, char **argv)
{
    const char *filename, *codec_name;
    constAVCodec *codec;
    AVCodecContext *c= NULL;
    int i, ret, x, y;
    FILE *f;
    AVFrame *frame;
    AVPacket *pkt;
    uint8_t endcode[] = { 0, 0, 1, 0xb7 };

    if (argc <= 2) {
        fprintf(stderr, "Usage: %s <output file> <codec name>\\
", argv[0]);
        exit(0);
    }
    filename = argv[1];
    codec_name = argv[2];

    /* find the mpeg1video encoder */
    codec = avcodec_find_encoder_by_name(codec_name);
    if (!codec) {
        fprintf(stderr, "Codec '%s' not found\\
", codec_name);
        exit(1);
    }

    c = avcodec_alloc_context3(codec);
    if (!c) {
        fprintf(stderr, "Could not allocate video codec context\\
");
        exit(1);
    }

    pkt = av_packet_alloc();
    if (!pkt)
        exit(1);

    /* put sample parameters */
    c->bit_rate = 400000;
    /* resolution must be a multiple of two */
    c->width = 352;
    c->height = 288;
    /* frames per second */
    c->time_base = (AVRational){1, 25};
    c->framerate = (AVRational){25, 1};

    /* emit one intra frame every ten frames
     * check frame pict_type before passing frame
     * to encoder, if frame->pict_type is AV_PICTURE_TYPE_I
     * then gop_size is ignored and the output of encoder
     * will always be I frame irrespective to gop_size
     */
    c->gop_size = 10;
    c->max_b_frames = 1;
    c->pix_fmt = AV_PIX_FMT_YUV420P;

    if (codec->id == AV_CODEC_ID_H264)
        av_opt_set(c->priv_data, "preset", "slow", 0);

    /* open it */
    ret = avcodec_open2(c, codec, NULL);
    if (ret < 0) {
        fprintf(stderr, "Could not open codec: %s\\
", av_err2str(ret));
        exit(1);
    }

    f = fopen(filename, "wb");
    if (!f) {
        fprintf(stderr, "Could not open %s\\
", filename);
        exit(1);
    }

    frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate video frame\\
");
        exit(1);
    }
    frame->format = c->pix_fmt;
    frame->width = c->width;
    frame->height = c->height;

    ret = av_frame_get_buffer(frame, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate the video frame data\\
");
        exit(1);
    }

    /* encode 1 second of video */
    for (i = 0; i < 25; i + + ) {
        fflush(stdout);

        /* Make sure the frame data is writable.
           On the first round, the frame is fresh from av_frame_get_buffer()
           and therefore we know it is writable.
           But on the next rounds, encode() will have called
           avcodec_send_frame(), and the codec may have kept a reference to
           the frame in its internal structures, that makes the frame
           unwritable.
           av_frame_make_writable() checks that and allocates a new buffer
           for the frame only if necessary.
         */
        ret = av_frame_make_writable(frame);
        if (ret < 0)
            exit(1);

        /* Prepare a dummy image.
           In real code, this is where you would have your own logic for
           filling the frame. FFmpeg does not care what you put in the
           frame.
         */
        /* Y */
        for (y = 0; y < c->height; y + + ) {
            for (x = 0; x < c->width; x + + ) {
                frame->data[0][y * frame->linesize[0] + x] = x + y + i * 3;
            }
        }

        /* Cb and Cr */
        for (y = 0; y < c->height/2; y + + ) {
            for (x = 0; x < c->width/2; x + + ) {
                frame->data[1][y * frame->linesize[1] + x] = 128 + y + i * 2;
                frame->data[2][y * frame->linesize[2] + x] = 64 + x + i * 5;
            }
        }

        frame->pts = i;

        /* encode the image */
        encode(c, frame, pkt, f);
    }

    /* flush the encoder */
    encode(c, NULL, pkt, f);

    /* Add sequence end code to have a real MPEG file.
       It makes only sense because this tiny examples writes packets
       directly. This is called "elementary stream" and only works for some
       codecs. To create a valid file, you usually need to write packets
       into a proper file format or protocol; see mux.c.
     */
    if (codec->id == AV_CODEC_ID_MPEG1VIDEO || codec->id == AV_CODEC_ID_MPEG2VIDEO)
        fwrite(endcode, 1, sizeof(endcode), f);
    fclose(f);

    avcodec_free_context( & amp;c);
    av_frame_free( & amp;frame);
    av_packet_free( & amp;pkt);

    return 0;
}

?

Function introduction

1 Find the encoder; you can also use avcodec_find_encoder(codec_id)

const AVCodec* codec = avcodec_find_encoder_by_name(codec_name);//codec_name = “libx264”;

The AVCodec structure pointer returned by the function contains the encoding ID, supported frame rate, supported pixel format, audio sampling rate, sound channel, encoding level, etc.

 1 /**
 2*AVCodec.
 3*/
 4 typedef struct AVCodec {
 5/**
 6 * Name of the codec implementation.
 7 * The name is globally unique among encoders and among decoders (but an
 8 * encoder and a decoder can share the same name).
 9 * This is the primary way to find a codec from the user perspective.
10 */
11 const char *name;
12/**
13 * Descriptive name for the codec, meant to be more human readable than name.
14 * You should use the NULL_IF_CONFIG_SMALL() macro to define it.
15 */
16 const char *long_name;
17 enum AVMediaType type;
18 enum AVCodecID id;
19/**
20 * Codec capabilities.
21 * see AV_CODEC_CAP_*
22      */
23 int capabilities;
24 uint8_t max_lowres; ///< maximum value for lowres supported by the decoder
25 const AVRational *supported_framerates; ///< array of supported framerates, or NULL if any, array is terminated by {0,0}
26 const enum AVPixelFormat *pix_fmts; ///< array of supported pixel formats, or NULL if unknown, array is terminated by -1
27 const int *supported_samplerates; ///< array of supported audio samplerates, or NULL if unknown, array is terminated by 0
28 const enum AVSampleFormat *sample_fmts; ///< array of supported sample formats, or NULL if unknown, array is terminated by -1
29 #if FF_API_OLD_CHANNEL_LAYOUT
30/**
31 * @deprecated use ch_layouts instead
32 */
33 attribute_deprecated
34 const uint64_t *channel_layouts; ///< array of support channel layouts, or NULL if unknown. array is terminated by 0
35 #endif
36 const AVClass *priv_class; ///< AVClass for the private context
37 const AVProfile *profiles; ///< array of recognized profiles, or NULL if unknown, array is terminated by {FF_PROFILE_UNKNOWN}
38
39/**
40 * Group name of the codec implementation.
41 * This is a short symbolic name of the wrapper backing this codec. A
42 * wrapper uses some kind of external implementation for the codec, such
43 * as an external library, or a codec implementation provided by the OS or
44 * the hardware.
45 * If this field is NULL, this is a builtin, libavcodec native codec.
46 * If non-NULL, this will be the suffix in AVCodec.name in most cases
47 * (usually AVCodec.name will be of the form "<codec_name>_<wrapper_name>").
48 */
49 const char *wrapper_name;
50
51/**
52 * Array of supported channel layouts, terminated with a zeroed layout.
53 */
54 const AVChannelLayout *ch_layouts;
55 } AVCodec;

2 Assign/create encoder context

Its function is to create a context and set encoding parameters; after creating a context, you can set some parameters, which will affect the encoder;

AVCodecContext* c = NULL; c = avcodec_alloc_context3(codec);

 /* put sample parameters */
    c->bit_rate = 400000; //Output bit rate 4M
    /* resolution must be a multiple of two */
    c->width = 1920; //The width of the input image corresponds to the amount of YUV data
    c->height = 1080; //Height of input image
    /* frames per second */
    c->time_base.num = 1;
    c->time_base.den = 25;
    c->framerate.num = 25; //25 FPS
    c->framerate.den = 1;

    /* emit one intra frame every ten frames
     * check frame pict_type before passing frame
     * to encoder, if frame->pict_type is AV_PICTURE_TYPE_I
     * then gop_size is ignored and the output of encoder
     * will always be I frame irrespective to gop_size
     */
    c->gop_size = 10; //GOP size is 10 frames
    c->max_b_frames = 1;//Insert at most 1 B frame between each I and P frame
    c->pix_fmt = AV_PIX_FMT_YUV420P;//YV 12 planner format; a storage format in which YVU is continuous and does not have UV intersection

3 Initialize the encoder context, created in the second step above. After setting the parameters, initialize the context and open the encoder

avcodec_open2(c, codec, NULL[user-defined encoding options]);//const AVCodec* codec; AVCodecContext* c = NULL;

4 Create frame and package structures to carry data

The uncompressed image is placed in the frame structure, and the encoded data is placed in the package structure; not all open source codes store uncompressed data in the frame, and this depends on the specific situation; in the test of this blog In the use case, the frame stores uncompressed YUV data; in some places, the frame stores 264 and 265 data. Different usage scenarios have different meanings.

The data in ffmpeg stores uncompressed data, which can store tiling mode planner or (cross-storage) data;

frame = av_frame_alloc();

pkt = av_packet_alloc();

 frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }
    frame->format = c->pix_fmt;//Color format, YUV420P can be set
    frame->width = c->width;
    frame->height = c->height;

    ret = av_frame_get_buffer(frame, 0);//Before each time you reuse the buffer to install YUV, you must first ensure that the buffer is not occupied and is writable, av_frame_make_writable
    if (ret < 0) {
        fprintf(stderr, "Could not allocate the video frame data\\
");
        exit(1);
    }

5 Read the YUV data, put it in the data of the frame, and encode it; note that the input and output buffers here are multiplexed, and it is a read-encode-output, serial logic

 int res_len = fread(buffer, len, 1, fpin);
       if (res_len > 0)
       {
           frame->data[0] = buffer;                                        //Y component
           frame->data[2] = buffer + frame->width * frame->height;         //V component
           frame->data[1] = buffer + frame->width * frame->height * 5 / 4; //U component
           frame->pts += 40; //Display timestamp
           //frame can also carry per-frame settings: key-frame request, picture type, etc.

           /* encode the image */
           encode(c, frame, pkt, file_out_ptr);
       }

5 AVpackage is used to save the encoded data. If H264 is encoded, then here is the NAL data of H264, which is the ES stream. The PTS and DTS of the current frame will also be stored internally.

The implementation of the encoding function is to send YUV data and obtain Package: avcodec_send_frame(enc_ctx, frame); avcodec_receive_packet(enc_ctx, pkt);

Write the data to the file and dereference the package: fwrite(pkt->data, 1, pkt->size, outfile); av_packet_unref(pkt);

typedef struct AVPacket {
    /**
     * A reference to the reference-counted buffer where the packet data is
     * stored.
     * May be NULL, then the packet data is not reference-counted.
     */
    AVBufferRef *buf;
    /**
     * Presentation timestamp in AVStream->time_base units; the time at which
     * the decompressed packet will be presented to the user.
     * Can be AV_NOPTS_VALUE if it is not stored in the file.
     * pts MUST be larger or equal to dts as presentation cannot happen before
     * decompression, unless one wants to view hex dumps. Some formats misuse
     * the terms dts and pts/cts to mean something different. Such timestamps
     * must be converted to true pts/dts before they are stored in AVPacket.
     */
    int64_t pts;
    /**
     * Decompression timestamp in AVStream->time_base units; the time at which
     * the packet is decompressed.
     * Can be AV_NOPTS_VALUE if it is not stored in the file.
     */
    int64_t dts;
    uint8_t *data;
    int size;
    int stream_index;
    /**
     * A combination of AV_PKT_FLAG values
     */
    int flags;
    /**
     * Additional packet data that can be provided by the container.
     * Packet can contain several types of side information.
     */
    AVPacketSideData *side_data;
    int side_data_elems;

    /**
     * Duration of this packet in AVStream->time_base units, 0 if unknown.
     * Equals next_pts - this_pts in presentation order.
     */
    int64_t duration;

    int64_t pos; ///< byte position in stream, -1 if unknown

    /**
     * for some private data of the user
     */
    void *opaque;

    /**
     * AVBufferRef for free use by the API user. FFmpeg will never check the
     * contents of the buffer ref. FFmpeg calls av_buffer_unref() on it when
     * the packet is unreferenced. av_packet_copy_props() calls create a new
     * reference with av_buffer_ref() for the target packet's opaque_ref field.
     *
     * This is unrelated to the opaque field, although it serves a similar
     * purpose.
     */
    AVBufferRef *opaque_ref;

    /**
     * Time base of the packet's timestamps.
     * In the future, this field may be set on packets output by encoders or
     * demuxers, but its value will be ignored by default on input to decoders
     * or muxers.
     */
    AVRational time_base;
} AVPacket;

6 After encoding, call the encoding of the empty data again. In case there is data that is not refreshed in the encoder

/* flush the encoder */
encode(c, NULL, pkt, fout);

And add the mpeg end mark uint8_t endcode[] = { 0, 0, 1, 0xb7 }; at the end of the encoded output file;

7 The last step is to release the context, release the frame, package, and close the file.

avcodec_free_context(&c);
av_frame_free(&frame);
av_packet_free(&pkt);

The following provides a real coding example: (visual studio 2022)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* FFmpeg's headers are C; give them C linkage when compiled as C++.
   Only the includes belong inside extern "C" — not our own code. */
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/opt.h>
#include <libavutil/imgutils.h>
}

/* av_err2str() uses a C99 compound literal, which is invalid C++;
   replace it with a version backed by a static buffer. */
static char av_error[AV_ERROR_MAX_STRING_SIZE] = { 0 };
#ifdef av_err2str
#undef av_err2str
#endif
#define av_err2str(errnum) av_make_error_string(av_error, AV_ERROR_MAX_STRING_SIZE, errnum)

/* Send one frame (or NULL to flush) to the encoder and write every
   resulting packet to outfile. Exits the process on a fatal error. */
static void encode(AVCodecContext* enc_ctx, AVFrame* frame, AVPacket* pkt,
    FILE* outfile)
{
    int ret;

    /* send the frame to the encoder */
    if (frame)
        printf("Send frame %3" PRId64 "\n", frame->pts);

    ret = avcodec_send_frame(enc_ctx, frame);
    if (ret < 0) {
        fprintf(stderr, "Error sending a frame for encoding\n");
        exit(1);
    }

    while (ret >= 0) {
        ret = avcodec_receive_packet(enc_ctx, pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            return; /* needs more input, or fully flushed */
        else if (ret < 0) {
            fprintf(stderr, "Error during encoding\n");
            exit(1);
        }

        printf("Write packet %3" PRId64 " (size=%5d)\n", pkt->pts, pkt->size);
        fwrite(pkt->data, 1, pkt->size, outfile);
        av_packet_unref(pkt);
    }
}

/* Read raw YV12 frames from a file and encode them to an H.264
   elementary stream. Paths and codec are hard-coded for this demo. */
int main(int argc, char** argv)
{
    const char* filename;    /* encoded .264 output (created by fopen "wb") */
    const char* filename_in; /* raw YV12 input: Y plane, then V, then U      */
    const char* codec_name;
    const AVCodec* codec;
    AVCodecContext* c = NULL;
    int ret, y;
    FILE* f;
    AVFrame* frame;
    AVPacket* pkt;
    /* MPEG-1/2 sequence end code; only written for those codecs */
    uint8_t endcode[] = { 0, 0, 1, 0xb7 };
    const int width = 1920;
    const int height = 1080;
    const int frame_size = width * height * 3 / 2; /* one YUV420 frame */
    uint8_t* buffer = new uint8_t[frame_size];

    filename = "D:/Emscripten/cmaketest/20230801230523.264";
    filename_in = "D:/Emscripten/cmaketest/20230.yuv";
    codec_name = "libx264"; /* you can specify another encoder here */

    /* "rb": binary read — the stray "+" in "rb + " was not a valid mode */
    FILE* fpin = fopen(filename_in, "rb");
    if (!fpin) {
        fprintf(stderr, "Could not open input '%s'\n", filename_in);
        exit(1);
    }

    /* Find the encoder; avcodec_find_encoder(codec_id) also works */
    codec = avcodec_find_encoder_by_name(codec_name);
    if (!codec) {
        fprintf(stderr, "Codec '%s' not found\n", codec_name);
        exit(1);
    }

    c = avcodec_alloc_context3(codec);
    if (!c) {
        fprintf(stderr, "Could not allocate video codec context\n");
        exit(1);
    }

    pkt = av_packet_alloc();
    if (!pkt)
        exit(1);

    /* put sample parameters */
    c->bit_rate = 400000;
    /* resolution must be a multiple of two */
    c->width = width;
    c->height = height;
    /* frames per second: time_base 1/25 means one pts tick = 1/25 s */
    c->time_base.num = 1;
    c->time_base.den = 25;
    c->framerate.num = 25;
    c->framerate.den = 1;

    /* emit one intra frame every ten frames
     * check frame pict_type before passing frame
     * to encoder, if frame->pict_type is AV_PICTURE_TYPE_I
     * then gop_size is ignored and the output of encoder
     * will always be I frame irrespective to gop_size
     */
    c->gop_size = 10;
    c->max_b_frames = 1;
    c->pix_fmt = AV_PIX_FMT_YUV420P; /* planar 4:2:0 */

    if (codec->id == AV_CODEC_ID_H264)
        av_opt_set(c->priv_data, "preset", "slow", 0);

    /* open it */
    ret = avcodec_open2(c, codec, NULL);
    if (ret < 0) {
        fprintf(stderr, "Could not open codec: %s\n", av_err2str(ret));
        exit(1);
    }

    f = fopen(filename, "wb");
    if (!f) {
        fprintf(stderr, "Could not open %s\n", filename);
        exit(1);
    }

    frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }
    frame->format = c->pix_fmt;
    frame->width = c->width;
    frame->height = c->height;

    ret = av_frame_get_buffer(frame, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate the video frame data\n");
        exit(1);
    }

    /* Encode until the input runs out of whole frames.
       Looping on the fread result (not feof) avoids processing a
       truncated final read. */
    int64_t pts = 0;
    while (fread(buffer, 1, frame_size, fpin) == (size_t)frame_size) {
        fflush(stdout);

        /* The encoder may still hold a reference to the frame's buffers;
           this reallocates them only if necessary. */
        ret = av_frame_make_writable(frame);
        if (ret < 0)
            exit(1);

        /* YV12 file layout: full-size Y plane, then quarter-size V, then U.
           Copy row by row into the frame's own buffers because
           frame->linesize[] may be wider than the visible width
           (alignment padding). Overwriting frame->data[] pointers with
           our raw buffer — as the original did — leaks the buffers
           allocated by av_frame_get_buffer() and defeats
           av_frame_make_writable(). */
        const uint8_t* src_y = buffer;
        const uint8_t* src_v = src_y + width * height;
        const uint8_t* src_u = src_v + width * height / 4;
        for (y = 0; y < height; y++)
            memcpy(frame->data[0] + y * frame->linesize[0],
                   src_y + y * width, width);
        for (y = 0; y < height / 2; y++) {
            memcpy(frame->data[1] + y * frame->linesize[1],
                   src_u + y * (width / 2), width / 2);
            memcpy(frame->data[2] + y * frame->linesize[2],
                   src_v + y * (width / 2), width / 2);
        }

        /* pts is in time_base units (1/25 s), so a frame counter is correct;
           the original's "pts += 40" assumed millisecond units. */
        frame->pts = pts++;

        /* encode the image */
        encode(c, frame, pkt, f);
    }

    /* flush the encoder */
    encode(c, NULL, pkt, f);
    if (codec->id == AV_CODEC_ID_MPEG1VIDEO || codec->id == AV_CODEC_ID_MPEG2VIDEO)
        fwrite(endcode, 1, sizeof(endcode), f);
    fclose(f);

    avcodec_free_context(&c);
    av_frame_free(&frame);
    av_packet_free(&pkt);

    delete[] buffer;
    fclose(fpin);
    return 0;
}

Effect:

264 data is opened with elecard:

This is the encoded data when I modified gop =25; without B frame

ffmpeg call test under Windows (YUV420P encoding)