FFmpeg的AVcodecParser

文章目录

- 结构体
- 操作函数
- 支持的AVCodecParser

这个模块是AVCodec中的子模块，专门用来提前解析码流的元数据，为后面的解码做准备，这一点对cuda-NVdec非常明显，英伟达解码器的元数据解析是放在CPU上的，所以就非常依赖这个解析模块。
FFmpeg中有很多现成的parser
在这里插入图片描述

结构体

下面是它的上下文


typedef struct AVCodecParserContext {
    void *priv_data;
    const struct AVCodecParser *parser;
    int64_t frame_offset; /* offset of the current frame */
    int64_t cur_offset; /* current offset
                           (incremented by each av_parser_parse()) */
    int64_t next_frame_offset; /* offset of the next frame */
    /* video info */
    int pict_type; /* XXX: Put it back in AVCodecContext. */
    /**
     * This field is used for proper frame duration computation in lavf.
     * It signals, how much longer the frame duration of the current frame
     * is compared to normal frame duration.
     *
     * frame_duration = (1 + repeat_pict) * time_base
     *
     * It is used by codecs like H.264 to display telecined material.
     */
    int repeat_pict; /* XXX: Put it back in AVCodecContext. */
    int64_t pts;     /* pts of the current frame */
    int64_t dts;     /* dts of the current frame */

    /* private data */
    int64_t last_pts;
    int64_t last_dts;
    int fetch_timestamp;

#define AV_PARSER_PTS_NB 4
    int cur_frame_start_index;
    int64_t cur_frame_offset[AV_PARSER_PTS_NB];
    int64_t cur_frame_pts[AV_PARSER_PTS_NB];
    int64_t cur_frame_dts[AV_PARSER_PTS_NB];

    int flags;
#define PARSER_FLAG_COMPLETE_FRAMES           0x0001
#define PARSER_FLAG_ONCE                      0x0002
/// Set if the parser has a valid file offset
#define PARSER_FLAG_FETCHED_OFFSET            0x0004
#define PARSER_FLAG_USE_CODEC_TS              0x1000

    int64_t offset;      ///< byte offset from starting packet start
    int64_t cur_frame_end[AV_PARSER_PTS_NB];

    /**
     * Set by parser to 1 for key frames and 0 for non-key frames.
     * It is initialized to -1, so if the parser doesn't set this flag,
     * old-style fallback using AV_PICTURE_TYPE_I picture type as key frames
     * will be used.
     */
    int key_frame;

    // Timestamp generation support:
    /**
     * Synchronization point for start of timestamp generation.
     *
     * Set to >0 for sync point, 0 for no sync point and <0 for undefined
     * (default).
     *
     * For example, this corresponds to presence of H.264 buffering period
     * SEI message.
     */
    int dts_sync_point;

    /**
     * Offset of the current timestamp against last timestamp sync point in
     * units of AVCodecContext.time_base.
     *
     * Set to INT_MIN when dts_sync_point unused. Otherwise, it must
     * contain a valid timestamp offset.
     *
     * Note that the timestamp of sync point has usually a nonzero
     * dts_ref_dts_delta, which refers to the previous sync point. Offset of
     * the next frame after timestamp sync point will be usually 1.
     *
     * For example, this corresponds to H.264 cpb_removal_delay.
     */
    int dts_ref_dts_delta;

    /**
     * Presentation delay of current frame in units of AVCodecContext.time_base.
     *
     * Set to INT_MIN when dts_sync_point unused. Otherwise, it must
     * contain valid non-negative timestamp delta (presentation time of a frame
     * must not lie in the past).
     *
     * This delay represents the difference between decoding and presentation
     * time of the frame.
     *
     * For example, this corresponds to H.264 dpb_output_delay.
     */
    int pts_dts_delta;

    /**
     * Position of the packet in file.
     *
     * Analogous to cur_frame_pts/dts
     */
    int64_t cur_frame_pos[AV_PARSER_PTS_NB];

    /**
     * Byte position of currently parsed frame in stream.
     */
    int64_t pos;

    /**
     * Previous frame byte position.
     */
    int64_t last_pos;

    /**
     * Duration of the current frame.
     * For audio, this is in units of 1 / AVCodecContext.sample_rate.
     * For all other types, this is in units of AVCodecContext.time_base.
     */
    int duration;

    enum AVFieldOrder field_order;

    /**
     * Indicate whether a picture is coded as a frame, top field or bottom field.
     *
     * For example, H.264 field_pic_flag equal to 0 corresponds to
     * AV_PICTURE_STRUCTURE_FRAME. An H.264 picture with field_pic_flag
     * equal to 1 and bottom_field_flag equal to 0 corresponds to
     * AV_PICTURE_STRUCTURE_TOP_FIELD.
     */
    enum AVPictureStructure picture_structure;

    /**
     * Picture number incremented in presentation or output order.
     * This field may be reinitialized at the first picture of a new sequence.
     *
     * For example, this corresponds to H.264 PicOrderCnt.
     */
    int output_picture_number;

    /**
     * Dimensions of the decoded video intended for presentation.
     */
    int width;
    int height;

    /**
     * Dimensions of the coded video.
     */
    int coded_width;
    int coded_height;

    /**
     * The format of the coded data, corresponds to enum AVPixelFormat for video
     * and for enum AVSampleFormat for audio.
     *
     * Note that a decoder can have considerable freedom in how exactly it
     * decodes the data, so the format reported here might be different from the
     * one returned by a decoder.
     */
    int format;
} AVCodecParserContext;

下面是插件接口的入口。
如果你要实现一个自己的parse，只要简单的重写下面四个函数就可以了。

typedef struct AVCodecParser {
    int codec_ids[7]; /* several codec IDs are permitted */
    int ;
    int (*parser_init)(AVCodecParserContext *s);
    /* This callback never returns an error, a negative value means that
     * the frame start was in a previous packet. */
    int (*parser_parse)(AVCodecParserContext *s,
                        AVCodecContext *avctx,
                        const uint8_t **poutbuf, int *poutbuf_size,
                        const uint8_t *buf, int buf_size);
    void (*parser_close)(AVCodecParserContext *s);
    int (*split)(AVCodecContext *avctx, const uint8_t *buf, int buf_size);
} AVCodecParser;

特别注意上面的priv_data_size是AVCodecParserContext中的priv_data，这里面就是保存各种元信息。比如说对于h264来说，里面保存的就是：

static av_cold int init(AVCodecParserContext *s)
{
    H264ParseContext *p = s->priv_data;
	...
    return 0;
}

其实就是sps,pps,sei的值

typedef struct H264ParseContext {
    ParseContext pc;
    H264ParamSets ps;
    H264DSPContext h264dsp;
    H264POCContext poc;
    H264SEIContext sei;
    int is_avc;
    int nal_length_size;
    int got_first;
    int picture_structure;
    uint8_t parse_history[6];
    int parse_history_count;
    int parse_last_mb;
    int64_t reference_dts;
    int last_frame_num, last_picture_structure;
} H264ParseContext;

比如h264就是重写了下面三个函数接口。

const AVCodecParser ff_h264_parser = {
    .codec_ids      = { AV_CODEC_ID_H264 },
    .priv_data_size = sizeof(H264ParseContext),
    .parser_init    = init,
    .parser_parse   = h264_parse,
    .parser_close   = h264_close,
};

操作函数

下面是操作函数，第一个是列出所有的parser

/**
 * Iterate over all registered codec parsers.
 *
 * @param opaque a pointer where libavcodec will store the iteration state. Must
 *               point to NULL to start the iteration.
 *
 * @return the next registered codec parser or NULL when the iteration is
 *         finished
 */
const AVCodecParser *av_parser_iterate(void **opaque);

第二个是初始化parser

AVCodecParserContext *av_parser_init(int codec_id);

第三个是最重要的，也就是解析pkt

/**
 * Parse a packet.
 *
 * @param s             parser context.
 * @param avctx         codec context.
 * @param poutbuf       set to pointer to parsed buffer or NULL if not yet finished.
 * @param poutbuf_size  set to size of parsed buffer or zero if not yet finished.
 * @param buf           input buffer.
 * @param buf_size      buffer size in bytes without the padding. I.e. the full buffer
                        size is assumed to be buf_size + AV_INPUT_BUFFER_PADDING_SIZE.
                        To signal EOF, this should be 0 (so that the last frame
                        can be output).
 * @param pts           input presentation timestamp.
 * @param dts           input decoding timestamp.
 * @param pos           input byte position in stream.
 * @return the number of bytes of the input bitstream used.
 *
 * Example:
 * @code
 *   while(in_len){
 *       len = av_parser_parse2(myparser, AVCodecContext, &data, &size,
 *                                        in_data, in_len,
 *                                        pts, dts, pos);
 *       in_data += len;
 *       in_len  -= len;
 *
 *       if(size)
 *          decode_frame(data, size);
 *   }
 * @endcode
 */
int av_parser_parse2(AVCodecParserContext *s,
                     AVCodecContext *avctx,
                     uint8_t **poutbuf, int *poutbuf_size,
                     const uint8_t *buf, int buf_size,
                     int64_t pts, int64_t dts,
                     int64_t pos);

最后一个是close parser

void av_parser_close(AVCodecParserContext *s);

下面是一个用例


int main(int argc, char **argv)
{
    const char *filename, *outfilename;
    const AVCodec *codec;
    AVCodecParserContext *parser;
    AVCodecContext *c= NULL;
    FILE *f;
    AVFrame *frame;
    uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
    uint8_t *data;
    size_t   data_size;
    int ret;
    AVPacket *pkt;

    if (argc <= 2) {
        fprintf(stderr, "Usage: %s <input file> <output file>\n"
                "And check your input file is encoded by mpeg1video please.\n", argv[0]);
        exit(0);
    }
    filename    = argv[1];
    outfilename = argv[2];

    pkt = av_packet_alloc();
    if (!pkt)
        exit(1);

    /* set end of buffer to 0 (this ensures that no overreading happens for damaged MPEG streams) */
    memset(inbuf + INBUF_SIZE, 0, AV_INPUT_BUFFER_PADDING_SIZE);

    /* find the MPEG-1 video decoder */
    codec = avcodec_find_decoder(AV_CODEC_ID_MPEG1VIDEO);
    if (!codec) {
        fprintf(stderr, "Codec not found\n");
        exit(1);
    }

    parser = av_parser_init(codec->id);
    if (!parser) {
        fprintf(stderr, "parser not found\n");
        exit(1);
    }

    c = avcodec_alloc_context3(codec);
    if (!c) {
        fprintf(stderr, "Could not allocate video codec context\n");
        exit(1);
    }

    /* For some codecs, such as msmpeg4 and mpeg4, width and height
       MUST be initialized there because this information is not
       available in the bitstream. */

    /* open it */
    if (avcodec_open2(c, codec, NULL) < 0) {
        fprintf(stderr, "Could not open codec\n");
        exit(1);
    }

    f = fopen(filename, "rb");
    if (!f) {
        fprintf(stderr, "Could not open %s\n", filename);
        exit(1);
    }

    frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }

    while (!feof(f)) {
        /* read raw data from the input file */
        data_size = fread(inbuf, 1, INBUF_SIZE, f);
        if (!data_size)
            break;

        /* use the parser to split the data into frames */
        data = inbuf;
        while (data_size > 0) {
            ret = av_parser_parse2(parser, c, &pkt->data, &pkt->size,
                                   data, data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
            if (ret < 0) {
                fprintf(stderr, "Error while parsing\n");
                exit(1);
            }
            data      += ret;
            data_size -= ret;

            if (pkt->size)
                decode(c, frame, pkt, outfilename);
        }
    }

    /* flush the decoder */
    decode(c, frame, NULL, outfilename);

    fclose(f);

    av_parser_close(parser);
    avcodec_free_context(&c);
    av_frame_free(&frame);
    av_packet_free(&pkt);

    return 0;
}

上面的例子展示了一个从文件中读取码流，然后通过av_parser_parse2将码流解析为一个个分离的NALU单元，然后送去解码，我们可以看到，第二个参数是c，也就是说将来要把sps,pps，sei这些信息保存在c中的extradata中。

下面第一个函数是av_parser_init，看到除了分配空间外，就是对parser->parser_init(s)的封装。


AVCodecParserContext *av_parser_init(int codec_id)
{
    AVCodecParserContext *s = NULL;
    const AVCodecParser *parser;
    void *i = 0;
    int ret;

    if (codec_id == AV_CODEC_ID_NONE)
        return NULL;

    while ((parser = av_parser_iterate(&i))) {
        if (parser->codec_ids[0] == codec_id ||
            parser->codec_ids[1] == codec_id ||
            parser->codec_ids[2] == codec_id ||
            parser->codec_ids[3] == codec_id ||
            parser->codec_ids[4] == codec_id ||
            parser->codec_ids[5] == codec_id ||
            parser->codec_ids[6] == codec_id)
            goto found;
    }
    return NULL;

found:
    s = av_mallocz(sizeof(AVCodecParserContext));
    if (!s)
        goto err_out;
    s->parser = parser;
    s->priv_data = av_mallocz(parser->priv_data_size);
    if (!s->priv_data)
        goto err_out;
    s->fetch_timestamp=1;
    s->pict_type = AV_PICTURE_TYPE_I;
    if (parser->parser_init) {
        ret = parser->parser_init(s);
        if (ret != 0)
            goto err_out;
    }
    s->key_frame            = -1;
    s->dts_sync_point       = INT_MIN;
    s->dts_ref_dts_delta    = INT_MIN;
    s->pts_dts_delta        = INT_MIN;
    s->format               = -1;

    return s;

err_out:
    if (s)
        av_freep(&s->priv_data);
    av_free(s);
    return NULL;
}

在这里插入图片描述
第二个函数也很简单av_parser_parse2，内部其实也是对parser_parse的封装。

int av_parser_parse2(AVCodecParserContext *s, AVCodecContext *avctx,
                     uint8_t **poutbuf, int *poutbuf_size,
                     const uint8_t *buf, int buf_size,
                     int64_t pts, int64_t dts, int64_t pos){
   ...
    index = s->parser->parser_parse(s, avctx, (const uint8_t **) poutbuf,
                                    poutbuf_size, buf, buf_size);
                                   
  ...
  }

第三个函数av_parser_close就更加简单了

void av_parser_close(AVCodecParserContext *s)
{
    if (s) {
        if (s->parser->parser_close)
            s->parser->parser_close(s);
        av_freep(&s->priv_data);
        av_free(s);
    }
}

上面全部是架构代码，如果想要查看详细的解析过程，可以单独看每个插件的内容，也就是那三个函数。

支持的AVCodecParser


extern const AVCodecParser ff_aac_parser;
extern const AVCodecParser ff_aac_latm_parser;
extern const AVCodecParser ff_ac3_parser;
extern const AVCodecParser ff_adx_parser;
extern const AVCodecParser ff_amr_parser;
extern const AVCodecParser ff_av1_parser;
extern const AVCodecParser ff_avs2_parser;
extern const AVCodecParser ff_avs3_parser;
extern const AVCodecParser ff_bmp_parser;
extern const AVCodecParser ff_cavsvideo_parser;
extern const AVCodecParser ff_cook_parser;
extern const AVCodecParser ff_cri_parser;
extern const AVCodecParser ff_dca_parser;
extern const AVCodecParser ff_dirac_parser;
extern const AVCodecParser ff_dnxhd_parser;
extern const AVCodecParser ff_dolby_e_parser;
extern const AVCodecParser ff_dpx_parser;
extern const AVCodecParser ff_dvaudio_parser;
extern const AVCodecParser ff_dvbsub_parser;
extern const AVCodecParser ff_dvdsub_parser;
extern const AVCodecParser ff_dvd_nav_parser;
extern const AVCodecParser ff_flac_parser;
extern const AVCodecParser ff_g723_1_parser;
extern const AVCodecParser ff_g729_parser;
extern const AVCodecParser ff_gif_parser;
extern const AVCodecParser ff_gsm_parser;
extern const AVCodecParser ff_h261_parser;
extern const AVCodecParser ff_h263_parser;
extern const AVCodecParser ff_h264_parser;
extern const AVCodecParser ff_hevc_parser;
extern const AVCodecParser ff_ipu_parser;
extern const AVCodecParser ff_jpeg2000_parser;
extern const AVCodecParser ff_mjpeg_parser;
extern const AVCodecParser ff_mlp_parser;
extern const AVCodecParser ff_mpeg4video_parser;
extern const AVCodecParser ff_mpegaudio_parser;
extern const AVCodecParser ff_mpegvideo_parser;
extern const AVCodecParser ff_opus_parser;
extern const AVCodecParser ff_png_parser;
extern const AVCodecParser ff_pnm_parser;
extern const AVCodecParser ff_rv30_parser;
extern const AVCodecParser ff_rv40_parser;
extern const AVCodecParser ff_sbc_parser;
extern const AVCodecParser ff_sipr_parser;
extern const AVCodecParser ff_tak_parser;
extern const AVCodecParser ff_vc1_parser;
extern const AVCodecParser ff_vorbis_parser;
extern const AVCodecParser ff_vp3_parser;
extern const AVCodecParser ff_vp8_parser;
extern const AVCodecParser ff_vp9_parser;
extern const AVCodecParser ff_webp_parser;
extern const AVCodecParser ff_xbm_parser;
extern const AVCodecParser ff_xma_parser;