// FfmpegEncoderV2.cpp — FFmpeg-based audio/video encoder implementation.
/*
|
|
* Copyright (c) 2023 Fancy Code.
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
#include "FfmpegEncoderV2.h"
|
|
#include "ILog.h"
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
#include <libavcodec/avcodec.h>
|
|
#include <libavcodec/codec.h>
|
|
#include <libavcodec/codec_id.h>
|
|
#include <libavcodec/packet.h>
|
|
#include <libavformat/avformat.h>
|
|
// #include <libavutil/avassert.h>
|
|
#include <libavutil/avutil.h>
|
|
#include <libavutil/channel_layout.h>
|
|
#include <libavutil/dict.h>
|
|
#include <libavutil/error.h>
|
|
#include <libavutil/frame.h>
|
|
#include <libavutil/mathematics.h>
|
|
#include <libavutil/opt.h>
|
|
#include <libavutil/pixfmt.h>
|
|
#include <libavutil/samplefmt.h>
|
|
#include <libswresample/swresample.h>
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
#include <cstdint>
|
|
#include <errno.h>
|
|
#include <functional>
|
|
constexpr long SOURCE_AUDIO_SAMPEL_RATE = 8000;
|
|
#define STREAM_DURATION 10.0
|
|
#define STREAM_FRAME_RATE 25 /* 25 images/s */
|
|
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */
|
|
/// Construct an encoder wrapper. No FFmpeg resources are allocated here;
/// allocation happens in Init() and OpenEncoder(), release in UnInit().
/// @param codecId           codec to encode with (e.g. AV_CODEC_ID_H264)
/// @param encodePixelFormat pixel format handed to the video encoder
/// @param width             video width in pixels (must be even)
/// @param height            video height in pixels (must be even)
FfmpegEncoderV2::FfmpegEncoderV2(const enum AVCodecID &codecId, const AVPixelFormat &encodePixelFormat,
                                 const int &width, const int &height)
    : mCodecId(codecId),
      mCodecCtx(nullptr),
      mCodec(nullptr),
      mFrame(nullptr),
      mTmpFrame(nullptr),
      mTmpPkt(nullptr),
      mSamplesCount(0),
      mSwrCtx(nullptr),
      next_pts(0),
      mVideoWidth(width),
      mVideoHeight(height),
      mEncodePixelFormat(encodePixelFormat)
{
}
|
|
bool FfmpegEncoderV2::Init(const int &outputFlags)
|
|
{
|
|
mTmpPkt = av_packet_alloc();
|
|
if (!mTmpPkt) {
|
|
LogError("Could not allocate AVPacket\n");
|
|
return false;
|
|
}
|
|
LogInfo("find encoder : %s\n", avcodec_get_name(mCodecId));
|
|
int i = 0;
|
|
/* find the encoder */
|
|
mCodec = (AVCodec *)avcodec_find_encoder(mCodecId);
|
|
if (!mCodec) {
|
|
LogError("Could not find encoder for '%s'\n", avcodec_get_name(mCodecId));
|
|
return false;
|
|
}
|
|
mCodecCtx = avcodec_alloc_context3(mCodec);
|
|
if (!mCodecCtx) {
|
|
LogError("Could not alloc an encoding context\n");
|
|
return false;
|
|
}
|
|
const AVChannelLayout src = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
|
|
switch (mCodec->type) {
|
|
case AVMEDIA_TYPE_AUDIO:
|
|
mCodecCtx->sample_fmt = mCodec->sample_fmts ? mCodec->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
|
|
mCodecCtx->bit_rate = 64000;
|
|
// mCodecCtx->bit_rate = 24000;
|
|
mCodecCtx->sample_rate = 44100;
|
|
if (mCodec->supported_samplerates) {
|
|
mCodecCtx->sample_rate = mCodec->supported_samplerates[0];
|
|
for (i = 0; mCodec->supported_samplerates[i]; i++) {
|
|
if (mCodec->supported_samplerates[i] == 44100)
|
|
mCodecCtx->sample_rate = 44100;
|
|
}
|
|
}
|
|
mCodecCtx->sample_rate = 16000;
|
|
// mCodecCtx->time_base = (AVRational){1, mCodecCtx->sample_rate};
|
|
// mCodecCtx->ch_layout.nb_channels = 1;
|
|
// av_channel_layout_default(&mCodecCtx->ch_layout, 1);
|
|
av_channel_layout_copy(&mCodecCtx->ch_layout, &src);
|
|
break;
|
|
|
|
case AVMEDIA_TYPE_VIDEO:
|
|
mCodecCtx->codec_id = mCodecId;
|
|
|
|
mCodecCtx->bit_rate = 300000;
|
|
/* Resolution must be a multiple of two. */
|
|
mCodecCtx->width = mVideoWidth;
|
|
mCodecCtx->height = mVideoHeight;
|
|
/* timebase: This is the fundamental unit of time (in seconds) in terms
|
|
* of which frame timestamps are represented. For fixed-fps content,
|
|
* timebase should be 1/framerate and timestamp increments should be
|
|
* identical to 1. */
|
|
mCodecCtx->time_base = (AVRational){1, STREAM_FRAME_RATE};
|
|
|
|
mCodecCtx->gop_size = 12; /* emit one intra frame every twelve frames at most */
|
|
mCodecCtx->pix_fmt = mEncodePixelFormat;
|
|
if (mCodecCtx->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
|
|
/* just for testing, we also add B-frames */
|
|
mCodecCtx->max_b_frames = 2;
|
|
}
|
|
if (mCodecCtx->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
|
|
/* Needed to avoid using macroblocks in which some coeffs overflow.
|
|
* This does not happen with normal video, it just happens here as
|
|
* the motion of the chroma plane does not match the luma plane. */
|
|
mCodecCtx->mb_decision = 2;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
/* Some formats want stream headers to be separate. */
|
|
if (outputFlags & AVFMT_GLOBALHEADER) {
|
|
mCodecCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
|
|
}
|
|
return true;
|
|
}
|
|
void FfmpegEncoderV2::UnInit(void)
|
|
{
|
|
if (mFrame) {
|
|
av_frame_free(&mFrame);
|
|
mFrame = nullptr;
|
|
}
|
|
if (mTmpFrame) {
|
|
av_frame_free(&mTmpFrame);
|
|
mTmpFrame = nullptr;
|
|
}
|
|
if (mCodecCtx) {
|
|
avcodec_free_context(&mCodecCtx);
|
|
mCodecCtx = nullptr;
|
|
}
|
|
av_packet_free(&mTmpPkt);
|
|
swr_free(&mSwrCtx);
|
|
}
|
|
/// Time base of this encoder: 1/sample_rate for audio streams, the codec
/// context's configured time base for video. Returns the invalid rational
/// {0, -1} (and logs an error) for any other media type.
AVRational FfmpegEncoderV2::GetTimeBase(void)
{
    if (mCodec->type == AVMEDIA_TYPE_AUDIO) {
        return (AVRational){1, mCodecCtx->sample_rate};
    }
    if (mCodec->type == AVMEDIA_TYPE_VIDEO) {
        return mCodecCtx->time_base;
    }
    LogError("Unsupported media type.\n");
    return (AVRational){0, -1};
}
|
|
/// Dispatch to the audio or video open routine based on the codec's media
/// type. Returns false (with a log message) for unsupported types.
/// @param optArg extra codec options; copied by the callee, caller keeps
///        ownership.
/// @param stream muxer stream that receives the codec parameters.
bool FfmpegEncoderV2::OpenEncoder(AVDictionary *optArg, AVStream *stream)
{
    const enum AVMediaType mediaType = mCodec->type;
    if (AVMEDIA_TYPE_AUDIO == mediaType) {
        return OpenAudio(optArg, stream);
    }
    if (AVMEDIA_TYPE_VIDEO == mediaType) {
        return OpenVideo(optArg, stream);
    }
    LogError("Unsupported media type.\n");
    return false;
}
|
|
// static void save_code_stream_file(const void *data, const size_t &size)
|
|
// {
|
|
// char OutPath[16];
|
|
// const void *pData = data;
|
|
// FILE *file = NULL;
|
|
// LogInfo("save_code_stream_file: %d\n", size);
|
|
// sprintf(OutPath, "./test.jpg");
|
|
// file = fopen(OutPath, "a+");
|
|
|
|
// if (file) { // TODO: Don't open very time.
|
|
// fwrite(pData, 1, size, file);
|
|
// fflush(file);
|
|
// }
|
|
|
|
// if (file)
|
|
// fclose(file);
|
|
// }
|
|
/**
 * @brief Encode one frame and hand every resulting packet to the caller.
 *
 * Audio frames are first resampled via ConvertAudioFrame() into the
 * encoder's format; video frames are passed through unchanged. Each packet
 * produced by the encoder has its timestamps rescaled from the codec time
 * base to the stream time base and its stream_index set before the callback
 * is invoked.
 *
 * @param frame    input frame; a null frame is rejected (flushing with a
 *                 null frame is not supported by this path).
 * @param stream   output stream whose time base / index the packets get.
 * @param callback invoked once per encoded packet; the packet is owned by
 *                 this object and is unreferenced after the callback returns.
 * @return 0 on success, 1 when the encoder reported EOF, AVERROR_EXIT on
 *         any failure.
 */
int FfmpegEncoderV2::EncodeData(AVFrame *frame, AVStream *stream, std::function<void(AVPacket *pkt)> callback)
{
    int ret = 0;
    AVFrame *tmpFrame = frame;
    if (AVMEDIA_TYPE_AUDIO == mCodec->type) {
        tmpFrame = ConvertAudioFrame(frame, mSwrCtx);
    }
    if (!tmpFrame) {
        LogError("Could not convert audio frame.\n");
        return AVERROR_EXIT;
    }
    // send the frame to the encoder
    ret = avcodec_send_frame(mCodecCtx, tmpFrame);
    if (ret < 0) {
        char error_str[AV_ERROR_MAX_STRING_SIZE] = {0};
        LogInfo("Error sending a frame to the encoder: %s\n",
                av_make_error_string(error_str, AV_ERROR_MAX_STRING_SIZE, ret));
        return AVERROR_EXIT;
    }

    // Drain every packet the encoder has ready for this frame.
    while (ret >= 0) {
        ret = avcodec_receive_packet(mCodecCtx, mTmpPkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            break;
        }
        if (ret < 0) {
            char error_str[AV_ERROR_MAX_STRING_SIZE] = {0};
            LogInfo("Error encoding a frame: %s\n", av_make_error_string(error_str, AV_ERROR_MAX_STRING_SIZE, ret));
            return AVERROR_EXIT;
        }

        /* rescale output packet timestamp values from codec to stream timebase */
        av_packet_rescale_ts(mTmpPkt, mCodecCtx->time_base, stream->time_base);
        mTmpPkt->stream_index = stream->index;

        if (callback) {
            callback(mTmpPkt);
        }
        // Fix: release the payload now instead of leaving it referenced until
        // the next avcodec_receive_packet() call. This is a safe no-op when
        // the callback already consumed the reference (e.g. via
        // av_interleaved_write_frame()).
        av_packet_unref(mTmpPkt);
    }

    return ret == AVERROR_EOF ? 1 : 0;
}
|
|
/// Open the video codec, allocate the reusable encode frame(s) and copy the
/// codec parameters onto the muxer stream.
/// @param optArg extra codec options (copied; the caller keeps ownership).
/// @param stream muxer stream that receives the codec parameters.
/// @return true on success; on failure the partially-allocated state is
///         cleaned up later by UnInit().
bool FfmpegEncoderV2::OpenVideo(AVDictionary *optArg, AVStream *stream)
{
    AVDictionary *options = nullptr;
    av_dict_copy(&options, optArg, 0);
    av_opt_set(mCodecCtx, "strict", "unofficial", 0); // Add for jpeg
    /* open the codec */
    int status = avcodec_open2(mCodecCtx, mCodec, &options);
    av_dict_free(&options);
    if (status < 0) {
        char error_str[AV_ERROR_MAX_STRING_SIZE] = {0};
        LogError("Could not open video codec: %s\n", av_make_error_string(error_str, AV_ERROR_MAX_STRING_SIZE, status));
        return false;
    }
    /* allocate and init a re-usable frame */
    mFrame = alloc_frame(mCodecCtx->pix_fmt, mCodecCtx->width, mCodecCtx->height);
    if (!mFrame) {
        LogError("Could not allocate video frame\n");
        return false;
    }
    // When the encoder consumes something other than YUV420P, keep an extra
    // YUV420P frame around as a conversion staging buffer.
    if (mCodecCtx->pix_fmt != AV_PIX_FMT_YUV420P) {
        mTmpFrame = alloc_frame(AV_PIX_FMT_YUV420P, mCodecCtx->width, mCodecCtx->height);
        if (!mTmpFrame) {
            LogError("Could not allocate temporary video frame\n");
            return false;
        }
    }
    /* copy the stream parameters to the muxer */
    status = avcodec_parameters_from_context(stream->codecpar, mCodecCtx);
    if (status < 0) {
        LogError("Could not copy the stream parameters\n");
        return false;
    }
    LogInfo(" Open video success, mCodecCtx->pix_fmt = %d\n", mCodecCtx->pix_fmt);
    return true;
}
|
|
/**
 * @brief Open the audio codec, allocate the reusable audio frame and set up
 *        the resampler (mono S16 at SOURCE_AUDIO_SAMPEL_RATE in, encoder
 *        format/rate/layout out).
 * @param optArg extra codec options (copied; the caller keeps ownership).
 * @param stream muxer stream that receives the codec parameters.
 * @return true on success, false on any failure.
 */
bool FfmpegEncoderV2::OpenAudio(AVDictionary *optArg, AVStream *stream)
{
    int nb_samples = 0;
    int ret = 0;
    AVDictionary *opt = nullptr;
    av_dict_copy(&opt, optArg, 0);
    /* open it */
    ret = avcodec_open2(mCodecCtx, mCodec, &opt);
    av_dict_free(&opt);
    if (ret < 0) {
        char error_str[AV_ERROR_MAX_STRING_SIZE] = {0};
        LogError("Could not open audio codec: %s\n", av_make_error_string(error_str, AV_ERROR_MAX_STRING_SIZE, ret));
        return false;
    }
    // Fixed-frame-size codecs dictate nb_samples; otherwise use a generous
    // buffer.
    if (mCodecCtx->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)
        nb_samples = 10000;
    else
        nb_samples = mCodecCtx->frame_size;
    mFrame = alloc_audio_frame(mCodecCtx->sample_fmt, &mCodecCtx->ch_layout, mCodecCtx->sample_rate, nb_samples);
    // Fix: this allocation was previously unchecked; a failure would crash
    // later in ConvertAudioFrame() when mFrame is dereferenced.
    if (!mFrame) {
        LogError("Could not allocate audio frame\n");
        return false;
    }
    /* copy the stream parameters to the muxer */
    ret = avcodec_parameters_from_context(stream->codecpar, mCodecCtx);
    if (ret < 0) {
        LogError("Could not copy the stream parameters\n");
        return false;
    }
    /* create resampler context */
    mSwrCtx = swr_alloc();
    if (!mSwrCtx) {
        LogError("Could not allocate resampler context\n");
        return false;
    }
    // The capture source is mono; resample from it into whatever layout the
    // encoder context was configured with.
    const AVChannelLayout src = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
    AVChannelLayout ch_layout;
    av_channel_layout_copy(&ch_layout, &src);
    /* set options */
    av_opt_set_chlayout(mSwrCtx, "in_chlayout", &ch_layout, 0);
    av_opt_set_int(mSwrCtx, "in_sample_rate", SOURCE_AUDIO_SAMPEL_RATE, 0);
    av_opt_set_sample_fmt(mSwrCtx, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
    av_opt_set_chlayout(mSwrCtx, "out_chlayout", &mCodecCtx->ch_layout, 0);
    av_opt_set_int(mSwrCtx, "out_sample_rate", mCodecCtx->sample_rate, 0);
    av_opt_set_sample_fmt(mSwrCtx, "out_sample_fmt", mCodecCtx->sample_fmt, 0);
    /* initialize the resampling context */
    if ((ret = swr_init(mSwrCtx)) < 0) {
        LogError("Failed to initialize the resampling context\n");
        return false;
    }
    return true;
}
|
|
/**
 * @brief Resample one decoded audio frame into the encoder's sample
 *        format/rate, writing into the reusable member frame mFrame.
 *
 * The resampler is configured in OpenAudio() (mono S16 at
 * SOURCE_AUDIO_SAMPEL_RATE in; encoder format out).
 *
 * @param decodeFrame source frame. Its pts is rescaled from a {1, 1000000}
 *        time base below, i.e. it is assumed to be in microseconds — TODO
 *        confirm against the caller.
 * @param swr_ctx resampler context initialised in OpenAudio().
 * @return mFrame on success, nullptr on failure. The returned frame is
 *         owned by this object and is overwritten on the next call.
 */
AVFrame *FfmpegEncoderV2::ConvertAudioFrame(AVFrame *decodeFrame, struct SwrContext *swr_ctx)
{
    if (nullptr == decodeFrame) {
        LogError("decodeFrame is null\n");
        return nullptr;
    }
    // LogInfo("decodeFrame->pts = %d\n", decodeFrame->pts);
    // decodeFrame->pts = next_pts;
    // next_pts += decodeFrame->nb_samples;
    int ret = 0;
    int dst_nb_samples = 0;
    /* convert samples from native format to destination codec format, using the resampler */
    /* compute destination number of samples */
    // NOTE(review): dst_nb_samples is rounded UP and can exceed the
    // nb_samples capacity mFrame was allocated with in OpenAudio(); if it
    // does, swr_convert() below is told there is more output space than
    // mFrame actually has. Verify upstream frame sizes keep this in bounds.
    dst_nb_samples = av_rescale_rnd(swr_get_delay(swr_ctx, mCodecCtx->sample_rate) + decodeFrame->nb_samples,
                                    mCodecCtx->sample_rate,
                                    SOURCE_AUDIO_SAMPEL_RATE,
                                    AV_ROUND_UP);
    // av_assert0(dst_nb_samples == decodeFrame->nb_samples);

    /* when we pass a frame to the encoder, it may keep a reference to it
     * internally;
     * make sure we do not overwrite it here
     */
    ret = av_frame_make_writable(mFrame);
    if (ret < 0) {
        LogError("av_frame_make_writable failed\n");
        return nullptr;
    }

    /* convert to destination format */
    ret = swr_convert(
        swr_ctx, mFrame->data, dst_nb_samples, (const uint8_t **)decodeFrame->data, decodeFrame->nb_samples);
    if (ret < 0) {
        LogError("Error while converting\n");
        return nullptr;
    }
    // LogInfo("mCodecCtx->time_base.num = %d, mCodecCtx->time_base.den=%d\n",
    //         mCodecCtx->time_base.num,
    //         mCodecCtx->time_base.den);
    // Rescale the incoming pts (assumed microseconds) to the encoder's
    // time base so the encoder emits consistent timestamps.
    mFrame->pts = av_rescale_q(decodeFrame->pts, (AVRational){1, 1000000}, mCodecCtx->time_base);
    // LogInfo("decodeFrame->pts = %d\n", decodeFrame->pts);
    // LogInfo("mFrame->pts = %d\n", mFrame->pts);
    // NOTE(review): this accumulates the *requested* sample count, not the
    // actual count swr_convert() returned in `ret`; confirm callers do not
    // rely on exact sample accounting.
    mSamplesCount += dst_nb_samples;
    return mFrame;
}
|
|
/**
 * @brief Allocate an AVFrame plus its data buffers for the given pixel
 *        format and dimensions.
 * @return the new frame, or nullptr on failure (nothing is leaked).
 */
AVFrame *FfmpegEncoderV2::alloc_frame(enum AVPixelFormat pix_fmt, int width, int height)
{
    AVFrame *frame = av_frame_alloc();
    if (!frame)
        return nullptr;

    frame->format = pix_fmt;
    frame->width = width;
    frame->height = height;

    /* allocate the buffers for the frame data */
    int ret = av_frame_get_buffer(frame, 0);
    if (ret < 0) {
        LogInfo("Could not allocate frame data.\n");
        // Fix: the AVFrame itself was leaked on this error path.
        av_frame_free(&frame);
        return nullptr;
    }

    return frame;
}
|
|
/**
 * @brief Allocate an audio AVFrame with the given format, channel layout,
 *        sample rate and sample count. Buffers are only allocated when
 *        nb_samples is non-zero.
 * @return the new frame, or nullptr on failure (nothing is leaked).
 */
AVFrame *FfmpegEncoderV2::alloc_audio_frame(enum AVSampleFormat sample_fmt, const AVChannelLayout *channel_layout,
                                            int sample_rate, int nb_samples)
{
    AVFrame *frame = av_frame_alloc();
    if (!frame) {
        LogError("Error allocating an audio frame\n");
        return nullptr;
    }

    frame->format = sample_fmt;
    av_channel_layout_copy(&frame->ch_layout, channel_layout);
    frame->sample_rate = sample_rate;
    frame->nb_samples = nb_samples;

    if (nb_samples) {
        if (av_frame_get_buffer(frame, 0) < 0) {
            LogError("Error allocating an audio buffer\n");
            // Fix: the AVFrame itself was leaked on this error path.
            av_frame_free(&frame);
            return nullptr;
        }
    }

    return frame;
}