ffmpeg: mux MP4 with H.264 video and G.711 A-law (g711a) audio.

This commit is contained in:
Fancy code 2024-06-29 20:03:12 +08:00
parent 00571f6917
commit abc8fb252a
9 changed files with 135 additions and 96 deletions

View File

@ -40,6 +40,7 @@ FfmpegDecoder::FfmpegDecoder(const enum AVCodecID &codecId)
bool FfmpegDecoder::Init(void)
{
int ret = 0;
LogInfo("find decoder : %s\n", avcodec_get_name(mCodecId));
mCodec = (AVCodec *)avcodec_find_decoder(mCodecId);
// mCodec = (AVCodec *)avcodec_find_decoder_by_name("libfdk_aac");
if (!(mCodec)) {
@ -54,8 +55,9 @@ bool FfmpegDecoder::Init(void)
if (AVMEDIA_TYPE_AUDIO == mCodec->type) {
LogInfo("Audio decoder.\n");
/* put sample parameters */
mCodecCtx->bit_rate = 352800;
mCodecCtx->sample_rate = 8000;
mCodecCtx->bit_rate = 64000;
// mCodecCtx->bit_rate = 352800;
// mCodecCtx->sample_rate = 8000;
/* check that the encoder supports s16 pcm input */
mCodecCtx->sample_fmt = AV_SAMPLE_FMT_S16;
@ -82,6 +84,15 @@ bool FfmpegDecoder::Init(void)
LogError("Could not allocate video frame\n");
return false;
}
if (AVMEDIA_TYPE_AUDIO == mCodec->type) {
mFrame->nb_samples = mCodecCtx->frame_size;
mFrame->format = mCodecCtx->sample_fmt;
ret = av_channel_layout_copy(&(mFrame->ch_layout), &(mCodecCtx->ch_layout));
if (ret < 0) {
LogError("Could not copy channel layout\n");
return false;
}
}
return true;
}
bool FfmpegDecoder::UnInit(void)
@ -104,7 +115,7 @@ void FfmpegDecoder::DecodeData(const void *data, const size_t &size, std::functi
packet->size = size;
int ret = avcodec_send_packet(mCodecCtx, packet);
if (ret < 0) {
LogInfo("Error sending a packet for decoding\n");
LogError("Error sending a packet for decoding\n");
av_packet_unref(packet);
av_packet_free(&packet);
return;
@ -115,7 +126,7 @@ void FfmpegDecoder::DecodeData(const void *data, const size_t &size, std::functi
break;
}
if (ret < 0) {
LogInfo("Error during decoding\n");
LogError("Error during decoding\n");
break;
}
if (callback) {
@ -179,42 +190,4 @@ int FfmpegDecoder::check_sample_fmt(const AVCodec *codec, enum AVSampleFormat sa
p++;
}
return 0;
}
// bool FfmpegDecoder::ConvertAudioFrame(AVFrame *decodeFrame, AVCodecContext *c, struct SwrContext *swr_ctx)
// {
// if (nullptr == decodeFrame) {
// LogError("decodeFrame is null\n");
// return false;
// }
// int ret = 0;
// int dst_nb_samples = 0;
// /* convert samples from native format to destination codec format, using the resampler */
// /* compute destination number of samples */
// dst_nb_samples = av_rescale_rnd(
// swr_get_delay(swr_ctx, c->sample_rate) + decodeFrame->nb_samples, c->sample_rate, c->sample_rate,
// AV_ROUND_UP);
// av_assert0(dst_nb_samples == decodeFrame->nb_samples);
// /* when we pass a frame to the encoder, it may keep a reference to it
// * internally;
// * make sure we do not overwrite it here
// */
// ret = av_frame_make_writable(ost->frame);
// if (ret < 0) {
// LogError("av_frame_make_writable failed\n");
// return false;
// }
// /* convert to destination format */
// ret = swr_convert(
// swr_ctx, ost->frame->data, dst_nb_samples, (const uint8_t **)decodeFrame->data, decodeFrame->nb_samples);
// if (ret < 0) {
// LogError("Error while converting\n");
// return false;
// }
// decodeFrame = ost->frame;
// decodeFrame->pts = av_rescale_q(ost->samples_count, (AVRational){1, c->sample_rate}, c->time_base);
// ost->samples_count += dst_nb_samples;
// return true;
// }
}

View File

@ -46,7 +46,6 @@ private:
static int select_sample_rate(const AVCodec *codec);
static int select_channel_layout(const AVCodec *codec, AVChannelLayout *dst);
static int check_sample_fmt(const AVCodec *codec, enum AVSampleFormat sample_fmt);
// static bool ConvertAudioFrame(AVFrame *decodeFrame, AVCodecContext *c, struct SwrContext *swr_ctx);
private:
const enum AVCodecID mCodecId;

View File

@ -22,11 +22,13 @@ extern "C" {
#include <libavcodec/codec_id.h>
#include <libavcodec/packet.h>
#include <libavformat/avformat.h>
#include <libavutil/avassert.h>
#include <libavutil/avutil.h>
#include <libavutil/channel_layout.h>
#include <libavutil/dict.h>
#include <libavutil/error.h>
#include <libavutil/frame.h>
#include <libavutil/mathematics.h>
#include <libavutil/opt.h>
#include <libavutil/pixfmt.h>
#include <libavutil/samplefmt.h>
@ -34,13 +36,15 @@ extern "C" {
#ifdef __cplusplus
}
#endif
#include <cstdint>
#include <errno.h>
#include <functional>
#define STREAM_DURATION 10.0
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */
FfmpegEncoder::FfmpegEncoder(const enum AVCodecID &codecId)
: mCodecId(codecId), mCodecCtx(nullptr), mCodec(nullptr), mFrame(nullptr), mTmpFrame(nullptr), mTmpPkt(nullptr)
: mCodecId(codecId), mCodecCtx(nullptr), mCodec(nullptr), mFrame(nullptr), mTmpFrame(nullptr), mTmpPkt(nullptr),
mSamplesCount(0), mSwrCtx(nullptr), next_pts(0)
{
}
bool FfmpegEncoder::Init(int &outputFlags)
@ -50,6 +54,7 @@ bool FfmpegEncoder::Init(int &outputFlags)
LogError("Could not allocate AVPacket\n");
return false;
}
LogInfo("find encoder : %s\n", avcodec_get_name(mCodecId));
int i = 0;
/* find the encoder */
mCodec = (AVCodec *)avcodec_find_encoder(mCodecId);
@ -75,6 +80,7 @@ bool FfmpegEncoder::Init(int &outputFlags)
mCodecCtx->sample_rate = 44100;
}
}
mCodecCtx->sample_rate = 8000;
av_channel_layout_copy(&mCodecCtx->ch_layout, &src);
// st->time_base = (AVRational){1, mCodecCtx->sample_rate};
break;
@ -84,8 +90,8 @@ bool FfmpegEncoder::Init(int &outputFlags)
mCodecCtx->bit_rate = 400000;
/* Resolution must be a multiple of two. */
mCodecCtx->width = 352;
mCodecCtx->height = 288;
mCodecCtx->width = 1920;
mCodecCtx->height = 2160;
/* timebase: This is the fundamental unit of time (in seconds) in terms
* of which frame timestamps are represented. For fixed-fps content,
* timebase should be 1/framerate and timestamp increments should be
@ -131,6 +137,7 @@ void FfmpegEncoder::UnInit(void)
mCodecCtx = nullptr;
}
av_packet_free(&mTmpPkt);
swr_free(&mSwrCtx);
}
AVRational FfmpegEncoder::GetTimeBase(void)
{
@ -146,11 +153,11 @@ AVRational FfmpegEncoder::GetTimeBase(void)
return (AVRational){0, -1};
}
}
bool FfmpegEncoder::OpenEncoder(AVDictionary *optArg, AVStream *stream, struct SwrContext *swr_ctx)
bool FfmpegEncoder::OpenEncoder(AVDictionary *optArg, AVStream *stream)
{
switch (mCodec->type) {
case AVMEDIA_TYPE_AUDIO:
return OpenAudio(optArg, stream, swr_ctx);
return OpenAudio(optArg, stream);
case AVMEDIA_TYPE_VIDEO:
return OpenVideo(optArg, stream);
@ -162,9 +169,17 @@ bool FfmpegEncoder::OpenEncoder(AVDictionary *optArg, AVStream *stream, struct S
}
int FfmpegEncoder::EncodeData(AVFrame *frame, AVStream *stream, std::function<void(AVPacket *pkt)> callback)
{
int ret;
int ret = 0;
AVFrame *tmpFrame = frame;
if (AVMEDIA_TYPE_AUDIO == mCodec->type) {
tmpFrame = ConvertAudioFrame(frame, mSwrCtx);
}
if (!tmpFrame) {
LogError("Could not convert audio frame.\n");
return AVERROR_EXIT;
}
// send the frame to the encoder
ret = avcodec_send_frame(mCodecCtx, frame);
ret = avcodec_send_frame(mCodecCtx, tmpFrame);
if (ret < 0) {
char error_str[AV_ERROR_MAX_STRING_SIZE] = {0};
LogInfo("Error sending a frame to the encoder: %s\n",
@ -238,7 +253,7 @@ bool FfmpegEncoder::OpenVideo(AVDictionary *optArg, AVStream *stream)
}
return true;
}
bool FfmpegEncoder::OpenAudio(AVDictionary *optArg, AVStream *stream, struct SwrContext *swr_ctx)
bool FfmpegEncoder::OpenAudio(AVDictionary *optArg, AVStream *stream)
{
int nb_samples = 0;
int ret = 0;
@ -264,15 +279,66 @@ bool FfmpegEncoder::OpenAudio(AVDictionary *optArg, AVStream *stream, struct Swr
LogError("Could not copy the stream parameters\n");
return false;
}
/* create resampler context */
mSwrCtx = swr_alloc();
if (!mSwrCtx) {
LogError("Could not allocate resampler context\n");
return false;
}
/* set options */
av_opt_set_chlayout(swr_ctx, "in_chlayout", &mCodecCtx->ch_layout, 0);
av_opt_set_int(swr_ctx, "in_sample_rate", mCodecCtx->sample_rate, 0);
av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
av_opt_set_chlayout(swr_ctx, "out_chlayout", &mCodecCtx->ch_layout, 0);
av_opt_set_int(swr_ctx, "out_sample_rate", mCodecCtx->sample_rate, 0);
av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", mCodecCtx->sample_fmt, 0);
av_opt_set_chlayout(mSwrCtx, "in_chlayout", &mCodecCtx->ch_layout, 0);
av_opt_set_int(mSwrCtx, "in_sample_rate", mCodecCtx->sample_rate, 0);
av_opt_set_sample_fmt(mSwrCtx, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
av_opt_set_chlayout(mSwrCtx, "out_chlayout", &mCodecCtx->ch_layout, 0);
av_opt_set_int(mSwrCtx, "out_sample_rate", mCodecCtx->sample_rate, 0);
av_opt_set_sample_fmt(mSwrCtx, "out_sample_fmt", mCodecCtx->sample_fmt, 0);
/* initialize the resampling context */
if ((ret = swr_init(mSwrCtx)) < 0) {
LogError("Failed to initialize the resampling context\n");
return false;
}
return true;
}
/**
 * @brief Resample one decoded audio frame into the encoder's frame (mFrame)
 *        and stamp it with an encoder-timebase pts.
 *
 * Side effects: writes decodeFrame->pts (mutates the caller's frame) and
 * advances the member counters next_pts and mSamplesCount.
 *
 * @param decodeFrame source frame from the decoder; must be non-null.
 * @param swr_ctx     initialized resampler context (in/out rates are both
 *                    mCodecCtx->sample_rate per OpenAudio's setup).
 * @return mFrame holding the converted samples, or nullptr on failure.
 */
AVFrame *FfmpegEncoder::ConvertAudioFrame(AVFrame *decodeFrame, struct SwrContext *swr_ctx)
{
if (nullptr == decodeFrame) {
LogError("decodeFrame is null\n");
return nullptr;
}
// Track a running sample-count pts on the source frame; next_pts advances
// by one frame's worth of samples each call.
decodeFrame->pts = next_pts;
next_pts += decodeFrame->nb_samples;
int ret = 0;
int dst_nb_samples = 0;
/* convert samples from native format to destination codec format, using the resampler */
/* compute destination number of samples */
// Both rates passed to av_rescale_rnd are mCodecCtx->sample_rate, so with
// no resampler delay dst_nb_samples equals the input count (asserted below).
dst_nb_samples = av_rescale_rnd(swr_get_delay(swr_ctx, mCodecCtx->sample_rate) + decodeFrame->nb_samples,
mCodecCtx->sample_rate,
mCodecCtx->sample_rate,
AV_ROUND_UP);
av_assert0(dst_nb_samples == decodeFrame->nb_samples);
/* when we pass a frame to the encoder, it may keep a reference to it
 * internally;
 * make sure we do not overwrite it here
 */
// mFrame is the reused member output frame — presumably sized to the codec
// frame_size during Init; TODO(review) confirm nb_samples >= dst_nb_samples.
ret = av_frame_make_writable(mFrame);
if (ret < 0) {
LogError("av_frame_make_writable failed\n");
return nullptr;
}
/* convert to destination format */
// NOTE(review): swr_convert's return value (samples actually written) is only
// checked for error, not stored into mFrame->nb_samples — acceptable here
// only because in/out rates match; verify if rates ever diverge.
ret = swr_convert(
swr_ctx, mFrame->data, dst_nb_samples, (const uint8_t **)decodeFrame->data, decodeFrame->nb_samples);
if (ret < 0) {
LogError("Error while converting\n");
return nullptr;
}
// Rescale the cumulative sample count from 1/sample_rate units into the
// encoder's time_base to produce a monotonically increasing pts.
mFrame->pts = av_rescale_q(mSamplesCount, (AVRational){1, mCodecCtx->sample_rate}, mCodecCtx->time_base);
mSamplesCount += dst_nb_samples;
return mFrame;
}
AVFrame *FfmpegEncoder::alloc_frame(enum AVPixelFormat pix_fmt, int width, int height)
{
AVFrame *frame;

View File

@ -41,12 +41,13 @@ public:
bool Init(int &outputFlags);
void UnInit(void);
AVRational GetTimeBase(void);
bool OpenEncoder(AVDictionary *optArg, AVStream *stream, struct SwrContext *swr_ctx);
bool OpenEncoder(AVDictionary *optArg, AVStream *stream);
int EncodeData(AVFrame *frame, AVStream *stream, std::function<void(AVPacket *pkt)> callback);
private:
bool OpenVideo(AVDictionary *optArg, AVStream *stream);
bool OpenAudio(AVDictionary *optArg, AVStream *stream, struct SwrContext *swr_ctx);
bool OpenAudio(AVDictionary *optArg, AVStream *stream);
AVFrame *ConvertAudioFrame(AVFrame *decodeFrame, struct SwrContext *swr_ctx);
private:
static AVFrame *alloc_frame(enum AVPixelFormat pix_fmt, int width, int height);
@ -60,5 +61,8 @@ private:
AVFrame *mFrame;
AVFrame *mTmpFrame;
AVPacket *mTmpPkt;
int mSamplesCount;
struct SwrContext *mSwrCtx;
int64_t next_pts;
};
#endif

View File

@ -32,6 +32,7 @@ extern "C" {
#include <cstddef>
#include <functional>
#include <memory>
// #include <mutex>
#include <string>
FfmpegMuxStreamV2::FfmpegMuxStreamV2() : mOutputFormat(nullptr), mOptions(nullptr)
{
@ -45,8 +46,12 @@ StatusCode FfmpegMuxStreamV2::CloseOutputFile(void)
if (mOutputFormat && mOutputFormat->pb) {
av_write_trailer(mOutputFormat);
}
mVideoStream->UnInit();
mAudioStream->UnInit();
if (mVideoStream) {
mVideoStream->UnInit();
}
if (mAudioStream) {
mAudioStream->UnInit();
}
if (nullptr == mOutputFormat) {
return CreateStatusCode(STATUS_CODE_OK);
}
@ -59,12 +64,10 @@ StatusCode FfmpegMuxStreamV2::CloseOutputFile(void)
}
void FfmpegMuxStreamV2::GetStreamData(const void *data, const size_t &size, const StreamInfo &streamInfo)
{
if (streamInfo.mType == STREAM_TYPE_VIDEO_H264) {
// GetVideoStream(data, size, streamInfo);
if (streamInfo.mType == STREAM_TYPE_VIDEO_H264 && mVideoStream) {
mVideoStream->WriteSourceData(data, size);
}
if (streamInfo.mType == STREAM_TYPE_AUDIO_G711A) {
// GetAudioStream(data, size, streamInfo);
if (streamInfo.mType == STREAM_TYPE_AUDIO_G711A && mAudioStream) {
mAudioStream->WriteSourceData(data, size);
}
}
@ -81,12 +84,12 @@ StatusCode FfmpegMuxStreamV2::OpenMuxOutputFile(const std::string &fileName)
/* Add the audio and video streams using the default format codecs
* and initialize the codecs. */
if (mOutputFormat->oformat->video_codec != AV_CODEC_ID_NONE) {
mVideoStream = AddStream(mOutputFormat, mOutputFormat->oformat->video_codec);
mVideoStream = AddStream(mOutputFormat, mOutputFormat->oformat->video_codec, AV_CODEC_ID_H264);
mVideoStream->SetWriteSourceDataCallback(
std::bind(&FfmpegMuxStreamV2::GetAVPacketDataCallback, this, std::placeholders::_1));
}
if (mOutputFormat->oformat->audio_codec != AV_CODEC_ID_NONE) {
mAudioStream = AddStream(mOutputFormat, mOutputFormat->oformat->video_codec);
mAudioStream = AddStream(mOutputFormat, mOutputFormat->oformat->audio_codec, AV_CODEC_ID_PCM_ALAW);
mAudioStream->SetWriteSourceDataCallback(
std::bind(&FfmpegMuxStreamV2::GetAVPacketDataCallback, this, std::placeholders::_1));
}
@ -113,6 +116,7 @@ StatusCode FfmpegMuxStreamV2::OpenMuxOutputFile(const std::string &fileName)
}
void FfmpegMuxStreamV2::GetAVPacketDataCallback(AVPacket *pkt)
{
// std::lock_guard<std::mutex> locker(mMutex);
int ret = 0;
ret = av_interleaved_write_frame(mOutputFormat, pkt);
/* pkt is now blank (av_interleaved_write_frame() takes ownership of
@ -124,9 +128,10 @@ void FfmpegMuxStreamV2::GetAVPacketDataCallback(AVPacket *pkt)
av_make_error_string(error_str, AV_ERROR_MAX_STRING_SIZE, ret));
}
}
std::shared_ptr<FfmpegOutputStream> FfmpegMuxStreamV2::AddStream(AVFormatContext *outputFormat, enum AVCodecID codecId)
std::shared_ptr<FfmpegOutputStream> FfmpegMuxStreamV2::AddStream(AVFormatContext *outputFormat,
enum AVCodecID encodecId, enum AVCodecID decodecId)
{
auto stream = std::make_shared<FfmpegOutputStream>(codecId);
auto stream = std::make_shared<FfmpegOutputStream>(encodecId, decodecId);
stream->Init(outputFormat);
return stream;
}

View File

@ -36,6 +36,7 @@ extern "C" {
}
#endif
#include <memory>
#include <mutex>
#include <string>
class FfmpegMuxStreamV2 : virtual public FfmpegBase
{
@ -53,9 +54,11 @@ private:
void GetAVPacketDataCallback(AVPacket *pkt);
private:
static std::shared_ptr<FfmpegOutputStream> AddStream(AVFormatContext *outputFormat, enum AVCodecID codecId);
static std::shared_ptr<FfmpegOutputStream> AddStream(AVFormatContext *outputFormat, enum AVCodecID encodecId,
enum AVCodecID decodecId);
private:
std::mutex mMutex;
AVFormatContext *mOutputFormat;
std::shared_ptr<FfmpegOutputStream> mVideoStream;
std::shared_ptr<FfmpegOutputStream> mAudioStream;

View File

@ -13,6 +13,7 @@
* limitations under the License.
*/
#include "FfmpegOutputStream.h"
#include "FfmpegDecoder.h"
#include "FfmpegEncoder.h"
#include "ILog.h"
#ifdef __cplusplus
@ -22,54 +23,42 @@ extern "C" {
#include <libavcodec/packet.h>
#include <libavformat/avformat.h>
#include <libavutil/frame.h>
#include <libswresample/swresample.h>
#ifdef __cplusplus
}
#endif
#include <cstddef>
#include <functional>
#include <memory>
FfmpegOutputStream::FfmpegOutputStream(const AVCodecID &codecId)
: mCodecId(codecId), mTmpPkt(nullptr), mStream(nullptr), swr_ctx(nullptr)
FfmpegOutputStream::FfmpegOutputStream(const AVCodecID &encodecId, const AVCodecID &dncodecId)
: mEncodecId(encodecId), mDeccodecId(dncodecId), mTmpPkt(nullptr), mStream(nullptr)
{
}
bool FfmpegOutputStream::Init(AVFormatContext *outputFormat)
{
mDecodeCallback = std::bind(&FfmpegOutputStream::GetDecodeDataCallback, this, std::placeholders::_1);
// mEncodeCallback = std::bind(&FfmpegOutputStream::GetEncodeDataCallback, this, std::placeholders::_1);
int ret = 0;
mTmpPkt = av_packet_alloc();
if (!mTmpPkt) {
LogError("Could not allocate AVPacket\n");
return false;
}
/* create resampler context */
swr_ctx = swr_alloc();
if (!swr_ctx) {
LogError("Could not allocate resampler context\n");
return false;
}
mStream = avformat_new_stream(outputFormat, nullptr);
if (!mStream) {
LogError("Could not allocate stream\n");
return false;
}
mDecoder = std::make_shared<FfmpegDecoder>(mDeccodecId);
mDecoder->Init();
mStream->id = outputFormat->nb_streams - 1;
mEncoder = std::make_shared<FfmpegEncoder>(mCodecId);
mEncoder = std::make_shared<FfmpegEncoder>(mEncodecId);
mEncoder->Init(outputFormat->flags);
mEncoder->OpenEncoder(nullptr, mStream, swr_ctx);
/* initialize the resampling context */
if ((ret = swr_init(swr_ctx)) < 0) {
LogError("Failed to initialize the resampling context\n");
return false;
}
// mDecoder = std::make_shared<FfmpegDecoder>();
mStream->time_base = mEncoder->GetTimeBase();
mEncoder->OpenEncoder(nullptr, mStream);
return true;
}
void FfmpegOutputStream::UnInit(void)
{
mEncoder->UnInit();
swr_free(&swr_ctx);
mDecoder->UnInit();
av_packet_free(&mTmpPkt);
}
void FfmpegOutputStream::WriteSourceData(const void *data, const size_t &size)

View File

@ -39,7 +39,7 @@ extern "C" {
class FfmpegOutputStream
{
public:
FfmpegOutputStream(const AVCodecID &codecId);
FfmpegOutputStream(const AVCodecID &encodecId, const AVCodecID &dncodecId);
virtual ~FfmpegOutputStream() = default;
bool Init(AVFormatContext *outputFormat);
void UnInit(void);
@ -51,12 +51,12 @@ private:
void GetEncodeDataCallback(AVPacket *pkt);
private:
const AVCodecID mCodecId;
const AVCodecID mEncodecId;
const AVCodecID mDeccodecId;
AVPacket *mTmpPkt;
std::shared_ptr<FfmpegEncoder> mEncoder;
std::shared_ptr<FfmpegDecoder> mDecoder;
AVStream *mStream;
struct SwrContext *swr_ctx;
std::function<void(AVFrame *)> mDecodeCallback;
std::function<void(AVPacket *)> mEncodeCallback;
};

View File

@ -15,11 +15,11 @@
#ifndef MEDIA_BASE_IMPL_H
#define MEDIA_BASE_IMPL_H
#include "FfmpegBase.h"
#include "FfmpegMuxStream.h"
#include "FfmpegMuxStreamV2.h"
#include "FfmpegReadFile.h"
#include "IMediaBase.h"
#include <thread>
class MediaBaseImpl : public FfmpegReadFile, public FfmpegMuxStream
class MediaBaseImpl : public FfmpegReadFile, public FfmpegMuxStreamV2
{
public:
MediaBaseImpl(const MediaHandleType &type);