/*
 * Copyright (c) 2023 Fancy Code.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "FfmpegEncoderV2.h"
#include "ILog.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * NOTE(review): the original header names were lost when this file was
 * reformatted (bare "#include" directives). The list below was reconstructed
 * from the FFmpeg APIs actually used in this translation unit
 * (avcodec_*, av_packet_*, av_frame_*, av_channel_layout_*, av_opt_set*,
 * swr_*, av_rescale_*) — confirm against version control.
 */
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/avassert.h>
#include <libavutil/avutil.h>
#include <libavutil/channel_layout.h>
#include <libavutil/error.h>
#include <libavutil/frame.h>
#include <libavutil/imgutils.h>
#include <libavutil/mathematics.h>
#include <libavutil/opt.h>
#include <libavutil/pixfmt.h>
#include <libavutil/rational.h>
#include <libavutil/samplefmt.h>
#include <libavutil/timestamp.h>
#include <libswresample/swresample.h>
#include <libswscale/swscale.h>
#ifdef __cplusplus
}
#endif
#include <cstdio>
#include <cstring>
#include <functional>

// Sample rate of the raw audio delivered to ConvertAudioFrame().
constexpr long SOURCE_AUDIO_SAMPEL_RATE = 8000;

#define STREAM_DURATION 10.0
#define STREAM_FRAME_RATE 25               /* 25 images/s */
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P  /* default pix_fmt */

/**
 * Construct the encoder wrapper. No FFmpeg resources are allocated here;
 * all allocation happens in Init() / OpenEncoder().
 *
 * @param codecId           Codec to encode with (audio or video).
 * @param encodePixelFormat Target pixel format (video only).
 * @param width             Output width in pixels (video only).
 * @param height            Output height in pixels (video only).
 */
FfmpegEncoderV2::FfmpegEncoderV2(const enum AVCodecID &codecId, const AVPixelFormat &encodePixelFormat,
                                 const int &width, const int &height)
    : mCodecId(codecId), mCodecCtx(nullptr), mCodec(nullptr), mFrame(nullptr), mTmpFrame(nullptr),
      mTmpPkt(nullptr), mSamplesCount(0), mSwrCtx(nullptr), next_pts(0), mVideoWidth(width),
      mVideoHeight(height), mEncodePixelFormat(encodePixelFormat)
{
}

/**
 * Find the encoder for mCodecId, allocate the codec context and configure it
 * for either audio or video. Does NOT open the codec — that is done by
 * OpenEncoder().
 *
 * @param outputFlags Muxer flags (AVFMT_*); when AVFMT_GLOBALHEADER is set the
 *                    codec is asked to emit global extradata instead of
 *                    in-band headers.
 * @return true on success, false on allocation/lookup failure.
 */
bool FfmpegEncoderV2::Init(const int &outputFlags)
{
    mTmpPkt = av_packet_alloc();
    if (!mTmpPkt) {
        LogError("Could not allocate AVPacket\n");
        return false;
    }
    LogInfo("find encoder : %s\n", avcodec_get_name(mCodecId));
    /* find the encoder */
    // const_cast: newer FFmpeg returns const AVCodec*, but the member is
    // declared non-const for compatibility with older APIs.
    mCodec = const_cast<AVCodec *>(avcodec_find_encoder(mCodecId));
    if (!mCodec) {
        LogError("Could not find encoder for '%s'\n", avcodec_get_name(mCodecId));
        return false;
    }
    mCodecCtx = avcodec_alloc_context3(mCodec);
    if (!mCodecCtx) {
        LogError("Could not alloc an encoding context\n");
        return false;
    }
    const AVChannelLayout src = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
    switch (mCodec->type) {
        case AVMEDIA_TYPE_AUDIO:
            mCodecCtx->sample_fmt = mCodec->sample_fmts ? mCodec->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
            mCodecCtx->bit_rate = 64000;
            // mCodecCtx->bit_rate = 24000;
            mCodecCtx->sample_rate = 44100;
            if (mCodec->supported_samplerates) {
                mCodecCtx->sample_rate = mCodec->supported_samplerates[0];
                for (int i = 0; mCodec->supported_samplerates[i]; i++) {
                    if (mCodec->supported_samplerates[i] == 44100)
                        mCodecCtx->sample_rate = 44100;
                }
            }
            // NOTE(review): this unconditionally overrides the sample rate
            // negotiated above, making the 44100 scan dead code. Kept to
            // preserve behavior — confirm 16 kHz is the intended output rate.
            mCodecCtx->sample_rate = 16000;
            // mCodecCtx->time_base = (AVRational){1, mCodecCtx->sample_rate};
            // mCodecCtx->ch_layout.nb_channels = 1;
            // av_channel_layout_default(&mCodecCtx->ch_layout, 1);
            av_channel_layout_copy(&mCodecCtx->ch_layout, &src);
            break;
        case AVMEDIA_TYPE_VIDEO:
            mCodecCtx->codec_id = mCodecId;
            mCodecCtx->bit_rate = 300000;
            /* Resolution must be a multiple of two. */
            mCodecCtx->width = mVideoWidth;
            mCodecCtx->height = mVideoHeight;
            /* timebase: This is the fundamental unit of time (in seconds) in terms
             * of which frame timestamps are represented. For fixed-fps content,
             * timebase should be 1/framerate and timestamp increments should be
             * identical to 1. */
            mCodecCtx->time_base = (AVRational){1, STREAM_FRAME_RATE};
            mCodecCtx->gop_size = 12; /* emit one intra frame every twelve frames at most */
            mCodecCtx->pix_fmt = mEncodePixelFormat;
            if (mCodecCtx->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
                /* just for testing, we also add B-frames */
                mCodecCtx->max_b_frames = 2;
            }
            if (mCodecCtx->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
                /* Needed to avoid using macroblocks in which some coeffs overflow.
                 * This does not happen with normal video, it just happens here as
                 * the motion of the chroma plane does not match the luma plane. */
                mCodecCtx->mb_decision = 2;
            }
            break;
        default:
            break;
    }
    /* Some formats want stream headers to be separate. */
    if (outputFlags & AVFMT_GLOBALHEADER) {
        mCodecCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    }
    return true;
}

/**
 * Release every resource acquired by Init()/OpenEncoder(). Safe to call even
 * if Init() failed part-way: all FFmpeg free functions used here accept
 * pointers to null.
 */
void FfmpegEncoderV2::UnInit(void)
{
    if (mFrame) {
        av_frame_free(&mFrame);
        mFrame = nullptr;
    }
    if (mTmpFrame) {
        av_frame_free(&mTmpFrame);
        mTmpFrame = nullptr;
    }
    if (mCodecCtx) {
        avcodec_free_context(&mCodecCtx);
        mCodecCtx = nullptr;
    }
    av_packet_free(&mTmpPkt); // also resets mTmpPkt to nullptr
    swr_free(&mSwrCtx);       // also resets mSwrCtx to nullptr
}

/**
 * @return The encoder time base: 1/sample_rate for audio, the configured
 *         frame-rate time base for video, and an invalid {0, -1} rational for
 *         unsupported media types (callers must treat that as an error).
 */
AVRational FfmpegEncoderV2::GetTimeBase(void)
{
    switch (mCodec->type) {
        case AVMEDIA_TYPE_AUDIO:
            return (AVRational){1, mCodecCtx->sample_rate};
        case AVMEDIA_TYPE_VIDEO:
            return mCodecCtx->time_base;
        default:
            LogError("Unsupported media type.\n");
            return (AVRational){0, -1};
    }
}

/**
 * Open the codec configured by Init() and attach its parameters to @p stream.
 * Dispatches to OpenAudio()/OpenVideo() based on the codec's media type.
 *
 * @param optArg Extra codec options (may be null); not modified.
 * @param stream Output stream whose codec parameters are filled in.
 * @return true on success.
 */
bool FfmpegEncoderV2::OpenEncoder(AVDictionary *optArg, AVStream *stream)
{
    switch (mCodec->type) {
        case AVMEDIA_TYPE_AUDIO:
            return OpenAudio(optArg, stream);
        case AVMEDIA_TYPE_VIDEO:
            return OpenVideo(optArg, stream);
        default:
            LogError("Unsupported media type.\n");
            return false;
    }
}

// static void save_code_stream_file(const void *data, const size_t &size)
// {
//     char OutPath[16];
//     const void *pData = data;
//     FILE *file = NULL;
//     LogInfo("save_code_stream_file: %d\n", size);
//     sprintf(OutPath, "./test.jpg");
//     file = fopen(OutPath, "a+");
//     if (file) { // TODO: Don't open very time.
// fwrite(pData, 1, size, file); // fflush(file); // } // if (file) // fclose(file); // } int FfmpegEncoderV2::EncodeData(AVFrame *frame, AVStream *stream, std::function callback) { int ret = 0; AVFrame *tmpFrame = frame; if (AVMEDIA_TYPE_AUDIO == mCodec->type) { tmpFrame = ConvertAudioFrame(frame, mSwrCtx); } if (!tmpFrame) { LogError("Could not convert audio frame.\n"); return AVERROR_EXIT; } // send the frame to the encoder ret = avcodec_send_frame(mCodecCtx, tmpFrame); if (ret < 0) { char error_str[AV_ERROR_MAX_STRING_SIZE] = {0}; LogInfo("Error sending a frame to the encoder: %s\n", av_make_error_string(error_str, AV_ERROR_MAX_STRING_SIZE, ret)); return AVERROR_EXIT; } while (ret >= 0) { ret = avcodec_receive_packet(mCodecCtx, mTmpPkt); if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { break; } if (ret < 0) { char error_str[AV_ERROR_MAX_STRING_SIZE] = {0}; LogInfo("Error encoding a frame: %s\n", av_make_error_string(error_str, AV_ERROR_MAX_STRING_SIZE, ret)); return AVERROR_EXIT; } /* rescale output packet timestamp values from codec to stream timebase */ av_packet_rescale_ts(mTmpPkt, mCodecCtx->time_base, stream->time_base); // LogInfo("Write mCodecCtx->time_base.num: %d\n", mCodecCtx->time_base.num); // LogInfo("Write mCodecCtx->time_base.den: %d\n", mCodecCtx->time_base.den); // LogInfo("Write stream->time_base.num: %d\n", stream->time_base.num); // LogInfo("Write stream->time_base.den: %d\n", stream->time_base.den); mTmpPkt->stream_index = stream->index; // LogInfo(" Write frame mTmpPkt->pts: %llu\n", mTmpPkt->pts); if (callback) { // if (mCodecId == AV_CODEC_ID_MJPEG) { // save_code_stream_file(mTmpPkt->data, mTmpPkt->size); // } callback(mTmpPkt); } } return ret == AVERROR_EOF ? 
1 : 0; } bool FfmpegEncoderV2::OpenVideo(AVDictionary *optArg, AVStream *stream) { int ret = 0; AVDictionary *opt = nullptr; av_dict_copy(&opt, optArg, 0); // av_dict_set(&opt, "strict_std_compliance", "experimental", 0); av_opt_set(mCodecCtx, "strict", "unofficial", 0); // Add for jpeg /* open the codec */ ret = avcodec_open2(mCodecCtx, mCodec, &opt); av_dict_free(&opt); if (ret < 0) { char error_str[AV_ERROR_MAX_STRING_SIZE] = {0}; LogError("Could not open video codec: %s\n", av_make_error_string(error_str, AV_ERROR_MAX_STRING_SIZE, ret)); return false; } /* allocate and init a re-usable frame */ mFrame = alloc_frame(mCodecCtx->pix_fmt, mCodecCtx->width, mCodecCtx->height); if (!mFrame) { LogError("Could not allocate video frame\n"); return false; } if (mCodecCtx->pix_fmt != AV_PIX_FMT_YUV420P) { mTmpFrame = alloc_frame(AV_PIX_FMT_YUV420P, mCodecCtx->width, mCodecCtx->height); if (!mTmpFrame) { LogError("Could not allocate temporary video frame\n"); return false; } } /* copy the stream parameters to the muxer */ ret = avcodec_parameters_from_context(stream->codecpar, mCodecCtx); if (ret < 0) { LogError("Could not copy the stream parameters\n"); return false; } LogInfo(" Open video success, mCodecCtx->pix_fmt = %d\n", mCodecCtx->pix_fmt); return true; } bool FfmpegEncoderV2::OpenAudio(AVDictionary *optArg, AVStream *stream) { int nb_samples = 0; int ret = 0; AVDictionary *opt = nullptr; av_dict_copy(&opt, optArg, 0); /* open it */ ret = avcodec_open2(mCodecCtx, mCodec, &opt); av_dict_free(&opt); if (ret < 0) { char error_str[AV_ERROR_MAX_STRING_SIZE] = {0}; LogError("Could not open audio codec: %s\n", av_make_error_string(error_str, AV_ERROR_MAX_STRING_SIZE, ret)); return false; } if (mCodecCtx->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) nb_samples = 10000; else nb_samples = mCodecCtx->frame_size; mFrame = alloc_audio_frame(mCodecCtx->sample_fmt, &mCodecCtx->ch_layout, mCodecCtx->sample_rate, nb_samples); // mTmpFrame = 
alloc_audio_frame(AV_SAMPLE_FMT_S16, &mCodecCtx->ch_layout, mCodecCtx->sample_rate, nb_samples); /* copy the stream parameters to the muxer */ ret = avcodec_parameters_from_context(stream->codecpar, mCodecCtx); if (ret < 0) { LogError("Could not copy the stream parameters\n"); return false; } /* create resampler context */ mSwrCtx = swr_alloc(); if (!mSwrCtx) { LogError("Could not allocate resampler context\n"); return false; } const AVChannelLayout src = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO; AVChannelLayout ch_layout; av_channel_layout_copy(&ch_layout, &src); /* set options */ av_opt_set_chlayout(mSwrCtx, "in_chlayout", &ch_layout, 0); // av_opt_set_chlayout(mSwrCtx, "in_chlayout", &mCodecCtx->ch_layout, 0); av_opt_set_int(mSwrCtx, "in_sample_rate", SOURCE_AUDIO_SAMPEL_RATE, 0); av_opt_set_sample_fmt(mSwrCtx, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0); av_opt_set_chlayout(mSwrCtx, "out_chlayout", &mCodecCtx->ch_layout, 0); av_opt_set_int(mSwrCtx, "out_sample_rate", mCodecCtx->sample_rate, 0); av_opt_set_sample_fmt(mSwrCtx, "out_sample_fmt", mCodecCtx->sample_fmt, 0); /* initialize the resampling context */ if ((ret = swr_init(mSwrCtx)) < 0) { LogError("Failed to initialize the resampling context\n"); return false; } return true; } AVFrame *FfmpegEncoderV2::ConvertAudioFrame(AVFrame *decodeFrame, struct SwrContext *swr_ctx) { if (nullptr == decodeFrame) { LogError("decodeFrame is null\n"); return nullptr; } // LogInfo("decodeFrame->pts = %d\n", decodeFrame->pts); // decodeFrame->pts = next_pts; // next_pts += decodeFrame->nb_samples; int ret = 0; int dst_nb_samples = 0; /* convert samples from native format to destination codec format, using the resampler */ /* compute destination number of samples */ dst_nb_samples = av_rescale_rnd(swr_get_delay(swr_ctx, mCodecCtx->sample_rate) + decodeFrame->nb_samples, mCodecCtx->sample_rate, SOURCE_AUDIO_SAMPEL_RATE, AV_ROUND_UP); // av_assert0(dst_nb_samples == decodeFrame->nb_samples); /* when we pass a frame to the encoder, 
it may keep a reference to it * internally; * make sure we do not overwrite it here */ ret = av_frame_make_writable(mFrame); if (ret < 0) { LogError("av_frame_make_writable failed\n"); return nullptr; } /* convert to destination format */ ret = swr_convert( swr_ctx, mFrame->data, dst_nb_samples, (const uint8_t **)decodeFrame->data, decodeFrame->nb_samples); if (ret < 0) { LogError("Error while converting\n"); return nullptr; } // LogInfo("mCodecCtx->time_base.num = %d, mCodecCtx->time_base.den=%d\n", // mCodecCtx->time_base.num, // mCodecCtx->time_base.den); mFrame->pts = av_rescale_q(decodeFrame->pts, (AVRational){1, 1000000}, mCodecCtx->time_base); // LogInfo("decodeFrame->pts = %d\n", decodeFrame->pts); // LogInfo("mFrame->pts = %d\n", mFrame->pts); mSamplesCount += dst_nb_samples; return mFrame; } AVFrame *FfmpegEncoderV2::alloc_frame(enum AVPixelFormat pix_fmt, int width, int height) { AVFrame *frame; int ret; frame = av_frame_alloc(); if (!frame) return nullptr; frame->format = pix_fmt; frame->width = width; frame->height = height; /* allocate the buffers for the frame data */ ret = av_frame_get_buffer(frame, 0); if (ret < 0) { LogInfo("Could not allocate frame data.\n"); return nullptr; } return frame; } AVFrame *FfmpegEncoderV2::alloc_audio_frame(enum AVSampleFormat sample_fmt, const AVChannelLayout *channel_layout, int sample_rate, int nb_samples) { AVFrame *frame = av_frame_alloc(); if (!frame) { LogError("Error allocating an audio frame\n"); return nullptr; } frame->format = sample_fmt; av_channel_layout_copy(&frame->ch_layout, channel_layout); frame->sample_rate = sample_rate; frame->nb_samples = nb_samples; if (nb_samples) { if (av_frame_get_buffer(frame, 0) < 0) { LogError("Error allocating an audio buffer\n"); return nullptr; } } return frame; }