/*
 * Copyright (c) 2023 Fancy Code.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "FfmpegMuxStream.h"
#include "ILog.h"
#include "MediaBase.h"
#include "StatusCode.h"
/*
 * NOTE(review): the names of the system and FFmpeg headers were missing from the
 * reviewed text. The list below is reconstructed from the symbols this file uses;
 * confirm it against the original include block before merging.
 */
#include <climits>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <string>
#ifdef __cplusplus
extern "C" {
#endif
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/avassert.h>
#include <libavutil/avutil.h>
#include <libavutil/channel_layout.h>
#include <libavutil/dict.h>
#include <libavutil/error.h>
#include <libavutil/frame.h>
#include <libavutil/mathematics.h>
#include <libavutil/opt.h>
#include <libavutil/pixfmt.h>
#include <libavutil/rational.h>
#include <libavutil/samplefmt.h>
#include <libavutil/timestamp.h>
#include <libswresample/swresample.h>
#include <libswscale/swscale.h>
#ifdef __cplusplus
}
#endif
#include <cstddef>
#include <cstdio>

#define STREAM_DURATION 10.0
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */

namespace
{
/**
 * @brief Render an FFmpeg error code as text.
 * @param errnum Negative AVERROR value returned by an FFmpeg call.
 * @return The text produced by av_make_error_string() for errnum.
 */
std::string ErrorText(const int errnum)
{
    char buffer[AV_ERROR_MAX_STRING_SIZE] = {0};
    return std::string(av_make_error_string(buffer, AV_ERROR_MAX_STRING_SIZE, errnum));
}

/**
 * @brief Copy a caller-owned buffer into a newly allocated AVPacket.
 *
 * av_new_packet() allocates the payload together with the trailing padding that
 * FFmpeg decoders require, so the caller's buffer does not need any padding.
 *
 * @param data Encoded bytes; must not be null.
 * @param size Number of bytes at data; must be non-zero and fit in an int.
 * @return The packet (release with av_packet_free()), or nullptr when the input is
 *         empty/oversized or an allocation fails.
 */
AVPacket *CreatePaddedPacket(const void *data, const size_t size)
{
    if (nullptr == data || 0 == size || size > static_cast<size_t>(INT_MAX)) {
        return nullptr;
    }
    AVPacket *packet = av_packet_alloc();
    if (!packet) {
        return nullptr;
    }
    if (av_new_packet(packet, static_cast<int>(size)) < 0) {
        av_packet_free(&packet);
        return nullptr;
    }
    memcpy(packet->data, data, size);
    return packet;
}
} // namespace

/**
 * @brief Start with no codec, frame or output context and zeroed output streams.
 */
FfmpegMuxStream::FfmpegMuxStream()
    : mCodecVideo(nullptr), mCodecVideoContext(nullptr), mFrameVideo(nullptr), mCodecAudio(nullptr),
      mCodecAudioContext(nullptr), mFrameAudio(nullptr), mOc(nullptr)
{
    memset(&mVideoSt, 0, sizeof(mVideoSt));
    memset(&mAudioSt, 0, sizeof(mAudioSt));
}

/**
 * @brief Set up the decoders, create the "mp4" output context and write the header.
 *
 * Opens an H.264 decoder and a PCM A-law decoder for the incoming data, then adds
 * one stream per default codec of the output format, opens those encoders, opens
 * the output file and writes the container header.
 *
 * On any failure everything allocated so far is released and mOc stays null, so a
 * later CloseOutputFile() call is harmless.
 *
 * @param fileName Path of the file to create.
 * @return STATUS_CODE_OK on success, STATUS_CODE_NOT_OK otherwise.
 */
StatusCode FfmpegMuxStream::OpenOutputFile(const std::string &fileName)
{
    AVFormatContext *oc = nullptr;
    /* Releases the decoder state and the partially built muxer, then reports failure. */
    auto fail = [this, &oc]() {
        av_frame_free(&mFrameVideo);
        av_frame_free(&mFrameAudio);
        avcodec_free_context(&mCodecVideoContext);
        avcodec_free_context(&mCodecAudioContext);
        if (oc) {
            close_stream(oc, &mVideoSt);
            close_stream(oc, &mAudioSt);
            if (!(oc->oformat->flags & AVFMT_NOFILE)) {
                avio_closep(&oc->pb);
            }
            avformat_free_context(oc);
            oc = nullptr;
        }
        memset(&mVideoSt, 0, sizeof(mVideoSt));
        memset(&mAudioSt, 0, sizeof(mAudioSt));
        mOc = nullptr;
        return CreateStatusCode(STATUS_CODE_NOT_OK);
    };

    if (!InitCodecVideo(AV_CODEC_ID_H264, &mCodecVideo, &mCodecVideoContext, &mFrameVideo)) {
        LogError("InitCodec failed\n");
        return fail();
    }
    if (!InitCodecAudio(AV_CODEC_ID_PCM_ALAW, &mCodecAudio, &mCodecAudioContext, &mFrameAudio)) {
        LogError("InitCodec failed\n");
        return fail();
    }

    avformat_alloc_output_context2(&oc, nullptr, "mp4", fileName.c_str());
    if (!oc) {
        LogError("Could not deduce output format from file extension: using MPEG.\n");
        return fail();
    }
    const AVOutputFormat *fmt = oc->oformat;
    const AVCodec *videoCodec = nullptr;
    const AVCodec *audioCodec = nullptr;
    AVDictionary *opt = nullptr;

    /* Add the audio and video streams using the default format codecs. */
    if (fmt->video_codec != AV_CODEC_ID_NONE) {
        LogInfo("Add video stream\n");
        if (!add_stream(&mVideoSt, oc, &videoCodec, fmt->video_codec)) {
            LogError("Add video stream failed\n");
            return fail();
        }
    }
    if (fmt->audio_codec != AV_CODEC_ID_NONE) {
        LogInfo("Add audio stream\n");
        if (!add_stream(&mAudioSt, oc, &audioCodec, fmt->audio_codec)) {
            LogError("Add audio stream failed\n");
            return fail();
        }
    }

    /* Now that all the parameters are set, open the encoders and allocate
     * the necessary encode buffers. */
    if (videoCodec && !open_video(oc, videoCodec, &mVideoSt, opt)) {
        LogError("Open video encoder failed\n");
        return fail();
    }
    if (audioCodec && !open_audio(oc, audioCodec, &mAudioSt, opt)) {
        LogError("Open audio encoder failed\n");
        return fail();
    }

    av_dump_format(oc, 0, fileName.c_str(), 1);

    if (!(fmt->flags & AVFMT_NOFILE)) {
        const int openResult = avio_open(&oc->pb, fileName.c_str(), AVIO_FLAG_WRITE);
        if (openResult < 0) {
            LogError("Could not open '%s': %s\n", fileName.c_str(), ErrorText(openResult).c_str());
            return fail();
        }
    }

    /* Write the stream header, if any. */
    const int headerResult = avformat_write_header(oc, &opt);
    av_dict_free(&opt);
    if (headerResult < 0) {
        LogError("Error occurred when opening output file: %s\n", ErrorText(headerResult).c_str());
        return fail();
    }

    /* Publish the context only once the file is fully usable. */
    mOc = oc;
    return CreateStatusCode(STATUS_CODE_OK);
}

/**
 * @brief Drain the encoders, write the trailer and release every FFmpeg object.
 *
 * Safe to call when no file is open (mOc is null): only the decoder state is
 * released in that case. mOc is reset so a repeated call does not free twice.
 *
 * @return STATUS_CODE_OK.
 */
StatusCode FfmpegMuxStream::CloseOutputFile(void)
{
    if (mOc) {
        /* A null frame puts the encoder into draining mode so that packets it
         * is still holding reach the file before the trailer is written. */
        if (mVideoSt.enc) {
            write_frame(mOc, mVideoSt.enc, mVideoSt.st, nullptr, mVideoSt.tmp_pkt);
        }
        if (mAudioSt.enc) {
            write_frame(mOc, mAudioSt.enc, mAudioSt.st, nullptr, mAudioSt.tmp_pkt);
        }
        av_write_trailer(mOc);
    }

    /* Both free functions accept a null object and reset the pointer. */
    av_frame_free(&mFrameVideo);
    av_frame_free(&mFrameAudio);
    avcodec_free_context(&mCodecVideoContext);
    avcodec_free_context(&mCodecAudioContext);

    if (mOc) {
        close_stream(mOc, &mVideoSt);
        close_stream(mOc, &mAudioSt);
        if (!(mOc->oformat->flags & AVFMT_NOFILE)) {
            /* Close the output file. */
            avio_closep(&mOc->pb);
        }
        avformat_free_context(mOc);
        mOc = nullptr;
    }
    memset(&mVideoSt, 0, sizeof(mVideoSt));
    memset(&mAudioSt, 0, sizeof(mAudioSt));
    return CreateStatusCode(STATUS_CODE_OK);
}

/**
 * @brief Route one chunk of encoded data to the video or audio path by its type.
 * @param data Encoded bytes.
 * @param size Number of bytes at data.
 * @param streamInfo Describes the chunk; only mType is read here.
 */
void FfmpegMuxStream::GetStreamData(const void *data, const size_t &size, const StreamInfo &streamInfo)
{
    if (streamInfo.mType == STREAM_TYPE_VIDEO_H264) {
        GetVideoStream(data, size, streamInfo);
    }
    if (streamInfo.mType == STREAM_TYPE_AUDIO_G711A) {
        GetAudioStream(data, size, streamInfo);
    }
}

/**
 * @brief Decode one video packet and pass every resulting frame to the video encoder.
 * @param data Encoded video bytes.
 * @param size Number of bytes at data.
 * @param streamInfo Not read by this function.
 */
void FfmpegMuxStream::GetVideoStream(const void *data, const size_t &size, const StreamInfo &streamInfo)
{
    if (!mOc || !mCodecVideoContext || !mFrameVideo || !mVideoSt.enc) {
        LogError("Video path is not open\n");
        return;
    }
    AVPacket *packet = CreatePaddedPacket(data, size);
    if (!packet) {
        LogError("Could not create a packet for the video data\n");
        return;
    }
    int ret = avcodec_send_packet(mCodecVideoContext, packet);
    av_packet_free(&packet);
    if (ret < 0) {
        LogError("Error sending a packet for decoding\n");
        return;
    }
    for (;;) {
        ret = avcodec_receive_frame(mCodecVideoContext, mFrameVideo);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            break;
        }
        if (ret < 0) {
            LogError("Error during decoding\n");
            break;
        }
        /* The input packet carries no timestamp, so number the frames in
         * encoder time-base units (1/STREAM_FRAME_RATE, see add_stream()). */
        mFrameVideo->pts = mVideoSt.next_pts++;
        if (write_frame(mOc, mVideoSt.enc, mVideoSt.st, mFrameVideo, mVideoSt.tmp_pkt) < 0) {
            break;
        }
    }
}

/**
 * @brief Decode one audio packet, convert each frame and pass it to the audio encoder.
 * @param data Encoded audio bytes.
 * @param size Number of bytes at data.
 * @param streamInfo Not read by this function.
 */
void FfmpegMuxStream::GetAudioStream(const void *data, const size_t &size, const StreamInfo &streamInfo)
{
    if (!mOc || !mCodecAudioContext || !mFrameAudio || !mAudioSt.enc) {
        LogError("Audio path is not open\n");
        return;
    }
    AVPacket *packet = CreatePaddedPacket(data, size);
    if (!packet) {
        LogError("Could not create a packet for the audio data\n");
        return;
    }
    int ret = avcodec_send_packet(mCodecAudioContext, packet);
    av_packet_free(&packet);
    if (ret < 0) {
        LogError("Error sending a packet for decoding\n");
        return;
    }
    for (;;) {
        ret = avcodec_receive_frame(mCodecAudioContext, mFrameAudio);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            break;
        }
        if (ret < 0) {
            LogError("Error during decoding\n");
            break;
        }
        mFrameAudio->pts = mAudioSt.next_pts;
        mAudioSt.next_pts += mFrameAudio->nb_samples;
        /* Convert the decoded AUDIO frame; the result is left in mAudioSt.frame. */
        if (!ConvertAudioFrame(mFrameAudio, mAudioSt.enc, &mAudioSt)) {
            break;
        }
        if (write_frame(mOc, mAudioSt.enc, mAudioSt.st, mAudioSt.frame, mAudioSt.tmp_pkt) < 0) {
            break;
        }
    }
}

/**
 * @brief Create an output stream with an encoder context configured for codec_id.
 *
 * Allocates ost->tmp_pkt, ost->st and ost->enc. Anything allocated before a
 * failure stays in ost and is released by close_stream().
 *
 * @param ost Output stream state to fill in.
 * @param oc Output format context that receives the new stream.
 * @param codec Receives the encoder found for codec_id (null if none was found).
 * @param codec_id Encoder to look up.
 * @return true on success, false when the encoder is missing or an allocation fails.
 */
bool FfmpegMuxStream::add_stream(OutputStream *ost, AVFormatContext *oc, const AVCodec **codec,
                                 enum AVCodecID codec_id)
{
    /* find the encoder */
    *codec = avcodec_find_encoder(codec_id);
    if (!(*codec)) {
        LogError("Could not find encoder for '%s'\n", avcodec_get_name(codec_id));
        return false;
    }
    ost->tmp_pkt = av_packet_alloc();
    if (!ost->tmp_pkt) {
        LogError("Could not allocate AVPacket\n");
        return false;
    }
    ost->st = avformat_new_stream(oc, nullptr);
    if (!ost->st) {
        LogError("Could not allocate stream\n");
        return false;
    }
    ost->st->id = oc->nb_streams - 1;
    AVCodecContext *c = avcodec_alloc_context3(*codec);
    if (!c) {
        LogError("Could not alloc an encoding context\n");
        return false;
    }
    ost->enc = c;

    switch ((*codec)->type) {
        case AVMEDIA_TYPE_AUDIO: {
            const AVChannelLayout stereo = AV_CHANNEL_LAYOUT_STEREO;
            c->sample_fmt = (*codec)->sample_fmts ? (*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
            c->bit_rate = 64000;
            c->sample_rate = 8000;
            if ((*codec)->supported_samplerates) {
                /* Default to the first listed rate, but prefer 44100 when offered. */
                c->sample_rate = (*codec)->supported_samplerates[0];
                for (int i = 0; (*codec)->supported_samplerates[i]; i++) {
                    if ((*codec)->supported_samplerates[i] == 44100) {
                        c->sample_rate = 44100;
                    }
                }
            }
            if (av_channel_layout_copy(&c->ch_layout, &stereo) < 0) {
                LogError("Could not copy channel layout\n");
                return false;
            }
            ost->st->time_base = AVRational{1, c->sample_rate};
            break;
        }
        case AVMEDIA_TYPE_VIDEO:
            c->codec_id = codec_id;
            c->bit_rate = 400000;
            /* Resolution must be a multiple of two. */
            c->width = 1920;
            c->height = 2160;
            /* timebase: This is the fundamental unit of time (in seconds) in terms
             * of which frame timestamps are represented. For fixed-fps content,
             * timebase should be 1/framerate and timestamp increments should be
             * identical to 1. */
            ost->st->time_base = AVRational{1, STREAM_FRAME_RATE};
            c->time_base = ost->st->time_base;
            c->gop_size = 12; /* emit one intra frame every twelve frames at most */
            c->pix_fmt = STREAM_PIX_FMT;
            if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
                /* just for testing, we also add B-frames */
                c->max_b_frames = 2;
            }
            if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
                /* Needed to avoid using macroblocks in which some coeffs overflow.
                 * This does not happen with normal video, it just happens here as
                 * the motion of the chroma plane does not match the luma plane. */
                c->mb_decision = 2;
            }
            break;
        default:
            break;
    }

    /* Some formats want stream headers to be separate. */
    if (oc->oformat->flags & AVFMT_GLOBALHEADER) {
        c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    }
    return true;
}

/**
 * @brief Free the encoder, frames, packet and converter contexts held by ost.
 * @param oc Not used.
 * @param ost Output stream whose resources are released.
 */
void FfmpegMuxStream::close_stream(AVFormatContext *oc, OutputStream *ost)
{
    avcodec_free_context(&ost->enc);
    av_frame_free(&ost->frame);
    av_frame_free(&ost->tmp_frame);
    av_packet_free(&ost->tmp_pkt);
    sws_freeContext(ost->sws_ctx);
    ost->sws_ctx = nullptr; /* sws_freeContext() takes the pointer by value */
    swr_free(&ost->swr_ctx);
}

/**
 * @brief Open the video encoder and allocate its reusable frame(s).
 * @param oc Not used.
 * @param codec Encoder to open.
 * @param ost Output stream holding the encoder context (ost->enc) and stream.
 * @param opt_arg Options copied and handed to avcodec_open2(); may be null.
 * @return true on success, false on any FFmpeg failure.
 */
bool FfmpegMuxStream::open_video(AVFormatContext *oc, const AVCodec *codec, OutputStream *ost,
                                 AVDictionary *opt_arg)
{
    AVCodecContext *c = ost->enc;
    AVDictionary *opt = nullptr;
    av_dict_copy(&opt, opt_arg, 0);
    /* open the codec */
    int ret = avcodec_open2(c, codec, &opt);
    av_dict_free(&opt);
    if (ret < 0) {
        LogError("Could not open video codec: %s\n", ErrorText(ret).c_str());
        return false;
    }
    /* allocate and init a re-usable frame */
    ost->frame = alloc_frame(c->pix_fmt, c->width, c->height);
    if (!ost->frame) {
        LogError("Could not allocate video frame\n");
        return false;
    }
    /* If the output format is not YUV420P, then a temporary YUV420P
     * picture is needed too. It is then converted to the required
     * output format. */
    ost->tmp_frame = nullptr;
    if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
        ost->tmp_frame = alloc_frame(AV_PIX_FMT_YUV420P, c->width, c->height);
        if (!ost->tmp_frame) {
            LogError("Could not allocate temporary video frame\n");
            return false;
        }
    }
    /* copy the stream parameters to the muxer */
    ret = avcodec_parameters_from_context(ost->st->codecpar, c);
    if (ret < 0) {
        LogError("Could not copy the stream parameters\n");
        return false;
    }
    return true;
}

/**
 * @brief Open the audio encoder, allocate its frames and set up the resampler.
 *
 * The resampler is configured to take S16 samples in the encoder's channel layout
 * and sample rate and produce the encoder's sample format.
 *
 * @param oc Not used.
 * @param codec Encoder to open.
 * @param ost Output stream holding the encoder context (ost->enc) and stream.
 * @param opt_arg Options copied and handed to avcodec_open2(); may be null.
 * @return true on success, false on any FFmpeg failure.
 */
bool FfmpegMuxStream::open_audio(AVFormatContext *oc, const AVCodec *codec, OutputStream *ost,
                                 AVDictionary *opt_arg)
{
    AVCodecContext *c = ost->enc;
    AVDictionary *opt = nullptr;
    /* open it */
    av_dict_copy(&opt, opt_arg, 0);
    int ret = avcodec_open2(c, codec, &opt);
    av_dict_free(&opt);
    if (ret < 0) {
        LogError("Could not open audio codec: %s\n", ErrorText(ret).c_str());
        return false;
    }
    /* init signal generator */
    ost->t = 0;
    ost->tincr = 2 * M_PI * 110.0 / c->sample_rate;
    /* increment frequency by 110 Hz per second */
    ost->tincr2 = 2 * M_PI * 110.0 / c->sample_rate / c->sample_rate;

    int nb_samples = c->frame_size;
    if (c->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) {
        nb_samples = 10000;
    }
    ost->frame = alloc_audio_frame(c->sample_fmt, &c->ch_layout, c->sample_rate, nb_samples);
    ost->tmp_frame = alloc_audio_frame(AV_SAMPLE_FMT_S16, &c->ch_layout, c->sample_rate, nb_samples);
    if (!ost->frame || !ost->tmp_frame) {
        LogError("Could not allocate audio frames\n");
        return false;
    }
    /* copy the stream parameters to the muxer */
    ret = avcodec_parameters_from_context(ost->st->codecpar, c);
    if (ret < 0) {
        LogError("Could not copy the stream parameters\n");
        return false;
    }
    /* create resampler context */
    ost->swr_ctx = swr_alloc();
    if (!ost->swr_ctx) {
        LogError("Could not allocate resampler context\n");
        return false;
    }
    /* set options */
    av_opt_set_chlayout(ost->swr_ctx, "in_chlayout", &c->ch_layout, 0);
    av_opt_set_int(ost->swr_ctx, "in_sample_rate", c->sample_rate, 0);
    av_opt_set_sample_fmt(ost->swr_ctx, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
    av_opt_set_chlayout(ost->swr_ctx, "out_chlayout", &c->ch_layout, 0);
    av_opt_set_int(ost->swr_ctx, "out_sample_rate", c->sample_rate, 0);
    av_opt_set_sample_fmt(ost->swr_ctx, "out_sample_fmt", c->sample_fmt, 0);
    /* initialize the resampling context */
    if (swr_init(ost->swr_ctx) < 0) {
        LogError("Failed to initialize the resampling context\n");
        return false;
    }
    return true;
}

/**
 * @brief Allocate an audio frame and, when nb_samples is non-zero, its sample buffer.
 * @param sample_fmt Sample format of the frame.
 * @param channel_layout Channel layout copied into the frame.
 * @param sample_rate Sample rate stored in the frame.
 * @param nb_samples Samples per channel; 0 skips the buffer allocation.
 * @return The frame, or nullptr on failure (nothing is leaked).
 */
AVFrame *FfmpegMuxStream::alloc_audio_frame(enum AVSampleFormat sample_fmt, const AVChannelLayout *channel_layout,
                                            int sample_rate, int nb_samples)
{
    AVFrame *frame = av_frame_alloc();
    if (!frame) {
        LogError("Error allocating an audio frame\n");
        return nullptr;
    }
    frame->format = sample_fmt;
    frame->sample_rate = sample_rate;
    frame->nb_samples = nb_samples;
    if (av_channel_layout_copy(&frame->ch_layout, channel_layout) < 0) {
        LogError("Error copying the audio channel layout\n");
        av_frame_free(&frame);
        return nullptr;
    }
    if (nb_samples && av_frame_get_buffer(frame, 0) < 0) {
        LogError("Error allocating an audio buffer\n");
        av_frame_free(&frame);
        return nullptr;
    }
    return frame;
}

/**
 * @brief Allocate a video frame together with its picture buffers.
 * @param pix_fmt Pixel format of the frame.
 * @param width Picture width in pixels.
 * @param height Picture height in pixels.
 * @return The frame, or nullptr on failure (nothing is leaked).
 */
AVFrame *FfmpegMuxStream::alloc_frame(enum AVPixelFormat pix_fmt, int width, int height)
{
    AVFrame *frame = av_frame_alloc();
    if (!frame) {
        return nullptr;
    }
    frame->format = pix_fmt;
    frame->width = width;
    frame->height = height;
    /* allocate the buffers for the frame data */
    if (av_frame_get_buffer(frame, 0) < 0) {
        LogError("Could not allocate frame data.\n");
        av_frame_free(&frame);
        return nullptr;
    }
    return frame;
}

/**
 * @brief Find and open the decoder for codecId and allocate a frame for its output.
 * @param codecId Decoder to look up.
 * @param codec Receives the decoder.
 * @param codec_ctx Receives the opened decoder context.
 * @param frame Receives an empty frame for decoded pictures.
 * @return true on success, false otherwise. Objects already stored through the
 *         out-parameters are left there for the caller to release.
 */
bool FfmpegMuxStream::InitCodecVideo(enum AVCodecID codecId, AVCodec **codec, AVCodecContext **codec_ctx,
                                     AVFrame **frame)
{
    *codec = (AVCodec *)avcodec_find_decoder(codecId);
    if (!(*codec)) {
        LogError("Codec not found\n");
        return false;
    }
    *codec_ctx = avcodec_alloc_context3((const AVCodec *)(*codec));
    if (!(*codec_ctx)) {
        LogError("Could not allocate codec context\n");
        return false;
    }
    const int ret = avcodec_open2(*codec_ctx, *codec, nullptr);
    if (ret < 0) {
        LogError("Could not open codec:%s\n", ErrorText(ret).c_str());
        return false;
    }
    *frame = av_frame_alloc();
    if (!(*frame)) { /* test the allocation, not the out-parameter itself */
        LogError("Could not allocate video frame\n");
        return false;
    }
    return true;
}

/**
 * @brief Find and open the audio decoder for codecId and allocate a frame for its output.
 *
 * The context is configured for S16 samples, the rate chosen by
 * select_sample_rate() and the layout chosen by select_channel_layout().
 *
 * @param codecId Decoder to look up.
 * @param codec Receives the decoder.
 * @param codec_ctx Receives the opened decoder context.
 * @param frame Receives a frame pre-filled with the context's format and layout.
 * @return true on success, false otherwise. Objects already stored through the
 *         out-parameters are left there for the caller to release.
 */
bool FfmpegMuxStream::InitCodecAudio(enum AVCodecID codecId, AVCodec **codec, AVCodecContext **codec_ctx,
                                     AVFrame **frame)
{
    *codec = (AVCodec *)avcodec_find_decoder(codecId);
    // *codec = (AVCodec *)avcodec_find_encoder_by_name("libfdk_aac");
    if (!(*codec)) {
        LogError("Codec not found\n");
        return false;
    }
    *codec_ctx = avcodec_alloc_context3((const AVCodec *)(*codec));
    if (!(*codec_ctx)) {
        LogError("Could not allocate codec context\n");
        return false;
    }
    /* put sample parameters */
    (*codec_ctx)->bit_rate = 64000;
    /* check that the codec supports s16 pcm */
    (*codec_ctx)->sample_fmt = AV_SAMPLE_FMT_S16;
    if (!check_sample_fmt((*codec), (*codec_ctx)->sample_fmt)) {
        LogError("Encoder does not support sample format %s\n", av_get_sample_fmt_name((*codec_ctx)->sample_fmt));
        return false;
    }
    /* select other audio parameters supported by the codec */
    (*codec_ctx)->sample_rate = select_sample_rate((*codec));
    int ret = select_channel_layout((*codec), &((*codec_ctx)->ch_layout));
    if (ret < 0) {
        LogError("Could not set channel layout\n");
        return false;
    }
    ret = avcodec_open2(*codec_ctx, *codec, nullptr);
    if (ret < 0) {
        LogError("Could not open codec:%s\n", ErrorText(ret).c_str());
        return false;
    }
    *frame = av_frame_alloc();
    if (!(*frame)) {
        LogError("Could not allocate audio frame\n");
        return false;
    }
    (*frame)->nb_samples = (*codec_ctx)->frame_size;
    (*frame)->format = (*codec_ctx)->sample_fmt;
    ret = av_channel_layout_copy(&((*frame)->ch_layout), &((*codec_ctx)->ch_layout));
    if (ret < 0) {
        LogError("Could not copy channel layout\n");
        return false;
    }
    return true;
}

/**
 * @brief Encode one frame and write every packet the encoder produces to the file.
 * @param fmt_ctx Output format context.
 * @param c Encoder context.
 * @param st Stream the packets belong to.
 * @param frame Frame to encode; a null frame asks the encoder to drain.
 * @param pkt Scratch packet reused for each encoder output.
 * @return 1 when the encoder reported end of stream, 0 when it wants more input,
 *         -1 on an encode or write error.
 */
int FfmpegMuxStream::write_frame(AVFormatContext *fmt_ctx, AVCodecContext *c, AVStream *st, AVFrame *frame,
                                 AVPacket *pkt)
{
    // send the frame to the encoder
    int ret = avcodec_send_frame(c, frame);
    if (ret < 0) {
        LogError("Error sending a frame to the encoder: %s\n", ErrorText(ret).c_str());
        return -1;
    }
    while (ret >= 0) {
        ret = avcodec_receive_packet(c, pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            break;
        }
        if (ret < 0) {
            LogError("Error encoding a frame: %s\n", ErrorText(ret).c_str());
            return -1;
        }
        /* rescale output packet timestamp values from codec to stream timebase */
        av_packet_rescale_ts(pkt, c->time_base, st->time_base);
        pkt->stream_index = st->index;
        /* Write the compressed frame to the media file. */
        log_packet(fmt_ctx, pkt);
        ret = av_interleaved_write_frame(fmt_ctx, pkt);
        /* pkt is now blank (av_interleaved_write_frame() takes ownership of
         * its contents and resets pkt), so that no unreferencing is necessary.
         * This would be different if one used av_write_frame(). */
        if (ret < 0) {
            LogError("Error while writing output packet: %s\n", ErrorText(ret).c_str());
            return -1;
        }
    }
    return ret == AVERROR_EOF ? 1 : 0;
}

/**
 * @brief Log the pts, dts and duration of a packet, both raw and as time strings.
 * @param fmt_ctx Format context; supplies the time base of the packet's stream.
 * @param pkt Packet to describe; pkt->stream_index must be a valid stream index.
 */
void FfmpegMuxStream::log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
{
    char pts[AV_TS_MAX_STRING_SIZE] = {0};
    char dts[AV_TS_MAX_STRING_SIZE] = {0};
    char duration[AV_TS_MAX_STRING_SIZE] = {0};
    char pts_time[AV_TS_MAX_STRING_SIZE] = {0};
    char dts_time[AV_TS_MAX_STRING_SIZE] = {0};
    char duration_time[AV_TS_MAX_STRING_SIZE] = {0};
    AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
    LogInfo("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
            av_ts_make_string(pts, pkt->pts),
            av_ts_make_time_string(pts_time, pkt->pts, time_base),
            av_ts_make_string(dts, pkt->dts),
            av_ts_make_time_string(dts_time, pkt->dts, time_base),
            av_ts_make_string(duration, pkt->duration),
            av_ts_make_time_string(duration_time, pkt->duration, time_base),
            pkt->stream_index);
}

/**
 * @brief Check whether a sample format appears in the codec's sample_fmts list.
 * @param codec Codec whose list is searched.
 * @param sample_fmt Format to look for.
 * @return 1 when the format is listed, 0 when it is not or the codec has no list.
 */
int FfmpegMuxStream::check_sample_fmt(const AVCodec *codec, enum AVSampleFormat sample_fmt)
{
    const enum AVSampleFormat *p = codec->sample_fmts;
    if (!p) {
        return 0;
    }
    for (; *p != AV_SAMPLE_FMT_NONE; p++) {
        if (*p == sample_fmt) {
            return 1;
        }
    }
    return 0;
}

/**
 * @brief Pick the supported sample rate closest to 44100 Hz.
 * @param codec Codec whose supported_samplerates list is searched.
 * @return The closest listed rate, 44100 when the codec has no list, or 0 when
 *         the list is empty.
 */
int FfmpegMuxStream::select_sample_rate(const AVCodec *codec)
{
    if (!codec->supported_samplerates) {
        return 44100;
    }
    int best_samplerate = 0;
    for (const int *p = codec->supported_samplerates; *p; p++) {
        if (!best_samplerate || std::abs(44100 - *p) < std::abs(44100 - best_samplerate)) {
            best_samplerate = *p;
        }
    }
    return best_samplerate;
}

/**
 * @brief Copy the codec's layout with the highest channel count into dst.
 *
 * Falls back to stereo when the codec has no layout list or the list is empty.
 *
 * @param codec Codec whose ch_layouts list is searched.
 * @param dst Receives the chosen layout.
 * @return The result of av_channel_layout_copy(): 0 on success, negative on error.
 */
int FfmpegMuxStream::select_channel_layout(const AVCodec *codec, AVChannelLayout *dst)
{
    const AVChannelLayout stereo = AV_CHANNEL_LAYOUT_STEREO;
    const AVChannelLayout *best_ch_layout = nullptr;
    int best_nb_channels = 0;
    if (codec->ch_layouts) {
        for (const AVChannelLayout *p = codec->ch_layouts; p->nb_channels; p++) {
            if (p->nb_channels > best_nb_channels) {
                best_ch_layout = p;
                best_nb_channels = p->nb_channels;
            }
        }
    }
    /* An absent or empty list leaves nothing to copy from; use stereo instead. */
    return av_channel_layout_copy(dst, best_ch_layout ? best_ch_layout : &stereo);
}

/**
 * @brief Resample a decoded frame into ost->frame and stamp its pts.
 *
 * The pts is derived from ost->samples_count, which is then advanced by the
 * number of samples requested from the resampler.
 *
 * @param frame Decoded input frame.
 * @param c Audio encoder context (supplies sample rate and time base).
 * @param ost Output stream holding the resampler and the destination frame.
 * @return true on success, false on a null argument, a destination frame that is
 *         too small, or a resampler failure.
 */
bool FfmpegMuxStream::ConvertAudioFrame(AVFrame *frame, AVCodecContext *c, OutputStream *ost)
{
    if (nullptr == frame) {
        LogError("frame is null\n");
        return false;
    }
    if (nullptr == c || nullptr == ost || nullptr == ost->frame || nullptr == ost->swr_ctx) {
        LogError("audio output is not initialized\n");
        return false;
    }
    /* convert samples from native format to destination codec format, using the resampler */
    /* compute destination number of samples */
    const int dst_nb_samples = static_cast<int>(av_rescale_rnd(
        swr_get_delay(ost->swr_ctx, c->sample_rate) + frame->nb_samples, c->sample_rate, c->sample_rate,
        AV_ROUND_UP));
    av_assert0(dst_nb_samples == frame->nb_samples);
    /* ost->frame was allocated for a fixed number of samples; refuse to write past it. */
    if (dst_nb_samples > ost->frame->nb_samples) {
        LogError("Decoded audio frame is larger than the encoder frame\n");
        return false;
    }
    /* when we pass a frame to the encoder, it may keep a reference to it
     * internally;
     * make sure we do not overwrite it here */
    int ret = av_frame_make_writable(ost->frame);
    if (ret < 0) {
        LogError("av_frame_make_writable failed\n");
        return false;
    }
    /* convert to destination format */
    ret = swr_convert(ost->swr_ctx, ost->frame->data, dst_nb_samples, (const uint8_t **)frame->data,
                      frame->nb_samples);
    if (ret < 0) {
        LogError("Error while converting\n");
        return false;
    }
    ost->frame->pts = av_rescale_q(ost->samples_count, AVRational{1, c->sample_rate}, c->time_base);
    ost->samples_count += dst_nb_samples;
    return true;
}