I’m working on a project where we stream a set of videos to a meeting for training purposes that can be done on automation. To eventually deploy as a Zoom meeting bot that streams video.
I was able to get the basic set up done with the help of the example here: GitHub - zoom/meetingsdk-linux-raw-recording-sample
Tagging @chunsiong.zoom for a response since most of this implementation is by you.
Here’s my implementation of PlayVideoFiletoVirtualCamera in ZoomSDKVideoSource.cpp using FFMPEG converted to YUV420 for finally sending. I do see that the width and height being sent matches with the original width and height, yet on the Zoom Output I see it cropped. Also, is there a limit on the resolution that can be sent via meeting SDK?
Another issue is that when I play the Zoom output and the original video side by side, even though I’ve defined the same framerate, I see the original video go faster and eventually there is a delay of a few seconds at the end of 2 minutes.
Would really appreciate any support in this regard Thank you so much.
void PlayVideoFileToVirtualCamera(IZoomSDKVideoSender* video_sender, const std::string& video_source) {
avformat_network_init();
AVFormatContext* fmt_ctx = nullptr;
if (avformat_open_input(&fmt_ctx, video_source.c_str(), nullptr, nullptr) < 0) {
std::cerr << "Failed to open video file: " << video_source << std::endl;
return;
}
if (avformat_find_stream_info(fmt_ctx, nullptr) < 0) {
std::cerr << "Failed to find stream information" << std::endl;
avformat_close_input(&fmt_ctx);
return;
}
AVCodecParameters* codecpar = nullptr;
int video_stream_index = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);
if (video_stream_index < 0) {
std::cerr << "Failed to find video stream" << std::endl;
avformat_close_input(&fmt_ctx);
return;
}
codecpar = fmt_ctx->streams[video_stream_index]->codecpar;
AVCodec* codec = avcodec_find_decoder(codecpar->codec_id);
if (!codec) {
std::cerr << "Failed to find codec" << std::endl;
avformat_close_input(&fmt_ctx);
return;
}
AVCodecContext* codec_ctx = avcodec_alloc_context3(codec);
if (!codec_ctx) {
std::cerr << "Failed to allocate codec context" << std::endl;
avformat_close_input(&fmt_ctx);
return;
}
if (avcodec_parameters_to_context(codec_ctx, codecpar) < 0) {
std::cerr << "Failed to copy codec parameters to context" << std::endl;
avcodec_free_context(&codec_ctx);
avformat_close_input(&fmt_ctx);
return;
}
if (avcodec_open2(codec_ctx, codec, nullptr) < 0) {
std::cerr << "Failed to open codec" << std::endl;
avcodec_free_context(&codec_ctx);
avformat_close_input(&fmt_ctx);
return;
}
AVFrame* frame = av_frame_alloc();
if (!frame) {
std::cerr << "Failed to allocate frame" << std::endl;
avcodec_free_context(&codec_ctx);
avformat_close_input(&fmt_ctx);
return;
}
AVPacket packet;
packet = AVPacket();
packet.data = nullptr;
packet.size = 0;
SwsContext* sws_ctx = sws_getContext(codec_ctx->width, codec_ctx->height, codec_ctx->pix_fmt,
codec_ctx->width, codec_ctx->height, AV_PIX_FMT_YUV420P,
SWS_BILINEAR, nullptr, nullptr, nullptr);
if (!sws_ctx) {
std::cerr << "Failed to create scaling context" << std::endl;
av_frame_free(&frame);
avcodec_free_context(&codec_ctx);
avformat_close_input(&fmt_ctx);
return;
}
uint8_t* buffer = nullptr;
int buffer_size = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, codec_ctx->width, codec_ctx->height, 1);
buffer = static_cast<uint8_t*>(av_malloc(buffer_size));
av_image_fill_arrays(frame->data, frame->linesize, buffer, AV_PIX_FMT_YUV420P,
codec_ctx->width, codec_ctx->height, 1);
int frame_count = 0;
while (av_read_frame(fmt_ctx, &packet) >= 0) {
if (packet.stream_index == video_stream_index) {
int ret = avcodec_send_packet(codec_ctx, &packet);
if (ret < 0) {
std::cerr << "Failed to send packet to decoder" << std::endl;
break;
}
while (ret >= 0) {
ret = avcodec_receive_frame(codec_ctx, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
break;
} else if (ret < 0) {
std::cerr << "Failed to receive frame from decoder" << std::endl;
break;
}
sws_scale(sws_ctx, frame->data, frame->linesize, 0, codec_ctx->height,
frame->data, frame->linesize);
std::vector<char> frameBuffer(frame->linesize[0] * codec_ctx->height + frame->linesize[1] * codec_ctx->height / 2 + frame->linesize[2] * codec_ctx->height / 2);
char* dst = frameBuffer.data();
int dstStride[] = { frame->linesize[0], frame->linesize[1], frame->linesize[2] };
for (int i = 0; i < 3; ++i) {
uint8_t* src = frame->data[i];
int srcStride = frame->linesize[i];
int height = (i == 0) ? codec_ctx->height : codec_ctx->height / 2;
for (int y = 0; y < height; ++y) {
std::memcpy(dst, src, srcStride);
dst += dstStride[i];
src += srcStride;
}
}
std::cerr << "Width: " << codec_ctx->width << std::endl;
std::cerr << "Height: " << codec_ctx->height << std::endl;
SDKError err = video_sender->sendVideoFrame(frameBuffer.data(), codec_ctx->width, codec_ctx->height, frameBuffer.size(), 0);
if (err != SDKERR_SUCCESS) {
std::cerr << "sendVideoFrame failed: Error " << err << std::endl;
}
std::this_thread::sleep_for(std::chrono::milliseconds(36));
frame_count++;
}
}
av_packet_unref(&packet);
}
std::cout << "Total frames sent: " << frame_count << std::endl;
av_free(buffer);
sws_freeContext(sws_ctx);
av_frame_free(&frame);
avcodec_free_context(&codec_ctx);
avformat_close_input(&fmt_ctx);
}