Raw Video Sending Getting Cropped and Frame Drops/Slow Rate

I’m working on a project where we stream a set of videos to a meeting for training purposes that can be done on automation. To eventually deploy as a Zoom meeting bot that streams video.

I was able to get the basic set up done with the help of the example here: GitHub - zoom/meetingsdk-linux-raw-recording-sample

Tagging @chunsiong.zoom for a response since most of this implementation is by you.

Here’s my implementation of PlayVideoFiletoVirtualCamera in ZoomSDKVideoSource.cpp using FFMPEG converted to YUV420 for finally sending. I do see that the width and height being sent matches with the original width and height, yet on the Zoom Output I see it cropped. Also, is there a limit on the resolution that can be sent via meeting SDK?

Another issue is that when I play the Zoom output and the original video side by side, even though I’ve defined the same framerate, I see the original video go faster and eventually there is a delay of a few seconds at the end of 2 minutes.

Would really appreciate any support in this regard :pray: Thank you so much.

void PlayVideoFileToVirtualCamera(IZoomSDKVideoSender* video_sender, const std::string& video_source) {
    avformat_network_init();

    AVFormatContext* fmt_ctx = nullptr;
    if (avformat_open_input(&fmt_ctx, video_source.c_str(), nullptr, nullptr) < 0) {
        std::cerr << "Failed to open video file: " << video_source << std::endl;
        return;
    }

    if (avformat_find_stream_info(fmt_ctx, nullptr) < 0) {
        std::cerr << "Failed to find stream information" << std::endl;
        avformat_close_input(&fmt_ctx);
        return;
    }


   
    AVCodecParameters* codecpar = nullptr;
    int video_stream_index = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);
    if (video_stream_index < 0) {
        std::cerr << "Failed to find video stream" << std::endl;
        avformat_close_input(&fmt_ctx);
        return;
    }


    codecpar = fmt_ctx->streams[video_stream_index]->codecpar;

    AVCodec* codec = avcodec_find_decoder(codecpar->codec_id);
    if (!codec) {
        std::cerr << "Failed to find codec" << std::endl;
        avformat_close_input(&fmt_ctx);
        return;
    }

    AVCodecContext* codec_ctx = avcodec_alloc_context3(codec);
    if (!codec_ctx) {
        std::cerr << "Failed to allocate codec context" << std::endl;
        avformat_close_input(&fmt_ctx);
        return;
    }

    if (avcodec_parameters_to_context(codec_ctx, codecpar) < 0) {
        std::cerr << "Failed to copy codec parameters to context" << std::endl;
        avcodec_free_context(&codec_ctx);
        avformat_close_input(&fmt_ctx);
        return;
    }

    if (avcodec_open2(codec_ctx, codec, nullptr) < 0) {
        std::cerr << "Failed to open codec" << std::endl;
        avcodec_free_context(&codec_ctx);
        avformat_close_input(&fmt_ctx);
        return;
    }

    AVFrame* frame = av_frame_alloc();
    if (!frame) {
        std::cerr << "Failed to allocate frame" << std::endl;
        avcodec_free_context(&codec_ctx);
        avformat_close_input(&fmt_ctx);
        return;
    }

    AVPacket packet;
    packet = AVPacket();
    packet.data = nullptr;
    packet.size = 0;

    SwsContext* sws_ctx = sws_getContext(codec_ctx->width, codec_ctx->height, codec_ctx->pix_fmt,
                                         codec_ctx->width, codec_ctx->height, AV_PIX_FMT_YUV420P,
                                         SWS_BILINEAR, nullptr, nullptr, nullptr);

    if (!sws_ctx) {
        std::cerr << "Failed to create scaling context" << std::endl;
        av_frame_free(&frame);
        avcodec_free_context(&codec_ctx);
        avformat_close_input(&fmt_ctx);
        return;
    }

    uint8_t* buffer = nullptr;
    int buffer_size = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, codec_ctx->width, codec_ctx->height, 1);
    buffer = static_cast<uint8_t*>(av_malloc(buffer_size));

    av_image_fill_arrays(frame->data, frame->linesize, buffer, AV_PIX_FMT_YUV420P,
                         codec_ctx->width, codec_ctx->height, 1);

    int frame_count = 0;
   
    while (av_read_frame(fmt_ctx, &packet) >= 0) {
        if (packet.stream_index == video_stream_index) {
            int ret = avcodec_send_packet(codec_ctx, &packet);
            if (ret < 0) {
                std::cerr << "Failed to send packet to decoder" << std::endl;
                break;
            }

            while (ret >= 0) {
                ret = avcodec_receive_frame(codec_ctx, frame);
                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
                    break;
                } else if (ret < 0) {
                    std::cerr << "Failed to receive frame from decoder" << std::endl;
                    break;
                }

                sws_scale(sws_ctx, frame->data, frame->linesize, 0, codec_ctx->height,
                                        frame->data, frame->linesize);            

                std::vector<char> frameBuffer(frame->linesize[0] * codec_ctx->height + frame->linesize[1] * codec_ctx->height / 2 + frame->linesize[2] * codec_ctx->height / 2);

                char* dst = frameBuffer.data();
                int dstStride[] = { frame->linesize[0], frame->linesize[1], frame->linesize[2] };

                for (int i = 0; i < 3; ++i) {
                    uint8_t* src = frame->data[i];
                    int srcStride = frame->linesize[i];
                    int height = (i == 0) ? codec_ctx->height : codec_ctx->height / 2;

                    for (int y = 0; y < height; ++y) {
                        std::memcpy(dst, src, srcStride);
                        dst += dstStride[i];
                        src += srcStride;
                    }
                }
                std::cerr << "Width: " << codec_ctx->width << std::endl;
                std::cerr << "Height: " << codec_ctx->height << std::endl;

                SDKError err = video_sender->sendVideoFrame(frameBuffer.data(), codec_ctx->width, codec_ctx->height, frameBuffer.size(), 0);
                if (err != SDKERR_SUCCESS) {
                    std::cerr << "sendVideoFrame failed: Error " << err << std::endl;
                }

             
                std::this_thread::sleep_for(std::chrono::milliseconds(36)); 
                frame_count++;
            }
        }

        av_packet_unref(&packet);
    }

    std::cout << "Total frames sent: " << frame_count << std::endl;

    av_free(buffer);
    sws_freeContext(sws_ctx);
    av_frame_free(&frame);
    avcodec_free_context(&codec_ctx);
    avformat_close_input(&fmt_ctx);
}