I’m trying to create a meeting bot that streams a video file along with audio into a Zoom meeting. I was able to get the code here working to send both audio and video GitHub - zoom/meetingsdk-linux-raw-recording-sample
But after a few seconds, the video slows down and goes out of sync with the audio. How can I ensure that both remain in sync?
The video is playing at 30fps and I’m adding sleep to ensure this frame rate.
void PlayVideoFileToVirtualCamera(IZoomSDKVideoSender* video_sender, const std::string& video_source)
{
char* frameBuffer;
int frameLen = height / 2 * 3 * width;
frameBuffer = (char*)malloc(frameLen);
const int fps = 30;
const std::chrono::microseconds frameDuration(1000000 / fps); // 1 second = 1,000,000 microseconds
// execute in a thread.
while (video_play_flag > 0 && video_sender) {
Mat frame;
VideoCapture cap;
cap.open(video_source);
if (!cap.isOpened()) {
cerr << "ERROR! Unable to open camera\n";
video_play_flag = 0;
break;
}
else {
//--- GRAB AND WRITE LOOP
std::cout << "Start grabbing" << endl;
while (video_play_flag > 0)
{
auto start = std::chrono::high_resolution_clock::now();
// wait for a new frame from camera and store it into 'frame'
cap.read(frame);
// check if we succeeded
if (frame.empty()) {
cerr << "ERROR! blank frame grabbed\n";
break;
}
Mat resizedFrame;
resize(frame, resizedFrame, Size(width, height), 0, 0, INTER_LINEAR);
//covert Mat to YUV buffer
Mat yuv;
cv::cvtColor(resizedFrame, yuv, COLOR_BGRA2YUV_I420);
char* p;
for (int i = 0; i < height / 2 * 3; ++i) {
p = yuv.ptr<char>(i);
for (int j = 0; j < width; ++j) {
frameBuffer[i * width + j] = p[j];
}
}
SDKError err = ((IZoomSDKVideoSender*)video_sender)->sendVideoFrame(frameBuffer, width, height, frameLen, 0);
if (err != SDKERR_SUCCESS) {
std::cout << "sendVideoFrame failed: Error " << err << endl;
}
auto end = std::chrono::high_resolution_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
if (elapsed < frameDuration) {
std::this_thread::sleep_for(frameDuration - elapsed);
std::cout << "frame sleep " << endl;
}
}
cap.release();
}
}
video_play_flag = -1;
}
The audio is being played while being processed as chunks:
void PlayAudioFileToVirtualMic(IZoomSDKAudioRawDataSender* audio_sender, string audio_source)
{
printf("PLAYAUDIOFILETOVIRTUALMIC invoked!");
int chunkSize = 4096; // 4096 bytes per chunk
int sampleRate = 44100; // samples per second
int bytesPerSample = 2; // 16-bit audio
// execute in a thread.
while (audio_play_flag > 0 && audio_sender) {
// Check if the file exists
ifstream file(audio_source, ios::binary | ios::ate);
if (!file.is_open()) {
std::cout << "Error: File not found. Tried to open " << audio_source << std::endl;
return;
}
// Get the file size
int file_size = file.tellg();
file.seekg(0, ios::beg);
vector<char> buffer(chunkSize);
while (file.read(buffer.data(), buffer.size()) || file.gcount() > 0) {
size_t bytesRead = file.gcount();
SDKError err = audio_sender->send(buffer.data(), bytesRead, 44100);
if (err != SDKERR_SUCCESS) {
cout << "Error: Failed to send audio data to virtual mic. Error code: " << err << endl;
break;
}
std::this_thread::sleep_for(std::chrono::milliseconds(chunkSize / (sampleRate * bytesPerSample / 1000)));
}
file.close();
audio_play_flag = -1;
}
}

