Hello everyone,
I’ve recently been testing MJPEG hardware decoding on the Duo-S (SG2000), but I ran into an issue: when I send certain frames from an MJPEG file to vdec using SendStream, the function takes nearly 2 seconds to run. I haven’t seen this problem with H.264 hardware decoding, but with MJPEG it sometimes happens.
Details:
- Development board: Milk-V Duo-S
- Firmware version: V1
What I’ve tried:
- Changed the third parameter (the millisecond timeout) of SendStream to -1, 0, and 200 — no effect
- Replaced problematic frames with other frames from the MJPEG file that don’t cause delays — this avoids delays at the original positions, but new delays then appear in other places
- Split file reading/decoding and frame fetching/YOLO inference into two threads — no effect
Other information:
- For MJPEG files, the frames that cause delays are always fixed. For UVC streams, the problematic frames are not fixed
- YOLO inference runs at around 20 FPS, while the video file itself is 30 FPS
My code:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
#include <signal.h>
#include <sys/stat.h>
#include "cvi_tdl.h"
#include "cvi_tdl_media.h"
#include "core/utils/vpss_helper.h"
#include "cvi_vb.h"
#include "cvi_sys.h"
#include "cvi_vdec.h"
// Define common constants for clarity
#define VIDEO_WIDTH 1920
#define VIDEO_HEIGHT 1088
#define STREAM_BUFFER_SIZE (1024 * 1024 * 2) // 2MB buffer for reading stream data
// Global flag to handle program termination via signals.
// Set from the signal handler, polled by the main loop. C11 (7.14.1.1)
// only guarantees that `volatile sig_atomic_t` (or lock-free atomics) may
// be written from a signal handler, so use that instead of `bool`.
static volatile sig_atomic_t bExit = 0;
/**
 * @brief Signal handler: requests a graceful shutdown on SIGINT or SIGTERM.
 *
 * Only async-signal-safe operations are allowed here, so the notice is
 * emitted with POSIX write() rather than printf() (which may deadlock or
 * corrupt stdio state if the signal arrives inside another printf call).
 *
 * @param sig Signal number delivered by the kernel.
 */
void handle_sigint(int sig) {
    if (sig == SIGINT || sig == SIGTERM) {
        static const char msg[] = "\nCaught signal, preparing to exit...\n";
        ssize_t rc = write(STDOUT_FILENO, msg, sizeof(msg) - 1);
        (void)rc; // best-effort notice; nothing useful to do on failure
        bExit = 1;
    }
}
/**
 * @brief Extracts one H.264 NAL unit from an Annex-B file stream.
 *
 * A chunk of up to STREAM_BUFFER_SIZE bytes is read, then scanned for NAL
 * start codes (0x000001 / 0x00000001). The bytes between the first start
 * code and the next one form the frame; the file position is rewound to
 * the beginning of that next start code so the following call resumes
 * there. If no second start code appears in the chunk, the whole chunk is
 * reported as the frame.
 *
 * @param fp      File pointer to the H.264 stream.
 * @param pu8Buf  Buffer (at least STREAM_BUFFER_SIZE bytes) for frame data.
 * @param pu32Len Output: number of bytes of the extracted frame.
 * @return CVI_SUCCESS on success, CVI_FAILURE on EOF or read error.
 */
CVI_S32 h264_read_frame(FILE *fp, CVI_U8 *pu8Buf, CVI_U32 *pu32Len) {
    int chunk_len;
    int in_frame = 0;  // becomes true once the first start code is seen
    int zeros = 0;     // length of the current run of 0x00 bytes
    int pos;

    if (feof(fp)) {
        return CVI_FAILURE;
    }
    chunk_len = fread(pu8Buf, 1, STREAM_BUFFER_SIZE, fp);
    if (chunk_len <= 0) {
        return CVI_FAILURE;
    }

    for (pos = 0; pos < chunk_len; pos++) {
        unsigned char byte = pu8Buf[pos];
        if (byte == 0) {
            zeros++;
            continue;
        }
        if (byte == 1 && zeros >= 2) {
            if (in_frame) {
                // Second start code found: rewind the stream to its first
                // zero byte and report everything before it as one frame.
                fseek(fp, (long)(pos - zeros - chunk_len), SEEK_CUR);
                *pu32Len = pos - zeros;
                return CVI_SUCCESS;
            }
            in_frame = 1;
        }
        zeros = 0;
    }

    // No second start code in this chunk: hand back the whole chunk.
    *pu32Len = chunk_len;
    return CVI_SUCCESS;
}
/**
 * @brief Reads a single complete JPEG frame from an MJPEG file stream.
 *
 * Scans forward to the Start Of Image marker (SOI, 0xFFD8) and copies bytes
 * up to and including the *matching* End Of Image marker (EOI, 0xFFD9).
 * A nesting-depth counter is kept because a JPEG may embed a thumbnail
 * (e.g. an EXIF/APP1 preview) that is itself a full JPEG with its own
 * SOI/EOI pair; the previous single-EOI scan cut such frames at the
 * thumbnail's EOI, producing a truncated picture plus a headerless
 * remainder — malformed input that can stall the hardware decoder.
 *
 * NOTE(review): this is still a marker scan, not a full segment-length
 * parse; a raw 0xFFD8/0xFFD9 byte pair inside a table segment payload could
 * in principle confuse it (the original code shared this limitation).
 * Within entropy-coded data 0xFF is byte-stuffed as 0xFF 0x00, so markers
 * seen there are real.
 *
 * @param fp          File pointer to the MJPEG stream.
 * @param pu8FrameBuf Buffer to store the complete frame data.
 * @param pu32Len     Output pointer for the length of the read frame.
 * @param u32BufSize  The total size of pu8FrameBuf to prevent overflow.
 * @return CVI_SUCCESS on success, CVI_FAILURE on error or end of stream.
 */
CVI_S32 mjpeg_read_frame(FILE *fp, CVI_U8 *pu8FrameBuf, CVI_U32 *pu32Len, CVI_U32 u32BufSize) {
    int c, prev_c = EOF;
    CVI_U32 len = 0;
    bool soi_found = false;
    int soi_depth = 1; // the outer frame's SOI counts as depth 1

    // 1. Find the outer Start of Image (SOI) marker: 0xFFD8
    while ((c = fgetc(fp)) != EOF) {
        if (prev_c == 0xFF && c == 0xD8) {
            soi_found = true;
            break;
        }
        prev_c = c;
    }
    if (!soi_found) {
        return CVI_FAILURE; // End of stream or corrupted file
    }
    // 2. Store the SOI marker itself at the start of the frame buffer.
    if (len + 2 > u32BufSize) {
        fprintf(stderr, "Buffer too small for JPEG frame.\n");
        return CVI_FAILURE;
    }
    pu8FrameBuf[len++] = 0xFF;
    pu8FrameBuf[len++] = 0xD8;
    prev_c = 0xD8;
    // 3. Copy bytes until the EOI that matches the OUTER SOI is found,
    //    tracking nested SOI/EOI pairs from embedded thumbnails.
    while ((c = fgetc(fp)) != EOF) {
        if (len >= u32BufSize) {
            fprintf(stderr, "Error: MJPEG frame is larger than the buffer size (%u bytes).\n", u32BufSize);
            return CVI_FAILURE;
        }
        pu8FrameBuf[len++] = (CVI_U8)c;
        if (prev_c == 0xFF) {
            if (c == 0xD8) {
                soi_depth++; // nested SOI: an embedded JPEG begins
            } else if (c == 0xD9 && --soi_depth == 0) {
                *pu32Len = len;
                return CVI_SUCCESS; // Outer frame is complete
            }
        }
        prev_c = c;
    }
    // Reached end of file before finding the matching EOI marker.
    return CVI_FAILURE;
}
/**
 * @brief Configures YOLOv8 pre-processing and algorithm parameters on the
 *        TDL handle; must run before the model is opened.
 * @param tdl_handle Handle to the TDL (SDK's AI library).
 * @return CVI_SUCCESS on success, otherwise a CVI error code.
 */
CVI_S32 init_yolo_param(const cvitdl_handle_t tdl_handle) {
    CVI_S32 ret;

    printf("Setting up YOLOv8 parameters...\n");

    // Pre-processing: normalize every channel to [0,1] (mean 0, factor
    // 1/255), planar RGB input, center-style rescaling.
    YoloPreParam pre_cfg = CVI_TDL_Get_YOLO_Preparam(tdl_handle, CVI_TDL_SUPPORTED_MODEL_YOLOV8_DETECTION);
    for (int ch = 0; ch < 3; ch++) {
        pre_cfg.mean[ch] = 0.0;
        pre_cfg.factor[ch] = 0.003922; // 1/255.0
    }
    pre_cfg.format = PIXEL_FORMAT_RGB_888_PLANAR;
    pre_cfg.rescale_type = RESCALE_CENTER;
    ret = CVI_TDL_Set_YOLO_Preparam(tdl_handle, CVI_TDL_SUPPORTED_MODEL_YOLOV8_DETECTION, pre_cfg);
    if (ret != CVI_SUCCESS) {
        fprintf(stderr, "Failed to set YOLOv8 preprocess parameters, error: %#x\n", ret);
        return ret;
    }

    // Algorithm parameters: class count must match the exported model.
    YoloAlgParam alg_cfg = CVI_TDL_Get_YOLO_Algparam(tdl_handle, CVI_TDL_SUPPORTED_MODEL_YOLOV8_DETECTION);
    alg_cfg.cls = 2; // Example: set expected class number
    ret = CVI_TDL_Set_YOLO_Algparam(tdl_handle, CVI_TDL_SUPPORTED_MODEL_YOLOV8_DETECTION, alg_cfg);
    if (ret != CVI_SUCCESS) {
        fprintf(stderr, "Failed to set YOLOv8 algorithm parameters, error: %#x\n", ret);
        return ret;
    }

    // Detection confidence and NMS thresholds.
    CVI_TDL_SetModelThreshold(tdl_handle, CVI_TDL_SUPPORTED_MODEL_YOLOV8_DETECTION, 0.5);
    CVI_TDL_SetModelNmsThreshold(tdl_handle, CVI_TDL_SUPPORTED_MODEL_YOLOV8_DETECTION, 0.5);

    printf("YOLOv8 parameters setup successfully.\n");
    return CVI_SUCCESS;
}
/**
 * @brief Demo entry point: hardware-decodes an H.264 or MJPEG file frame by
 *        frame with the VDEC, runs YOLOv8 detection on every decoded
 *        picture, and prints per-frame read/decode/inference timings plus a
 *        final FPS summary.
 *
 * Usage: prog <yolo_model_path> <video_file_path (.h264 or .mjpg/.mjpeg)>
 *
 * @return CVI_SUCCESS on a normal run to end-of-stream, CVI_FAILURE or an
 *         SDK error code otherwise.
 */
int main(int argc, char **argv) {
    if (argc != 3) {
        fprintf(stderr, "Usage: %s <yolo_model_path> <video_file_path (.h264 or .mjpg/.mjpeg)>\n", argv[0]);
        return CVI_FAILURE;
    }
    const char *yolo_model_path = argv[1];
    const char *video_file_path = argv[2];
    // --- Variable Declarations ---
    CVI_S32 s32Ret = CVI_SUCCESS;
    cvitdl_handle_t tdl_handle = NULL;
    VDEC_CHN VdChn = 0;
    VIDEO_FRAME_INFO_S stFrameInfo;
    cvtdl_object_t obj_meta = {0};
    FILE *fpStrm = NULL;
    CVI_U8 *pu8Buf = NULL;
    int frame_count = 0;
    struct timespec start_time, end_time;
    double elapsed_seconds;
    VB_CONFIG_S stVbConf;
    VDEC_CHN_ATTR_S stVdecChnAttr;
    PAYLOAD_TYPE_E enType;
    struct timespec read_start, read_end, decode_start, decode_end, inference_start, inference_end;
    double read_ms = 0, decode_ms = 0, inference_ms = 0;
    // --- Determine Video Type from the file extension ---
    const char *file_ext = strrchr(video_file_path, '.');
    if (file_ext && (strcmp(file_ext, ".mjpg") == 0 || strcmp(file_ext, ".mjpeg") == 0)) {
        enType = PT_MJPEG;
        printf("Info: Detected MJPEG stream: %s\n", video_file_path);
    } else if (file_ext && strcmp(file_ext, ".h264") == 0) {
        enType = PT_H264;
        printf("Info: Detected H.264 stream: %s\n", video_file_path);
    } else {
        fprintf(stderr, "Error: Unsupported file type. Please use .h264, .mjpg, or .mjpeg\n");
        return CVI_FAILURE;
    }
    // --- Signal Handlers for graceful shutdown ---
    signal(SIGINT, handle_sigint);
    signal(SIGTERM, handle_sigint);
    // --- System & VB (video buffer pool) Initialization ---
    memset(&stVbConf, 0, sizeof(VB_CONFIG_S));
    stVbConf.u32MaxPoolCnt = 1;
    if (enType == PT_MJPEG) {
        // JPEG decoder output is planar YUV 4:4:4; size blocks accordingly.
        stVbConf.astCommPool[0].u32BlkSize = VDEC_GetPicBufferSize(
            PT_MJPEG, VIDEO_WIDTH, VIDEO_HEIGHT,
            PIXEL_FORMAT_YUV_PLANAR_444, DATA_BITWIDTH_8, COMPRESS_MODE_NONE);
        stVbConf.astCommPool[0].u32BlkCnt = 3;
    } else {
        stVbConf.astCommPool[0].u32BlkSize = VDEC_GetPicBufferSize(
            PT_H264, VIDEO_WIDTH, VIDEO_HEIGHT,
            PIXEL_FORMAT_YUV_PLANAR_420, DATA_BITWIDTH_8, COMPRESS_MODE_NONE);
        stVbConf.astCommPool[0].u32BlkCnt = 10;
    }
    stVbConf.astCommPool[0].enRemapMode = VB_REMAP_MODE_CACHED;
    s32Ret = CVI_VB_SetConfig(&stVbConf);
    if (s32Ret != CVI_SUCCESS) {
        fprintf(stderr, "CVI_VB_SetConfig failed with %#x!\n", s32Ret);
        return s32Ret;
    }
    s32Ret = CVI_VB_Init();
    if (s32Ret != CVI_SUCCESS) {
        fprintf(stderr, "CVI_VB_Init failed with %#x!\n", s32Ret);
        return s32Ret;
    }
    s32Ret = CVI_SYS_Init();
    if (s32Ret != CVI_SUCCESS) {
        fprintf(stderr, "CVI_SYS_Init failed with %#x!\n", s32Ret);
        goto cleanup_vb;
    }
    // --- TDL (AI Model) Initialization ---
    s32Ret = CVI_TDL_CreateHandle(&tdl_handle);
    if (s32Ret != CVI_SUCCESS) {
        fprintf(stderr, "CVI_TDL_CreateHandle failed with %#x!\n", s32Ret);
        goto cleanup_sys;
    }
    s32Ret = init_yolo_param(tdl_handle);
    if (s32Ret != CVI_SUCCESS) {
        fprintf(stderr, "init_yolo_param failed!\n");
        goto cleanup_tdl;
    }
    s32Ret = CVI_TDL_OpenModel(tdl_handle, CVI_TDL_SUPPORTED_MODEL_YOLOV8_DETECTION, yolo_model_path);
    if (s32Ret != CVI_SUCCESS) {
        fprintf(stderr, "CVI_TDL_OpenModel failed for %s with %#x\n", yolo_model_path, s32Ret);
        goto cleanup_tdl;
    }
    printf("YOLOv8 model opened successfully.\n");
    // --- VDEC Channel Initialization ---
    memset(&stVdecChnAttr, 0, sizeof(VDEC_CHN_ATTR_S));
    stVdecChnAttr.enType = enType;
    stVdecChnAttr.enMode = VIDEO_MODE_FRAME;
    stVdecChnAttr.u32PicWidth = VIDEO_WIDTH;
    stVdecChnAttr.u32PicHeight = VIDEO_HEIGHT;
    // Set buffer counts based on codec type
    stVdecChnAttr.u32FrameBufCnt = (enType == PT_MJPEG) ? 1 : 5; // 1 for MJPEG, 5 for H264 is safe
    stVdecChnAttr.u32StreamBufSize = 0; // Set to 0 to let the driver manage
    s32Ret = CVI_VDEC_CreateChn(VdChn, &stVdecChnAttr);
    if (s32Ret != CVI_SUCCESS) {
        fprintf(stderr, "CVI_VDEC_CreateChn failed with %#x\n", s32Ret);
        goto cleanup_tdl;
    }
    // Per-channel parameters must be modified via Get/Set ChnParam.
    VDEC_CHN_PARAM_S stChnParam;
    s32Ret = CVI_VDEC_GetChnParam(VdChn, &stChnParam);
    if (s32Ret != CVI_SUCCESS) {
        fprintf(stderr, "CVI_VDEC_GetChnParam failed with %#x!\n", s32Ret);
        goto cleanup_vdec_chn;
    }
    // Output pixel format / display queue depth depend on the codec.
    if (enType == PT_MJPEG) {
        stChnParam.enPixelFormat = PIXEL_FORMAT_YUV_PLANAR_444;
        stChnParam.u32DisplayFrameNum = 0;
    } else { // PT_H264
        stChnParam.enPixelFormat = PIXEL_FORMAT_YUV_PLANAR_420;
        stChnParam.u32DisplayFrameNum = 2;
    }
    s32Ret = CVI_VDEC_SetChnParam(VdChn, &stChnParam);
    if (s32Ret != CVI_SUCCESS) {
        fprintf(stderr, "CVI_VDEC_SetChnParam failed with %#x!\n", s32Ret);
        goto cleanup_vdec_chn;
    }
    s32Ret = CVI_VDEC_StartRecvStream(VdChn);
    if (s32Ret != CVI_SUCCESS) {
        fprintf(stderr, "CVI_VDEC_StartRecvStream failed with %#x\n", s32Ret);
        goto cleanup_vdec_chn;
    }
    // --- Main Processing Loop ---
    fpStrm = fopen(video_file_path, "rb");
    if (!fpStrm) {
        fprintf(stderr, "Cannot open video file: %s\n", video_file_path);
        s32Ret = CVI_FAILURE; // BUGFIX: was left at CVI_SUCCESS on this path
        goto cleanup_vdec_recv;
    }
    pu8Buf = (CVI_U8*)malloc(STREAM_BUFFER_SIZE);
    if (!pu8Buf) {
        fprintf(stderr, "Failed to allocate stream buffer.\n");
        s32Ret = CVI_FAILURE; // BUGFIX: was left at CVI_SUCCESS on this path
        goto cleanup_file;
    }
    printf("Starting decoding and YOLO inference loop...\n");
    clock_gettime(CLOCK_MONOTONIC, &start_time);
    while (!bExit) {
        VDEC_STREAM_S stStream = {0};
        // Read one encoded frame from the file and time it.
        clock_gettime(CLOCK_MONOTONIC, &read_start);
        if (enType == PT_H264) {
            s32Ret = h264_read_frame(fpStrm, pu8Buf, &stStream.u32Len);
        } else {
            s32Ret = mjpeg_read_frame(fpStrm, pu8Buf, &stStream.u32Len, STREAM_BUFFER_SIZE);
        }
        clock_gettime(CLOCK_MONOTONIC, &read_end);
        read_ms = (read_end.tv_sec - read_start.tv_sec) * 1000.0 + (read_end.tv_nsec - read_start.tv_nsec) / 1000000.0;
        if (s32Ret != CVI_SUCCESS || stStream.u32Len == 0) {
            printf("\nEnd of video stream or read error.\n");
            // BUGFIX: end-of-stream is the normal exit; without this the
            // program returned the read helper's CVI_FAILURE on success.
            s32Ret = CVI_SUCCESS;
            break;
        }
        stStream.pu8Addr = pu8Buf;
        stStream.u64PTS = frame_count;
        stStream.bEndOfStream = CVI_FALSE;
        stStream.bEndOfFrame = CVI_TRUE;
        if (CVI_VDEC_SendStream(VdChn, &stStream, -1) != CVI_SUCCESS) {
            // NOTE(review): the current frame is dropped on send failure;
            // the next loop iteration reads a fresh frame.
            fprintf(stderr, "CVI_VDEC_SendStream failed, retrying...\n");
            usleep(10000);
            continue;
        }
        // Block until a decoded picture is available and time the decode.
        clock_gettime(CLOCK_MONOTONIC, &decode_start);
        s32Ret = CVI_VDEC_GetFrame(VdChn, &stFrameInfo, -1);
        clock_gettime(CLOCK_MONOTONIC, &decode_end);
        if (s32Ret != CVI_SUCCESS) {
            fprintf(stderr, "\nWarning: CVI_VDEC_GetFrame failed with %#x\n", s32Ret);
            usleep(1000);
            continue;
        }
        decode_ms = (decode_end.tv_sec - decode_start.tv_sec) * 1000.0 + (decode_end.tv_nsec - decode_start.tv_nsec) / 1000000.0;
        // Run YOLOv8 on the decoded frame and time the inference.
        clock_gettime(CLOCK_MONOTONIC, &inference_start);
        s32Ret = CVI_TDL_YOLOV8_Detection(tdl_handle, &stFrameInfo, &obj_meta);
        clock_gettime(CLOCK_MONOTONIC, &inference_end);
        inference_ms = (inference_end.tv_sec - inference_start.tv_sec) * 1000.0 + (inference_end.tv_nsec - inference_start.tv_nsec) / 1000000.0;
        if (s32Ret == CVI_SUCCESS) {
            printf("\rFrame %d: Detected %u objects. | Read: %.2fms, Decode: %.2fms, Inference: %.2fms ",
                   frame_count, obj_meta.size, read_ms, decode_ms, inference_ms);
            fflush(stdout);
        } else {
            // BUGFIX: the detection result was previously printed even when
            // the call failed (stale/garbage obj_meta).
            fprintf(stderr, "\nWarning: CVI_TDL_YOLOV8_Detection failed with %#x\n", s32Ret);
        }
        CVI_VDEC_ReleaseFrame(VdChn, &stFrameInfo);
        CVI_TDL_Free(&obj_meta);
        frame_count++;
    }
    // --- Performance Summary ---
    clock_gettime(CLOCK_MONOTONIC, &end_time);
    elapsed_seconds = (end_time.tv_sec - start_time.tv_sec) +
                      (end_time.tv_nsec - start_time.tv_nsec) / 1000000000.0;
    if (elapsed_seconds > 0) {
        double fps = frame_count / elapsed_seconds;
        printf("\n----------------------------------------\n");
        printf("Processing finished.\n");
        printf("Total frames processed: %d\n", frame_count);
        printf("Total time: %.2f seconds\n", elapsed_seconds);
        printf("Actual FPS (Decode + Inference): %.2f\n", fps);
        printf("----------------------------------------\n");
    }
    // --- Cleanup (reverse order of acquisition; labels fall through) ---
    free(pu8Buf); // free(NULL) is a no-op, no guard needed
cleanup_file:
    if (fpStrm) fclose(fpStrm);
cleanup_vdec_recv:
    CVI_VDEC_StopRecvStream(VdChn);
cleanup_vdec_chn:
    CVI_VDEC_DestroyChn(VdChn);
cleanup_tdl:
    CVI_TDL_DestroyHandle(tdl_handle);
cleanup_sys:
    CVI_SYS_Exit();
cleanup_vb:
    CVI_VB_Exit();
    printf("Cleanup complete. Exiting.\n");
    return s32Ret;
}
Thanks a lot in advance for any advice or suggestions! I really appreciate your help!