MIPI->ISP->PPA->H264->SD Card pipeline performance
Posted: Wed Nov 19, 2025 2:04 pm
Hi all!
I've been working for a couple of months on a video capture project on ESP32-P4 and so far I've managed to achieve 1080p@30 fps performance for the MIPI->ISP->H264->SD Card pipeline with no problem, but I'm experiencing framerate drops when including the PPA in my system. At first I thought PSRAM bandwidth limitations were slowing down the loop, because I was executing each step as a separate task and so every single step would read and write big buffers from the PSRAM, but now I've joined everything in a single task and I still get the same 20 fps as before.
Right now I'm using an OV5647 sensor running at 1280x960@45 fps, trying to scale down by a factor of 2 (so 640x480), PSRAM is running at 200 MHz, I'm not writing to SD and I'm printing the following message once every second:
As you can see, the FPS doesn't change with the bitrate, so it doesn't look like a bandwidth problem (as far as I can tell).
I understand running the whole pipeline can take more than 22.2 ms (45 FPS), but since there's no information about expected PPA performance, I wanted to ask here if there's anything I could be doing wrong. I'm not using the esp_video library since it's terribly memory-hungry, so I used esp_cam_sensor and esp_h264 to configure the sensor and the encoder.
Code snippets
Component initialization
Pipeline loop
PPA configuration
I've been working for a couple of months on a video capture project on ESP32-P4 and so far I've managed to achieve 1080p@30 fps performance for the MIPI->ISP->H264->SD Card pipeline with no problem, but I'm experiencing framerate drops when including the PPA in my system. At first I thought PSRAM bandwidth limitations were slowing down the loop, because I was executing each step as a separate task and so every single step would read and write big buffers from the PSRAM, but now I've joined everything in a single task and I still get the same 20 fps as before.
Right now I'm using an OV5647 sensor running at 1280x960@45 fps, trying to scale down by a factor of 2 (so 640x480), PSRAM is running at 200 MHz, I'm not writing to SD and I'm printing the following message once every second:
Code: Select all
I (709451) Video Manager: Bitrate = 997296 bps = 121 kB/s, Set bitrate= 1175040 bps, FPS=20, Set FPS=45
I (710481) Video Manager: Bitrate = 1028288 bps = 125 kB/s, Set bitrate= 1175040 bps, FPS=20, Set FPS=45
I (711501) Video Manager: Bitrate = 1025600 bps = 125 kB/s, Set bitrate= 1175040 bps, FPS=20, Set FPS=45
I (712521) Video Manager: Bitrate = 3149160 bps = 384 kB/s, Set bitrate= 1175040 bps, FPS=20, Set FPS=45
I (713541) Video Manager: Bitrate = 1777744 bps = 217 kB/s, Set bitrate= 1175040 bps, FPS=20, Set FPS=45
I (714561) Video Manager: Bitrate = 3067824 bps = 374 kB/s, Set bitrate= 1175040 bps, FPS=20, Set FPS=45
I (715581) Video Manager: Bitrate = 4489584 bps = 548 kB/s, Set bitrate= 1175040 bps, FPS=20, Set FPS=45
I understand running the whole pipeline can take more than 22.2 ms (45 FPS), but since there's no information about expected PPA performance, I wanted to ask here if there's anything I could be doing wrong. I'm not using the esp_video library since it's terribly memory-hungry, so I used esp_cam_sensor and esp_h264 to configure the sensor and the encoder.
Code snippets
Component initialization
Code: Select all
//--------Camera Sensor and SCCB Init-----------//
// Sensor handle starts with no SCCB / I2C bus attached; example_sensor_init()
// receives a pointer to it below.
example_sensor_handle_t sensor_handle = {
.sccb_handle = NULL,
.i2c_bus_handle = NULL,
};
// Sensor configuration: I2C port 0 on GPIO 7 (SDA) / GPIO 8 (SCL), MIPI-CSI
// port, and the sensor format selected by name (the string requests 2 lanes,
// RAW10, 1280x960, 45 fps).
example_sensor_config_t cam_sensor_config = {
.i2c_port_num = I2C_NUM_0,
.i2c_sda_io_num = GPIO_NUM_7,
.i2c_scl_io_num = GPIO_NUM_8,
.port = ESP_CAM_SENSOR_MIPI_CSI,
.format_name = "MIPI_2lane_24Minput_RAW10_1280x960_binning_45fps",
};
// NOTE(review): no result of example_sensor_init() is checked here — confirm
// how it reports a sensor that could not be detected or configured.
example_sensor_init(&cam_sensor_config, &sensor_handle);
// Configure and create the CSI controller (handle is stored in s_cam).
// Resolution, lane count, lane bit rate and colour types come from project
// macros that are not part of this snippet.
esp_cam_ctlr_csi_config_t cfg = {
.ctlr_id = 0,
.h_res = HRES,
.v_res = VRES,
.lane_bit_rate_mbps = CSI_LANE_BITRATE_MBPS,
.input_data_color_type = CSI_INPUT_COLOR,
.output_data_color_type = CSI_OUTPUT_COLOR,
.data_lane_num = CSI_LANES,
.byte_swap_en = false,
.queue_items = FRAME_BUF_COUNT, // >1 helps continuous capture
};
ESP_ERROR_CHECK(esp_cam_new_csi_ctlr(&cfg, &s_cam));
// Camera-controller event callbacks: only the transfer-finished hook is set.
// NOTE(review): this snippet never passes `cbs` to a registration call, and
// the controller is not enabled/started here — confirm that happens elsewhere.
esp_cam_ctlr_evt_cbs_t cbs = {.on_trans_finished = on_trans_finished};
//---------------ISP Init------------------//
// ISP processor: 80 MHz clock, fed from the CSI input, RAW10 in, YUV420 out,
// GBRG Bayer order, same HRES x VRES frame size as the CSI controller.
isp_proc_handle_t isp_proc = NULL;
esp_isp_processor_cfg_t isp_config = {
.clk_hz = 80 * 1000 * 1000,
.input_data_source = ISP_INPUT_DATA_SOURCE_CSI,
.input_data_color_type = ISP_COLOR_RAW10,
.output_data_color_type = ISP_COLOR_YUV420,
.has_line_start_packet = false,
.has_line_end_packet = false,
.h_res = HRES,
.v_res = VRES,
.bayer_order = COLOR_RAW_ELEMENT_ORDER_GBRG,
};
ESP_ERROR_CHECK(esp_isp_new_processor(&isp_config, &isp_proc));
ESP_ERROR_CHECK(esp_isp_enable(isp_proc));
//---------------PPA Client Init------------------//
/// TODO: Go to a fail tag instead of using ESP_ERROR_CHECK
// PPA client for scale-rotate-mirror (SRM) operations; up to PPA_BUF_COUNT
// transactions may be pending, with a data burst length of 128. The client
// handle is stored in s_ppa.
ppa_client_config_t ppa_srm_config = {
.oper_type = PPA_OPERATION_SRM,
.max_pending_trans_num = PPA_BUF_COUNT,
.data_burst_length = PPA_DATA_BURST_LENGTH_128,
};
ESP_ERROR_CHECK(ppa_register_client(&ppa_srm_config, &s_ppa));
// PPA event callbacks: on_frame_scaled is installed as the transaction-done
// hook.
ppa_event_callbacks_t ppa_cbs = {
.on_trans_done = on_frame_scaled,
};
ESP_ERROR_CHECK(ppa_client_register_event_callbacks(s_ppa, &ppa_cbs));
Code: Select all
static void capture_task(void *arg) {
uint32_t frame_idx = 0;
uint8_t *frame = NULL;
uint64_t now = esp_timer_get_time();
uint64_t encoded = 0;
int frames_per_second = 0;
esp_h264_enc_out_frame_t *out = {0};
// Encoder parameter values (useful for debugging)
esp_h264_enc_param_hw_handle_t param_hd;
uint32_t set_bitrate = 0;
uint8_t set_fps = 0;
while (1) {
ESP_LOGV(TAG, "[%s] Receiving frame from s_free_frame_q", pcTaskGetName(NULL));
xQueueReceive(s_free_frame_q, &frame, portMAX_DELAY);
ESP_LOGV(TAG, "[%s] Got frame", pcTaskGetName(NULL));
esp_cam_ctlr_trans_t trans = {
.buffer = frame,
.buflen = FRAME_BYTES,
};
// Ask the camera sensor to fill the buffer
ESP_LOGV(TAG, "[%s] Receiving transaction from sensor", pcTaskGetName(NULL));
ESP_ERROR_CHECK(esp_cam_ctlr_receive(s_cam, &trans, ESP_CAM_CTLR_MAX_DELAY));
ESP_LOGV(TAG, "[%s] Receiving buffer from s_free_encoded_q", pcTaskGetName(NULL));
xQueueReceive(s_free_encoded_q, &out, portMAX_DELAY); // Get a free encoded buffer
s_srm_cfg.in.buffer = frame;
s_srm_cfg.out.buffer = out->raw_data.buffer;
s_srm_cfg.out.buffer_size = out->raw_data.len;
ESP_LOGV(TAG, "[%s] Scaling", pcTaskGetName(NULL));
ppa_do_scale_rotate_mirror(s_ppa, &s_srm_cfg);
ESP_LOGV(TAG, "[%s] PPA scaling started", pcTaskGetName(NULL));
xSemaphoreTake(ppa_semphr, portMAX_DELAY);
xQueueSendToBack(s_free_frame_q, &frame, portMAX_DELAY); // Return the sensor buffer
esp_h264_enc_in_frame_t in = {0};
in.pts = (frame_idx * 1000U) / H264_FPS; // ms timebase is fine for raw stream
in.raw_data.buffer = s_srm_cfg.out.buffer;
in.raw_data.len = SCALED_BYTES;
esp_h264_err_t er = esp_h264_enc_process(s_enc, &in, out); // Ask the encoder to fill it
// Check for encoder errors
if (er != ESP_H264_ERR_OK) {
ESP_LOGE(TAG, "[%s] H264 process failed (%d)", pcTaskGetName(NULL), (int)er);
xQueueSendToBack(s_free_encoded_q, &out, portMAX_DELAY);
/// TODO: Handle errors
} else {
// Print stream information every second
if (esp_timer_get_time() - now > 1000000ULL) {
esp_h264_enc_hw_get_param_hd(s_enc, ¶m_hd);
esp_h264_enc_get_bitrate(¶m_hd->base, &set_bitrate);
esp_h264_enc_get_fps(¶m_hd->base, &set_fps);
ESP_LOGI(TAG, "Bitrate = %lld bps = %lld kB/s, Set bitrate= %ld bps, FPS=%d, Set FPS=%d",
encoded * 8, encoded >> 10, set_bitrate, frames_per_second, set_fps);
encoded = frames_per_second = 0;
now = esp_timer_get_time();
}
encoded += out->length;
frames_per_second++;
// Send the encoded buffer to the queue to be written to the staging buffer
ESP_LOGV(TAG, "[%s] Sending buffer to s_filled_encoded_q", pcTaskGetName(NULL));
xQueueSendToBack(s_filled_encoded_q, &out, portMAX_DELAY);
}
frame_idx++;
ESP_LOGV(TAG, "[%s] Loop done, back to the beginning", pcTaskGetName(NULL));
}
}Code: Select all
/*
 * Shared PPA scale-rotate-mirror (SRM) job description.
 *
 * Input:  one block covering the whole HRES x VRES picture, YUV420.
 * Output: 640 x 480 picture written at offset (0, 0), YUV420, with
 *         SCALED_BYTES as the initial output buffer size.
 * The job scales by 0.5 on both axes, applies no rotation or byte/RGB
 * swapping, and is submitted in non-blocking mode.
 *
 * The input/output buffer pointers are not set here; the code that submits
 * the job is expected to fill them in before each transaction.
 */
static ppa_srm_oper_config_t s_srm_cfg = {
    .in = {
        .pic_w = HRES,
        .pic_h = VRES,
        .block_w = HRES,
        .block_h = VRES,
        .block_offset_x = 0,
        .block_offset_y = 0,
        .srm_cm = PPA_SRM_COLOR_MODE_YUV420,
    },
    .out = {
        .pic_w = 640,
        .pic_h = 480,
        .buffer_size = SCALED_BYTES,
        .block_offset_x = 0,
        .block_offset_y = 0,
        .srm_cm = PPA_SRM_COLOR_MODE_YUV420,
    },
    .scale_x = 0.5,
    .scale_y = 0.5,
    .rotation_angle = PPA_SRM_ROTATION_ANGLE_0,
    .rgb_swap = 0,
    .byte_swap = 0,
    .mode = PPA_TRANS_MODE_NON_BLOCKING,
};