debug求助:如何配置raw_opus_decoder的参数,以与最简python server通信,播放音频
Posted: Sat May 10, 2025 6:29 am
板子:esp32s3
idf: 5.3.2
adf:2.4
代码逻辑:server端加载wav文件,用opuslib_next.Encoder编码为raw opus帧后通过WebSocket逐帧发送;esp32端接收后用raw_opus_decoder进行解码播放
问题:在server端,不管开不开启大端序长度前缀,在esp32不管怎么设置参数,都没有播放
已经确认的:
1. 设备正常:用pipeline_http_select_decoder例程可以正常播放(参见 https://github.com/espressif/esp-adf/bl ... ADME_CN.md )
2. esp32接收到了opus帧
万分感谢
opus_play_server.py:
esp32:
idf: 5.3.2
adf:2.4
代码逻辑:在server端,加载wav文件,读取后用opuslib_next.Encoder编码为raw opus帧,在esp32用raw_opus_decoder进行解码播放
问题:在server端,不管开不开启大端序长度前缀,在esp32不管怎么设置参数,都没有播放
已经确认的:
1. 设备正常:用pipeline_http_select_decoder例程可以正常播放https://github.com/espressif/esp-adf/bl ... ADME_CN.md
2. esp32接收到了opus帧
万分感谢
opus_play_server.py:
Code: Select all
import os
import random
import wave
import numpy as np
import opuslib_next
import time
import struct
import asyncio
import websockets
import logging
import json
from threading import Thread
# Logging configuration
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("opus_server")

# Server and audio parameters
WAV_DIR = 'recordings'
HOST = '0.0.0.0'  # listen on all network interfaces
PORT = 8000
SAMPLE_RATE = 16000
CHANNELS = 1
FRAME_SIZE = 960  # samples per channel in one Opus frame: 60 ms at 16 kHz
APPLICATION = opuslib_next.APPLICATION_VOIP

# Files eligible for streaming. WAV_DIR may not exist on a first run (main()
# creates it), so listing it unconditionally would raise FileNotFoundError at
# import time, before main() ever gets the chance to create the directory.
wav_files = (
    [f for f in os.listdir(WAV_DIR) if f.endswith('16000hz.wav')]
    if os.path.isdir(WAV_DIR)
    else []
)
def read_wav_file(file_path):
    """Read a WAV file and return its raw PCM bytes.

    Args:
        file_path: Path of the WAV file to load.

    Returns:
        All audio frames of the file as a single ``bytes`` object.

    Raises:
        ValueError: If the sample rate differs from ``SAMPLE_RATE``, the
            channel count differs from ``CHANNELS``, or the samples are not
            16 bits wide.
    """
    with wave.open(file_path, 'rb') as wav_file:
        # The encoder is created with fixed parameters, so reject mismatches.
        if wav_file.getframerate() != SAMPLE_RATE:
            raise ValueError(f"WAV文件采样率必须为{SAMPLE_RATE}Hz")
        if wav_file.getnchannels() != CHANNELS:
            raise ValueError(f"WAV文件必须为{CHANNELS}通道")
        # encode_opus() reinterprets these bytes as int16 samples; any other
        # sample width would be decoded into garbage instead of failing.
        if wav_file.getsampwidth() != 2:
            raise ValueError("WAV文件必须为16位采样")
        return wav_file.readframes(wav_file.getnframes())
def encode_opus(pcm_data, *, length_prefix=True):
    """Encode 16-bit PCM bytes into a list of Opus packets.

    The input is cut into frames of ``FRAME_SIZE`` samples per channel; a
    short final frame is zero-padded so the encoder always receives a full
    frame.

    Args:
        pcm_data: Raw little-endian int16 PCM bytes (interleaved when
            ``CHANNELS`` is greater than 1).
        length_prefix: When true, each packet is preceded by its length as a
            2-byte big-endian unsigned integer. The receiver must be
            configured to match.
            NOTE(review): presumably the layout expected by ESP-ADF's
            raw_opus_decoder with ``enable_frame_length_prefix = true`` --
            confirm against the ADF header.

    Returns:
        A list of ``bytes`` objects, one per encoded frame, in playback order.
    """
    encoder = opuslib_next.Encoder(SAMPLE_RATE, CHANNELS, APPLICATION)

    # Reinterpret the byte string as int16 samples.
    samples = np.frombuffer(pcm_data, dtype=np.int16)

    # FRAME_SIZE counts samples per channel, so an interleaved frame spans
    # FRAME_SIZE * CHANNELS array elements.
    samples_per_frame = FRAME_SIZE * CHANNELS

    encoded_frames = []
    for start in range(0, len(samples), samples_per_frame):
        frame = samples[start:start + samples_per_frame]
        # Zero-pad a short trailing frame up to the full frame length.
        if len(frame) < samples_per_frame:
            frame = np.pad(frame, (0, samples_per_frame - len(frame)), 'constant')

        encoded = encoder.encode(frame.tobytes(), FRAME_SIZE)
        if length_prefix:
            encoded = struct.pack('>H', len(encoded)) + encoded
        # Collect the packet; without this the function returned an empty
        # list and the caller had nothing to transmit.
        encoded_frames.append(encoded)
    return encoded_frames
async def handle_client(websocket):
    """Serve one WebSocket client by streaming random WAV files as Opus.

    Sends a JSON ``hello`` message, then loops forever: picks a random file
    from ``wav_files``, announces it with ``file_start``, sends every encoded
    frame as a binary message paced at the real playback rate, and finishes
    with ``file_end``. If a file cannot be processed, an ``error`` message is
    sent and the loop ends.

    Args:
        websocket: The connected client, as passed in by ``websockets.serve``.
    """
    client_ip = websocket.remote_address[0]
    logger.info(f"客户端已连接: {client_ip}")

    # Playback time carried by one frame. Pacing must match it: sending a
    # 60 ms frame every 20 ms pushes audio at three times real time.
    frame_duration = FRAME_SIZE / SAMPLE_RATE

    try:
        # Initial handshake describing the audio format.
        await websocket.send(json.dumps({
            "type": "hello",
            "status": "ok",
            "sample_rate": SAMPLE_RATE,
            "channels": CHANNELS
        }))

        while True:
            if not wav_files:
                logger.warning("没有找到WAV文件")
                await websocket.send(json.dumps({
                    "type": "error",
                    "message": "没有可用的音频文件"
                }))
                await asyncio.sleep(5)
                continue

            wav_file = random.choice(wav_files)
            wav_path = os.path.join(WAV_DIR, wav_file)
            logger.info(f"正在发送文件: {wav_file}")

            await websocket.send(json.dumps({
                "type": "file_start",
                "filename": wav_file
            }))

            try:
                pcm_data = read_wav_file(wav_path)
                encoded_frames = encode_opus(pcm_data)

                # Pace against absolute deadlines so the time spent in send()
                # does not accumulate into drift.
                loop = asyncio.get_running_loop()
                next_deadline = loop.time()
                for frame in encoded_frames:
                    logger.info(len(frame))
                    await websocket.send(frame)
                    next_deadline += frame_duration
                    await asyncio.sleep(max(0.0, next_deadline - loop.time()))

                logger.info(f"文件 {wav_file} 发送完成")

                await websocket.send(json.dumps({
                    "type": "file_end",
                    "filename": wav_file
                }))

                await asyncio.sleep(1)  # gap between files
            except Exception as e:
                logger.error(f"处理文件 {wav_file} 时出错: {e}")
                await websocket.send(json.dumps({
                    "type": "error",
                    "message": f"处理文件出错: {str(e)}"
                }))
                break
    except websockets.exceptions.ConnectionClosed as e:
        logger.info(f"客户端断开连接: {client_ip}, 代码: {e.code}, 原因: {e.reason}")
    except Exception as e:
        logger.error(f"处理客户端 {client_ip} 时出错: {e}", exc_info=True)
async def start_server():
    """Start the WebSocket server on HOST:PORT and return the server object."""
    ws_server = await websockets.serve(handle_client, HOST, PORT)
    logger.info(f"Opus WebSocket服务器启动在 ws://{HOST}:{PORT}")
    return ws_server
def main():
    """Create the WAV directory if needed and run the server until Ctrl+C."""
    if not os.path.exists(WAV_DIR):
        os.makedirs(WAV_DIR, exist_ok=True)
        logger.info(f"创建WAV目录: {WAV_DIR}")

    # Create the loop explicitly: asyncio.get_event_loop() without a running
    # loop is deprecated and no longer creates one on recent Python versions.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        server = loop.run_until_complete(start_server())
        try:
            loop.run_forever()
        except KeyboardInterrupt:
            logger.info("服务器关闭")
        finally:
            server.close()
            loop.run_until_complete(server.wait_closed())
    finally:
        # Close the loop even when the server failed to start.
        loop.close()


if __name__ == "__main__":
    main()
esp32:
Code: Select all
/* 使用多种解码器解码 WebSocket 服务器的 Opus 音频 */
#include <string.h>
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/event_groups.h"
#include "esp_log.h"
#include "esp_wifi.h"
#include "nvs_flash.h"
#include "sdkconfig.h"
#include "audio_element.h"
#include "audio_pipeline.h"
#include "audio_event_iface.h"
#include "audio_common.h"
#include "esp_peripherals.h"
#include "periph_wifi.h"
#include "board.h"
#include "i2s_stream.h"
#include "raw_stream.h"
#include "filter_resample.h"
#include "ringbuf.h"
// Compile-time decoder selection.
// Listed values:
// 1 - RAW_OPUS_DECODER (with frame length prefix)
// 2 - RAW_OPUS_DECODER (without frame length prefix)
// 3 - OPUS_DECODER
// 4 - ESP_OPUS_DECODER
// NOTE(review): value 4 is listed above but has no matching branch below,
// so selecting it falls through to the #error.
#define DECODER_TYPE 1
// Each branch pulls in the decoder header and defines the log tag used by this file.
#if (DECODER_TYPE == 1)
#include "raw_opus_decoder.h"
static const char *TAG = "RAW_OPUS_WITH_PREFIX";
#elif (DECODER_TYPE == 2)
#include "raw_opus_decoder.h"
static const char *TAG = "RAW_OPUS_NO_PREFIX";
#elif (DECODER_TYPE == 3)
#include "opus_decoder.h"
static const char *TAG = "OPUS_DECODER";
#else
#error "请选择有效的解码器类型"
#endif
#include "esp_websocket_client.h"
#define WEBSOCKET_URI "ws://192.168.31.132:8000" // change to your server address
// NOTE(review): BUFFER_SIZE is not referenced anywhere in the visible code.
#define BUFFER_SIZE 1024
// Pipeline and its three elements: raw input -> Opus decoder -> I2S output.
static audio_pipeline_handle_t pipeline;
static audio_element_handle_t raw_write;
static audio_element_handle_t opus_decoder;
static audio_element_handle_t i2s_writer;
// WebSocket client that receives the Opus frames.
static esp_websocket_client_handle_t client;
// NOTE(review): these two ring buffer handles are declared but not used in the visible code.
ringbuf_handle_t raw_in_rb;
ringbuf_handle_t opus_in_rb;
// Event group used by the Wi-Fi handler to signal init_wifi().
static EventGroupHandle_t s_wifi_event_group = NULL;
// Event group bits
#define WIFI_CONNECTED_BIT BIT0
#define WIFI_FAIL_BIT BIT1
// Audio configuration
#define SAMPLE_RATE 16000
#define CHANNELS 1
/*
 * WebSocket event callback.
 *
 * Logs connect, disconnect and error events. For data events, payloads with
 * op_code 2 (binary) are logged and written into the raw_write element;
 * every other payload is logged as text.
 */
static void websocket_event_handler(void *handler_args, esp_event_base_t base, int32_t event_id, void *event_data)
{
    esp_websocket_event_data_t *evt = (esp_websocket_event_data_t *)event_data;

    if (event_id == WEBSOCKET_EVENT_CONNECTED) {
        ESP_LOGI(TAG, "WEBSOCKET_EVENT_CONNECTED");
        return;
    }
    if (event_id == WEBSOCKET_EVENT_DISCONNECTED) {
        ESP_LOGI(TAG, "WEBSOCKET_EVENT_DISCONNECTED");
        return;
    }
    if (event_id == WEBSOCKET_EVENT_ERROR) {
        ESP_LOGI(TAG, "WEBSOCKET_EVENT_ERROR");
        return;
    }
    if (event_id != WEBSOCKET_EVENT_DATA) {
        return;
    }

    /* Anything that is not a binary frame (op_code 2) is treated as text. */
    if (evt->op_code != 2) {
        ESP_LOGI(TAG, "接收到文本数据, len=%d, 内容: %.*s",
                 evt->data_len, evt->data_len, (char *)evt->data_ptr);
        return;
    }

    ESP_LOGI(TAG, "接收到二进制数据, len=%d", evt->data_len);

    /* Dump the first 8 bytes to help inspect the frame layout. */
    if (evt->data_len >= 8) {
        ESP_LOGI(TAG, "前8个字节: 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x",
                 evt->data_ptr[0], evt->data_ptr[1], evt->data_ptr[2], evt->data_ptr[3],
                 evt->data_ptr[4], evt->data_ptr[5], evt->data_ptr[6], evt->data_ptr[7]);
    }

    if (evt->data_len <= 0) {
        return;
    }

    /* Feed the payload to the pipeline input element. */
    int written = raw_stream_write(raw_write, (char *)evt->data_ptr, evt->data_len);
    ESP_LOGI(TAG, "写入 raw_stream %d 字节", written);
    if (written < evt->data_len) {
        ESP_LOGE(TAG, "写入 raw_stream 时发生错误,可能是缓冲区已满");
    }
}
/*
 * Wi-Fi / IP event callback.
 *
 * Starts a connection when the station starts, reconnects and flags failure
 * on disconnect, and flags success once an IP address is obtained.
 */
static void wifi_event_handler(void *arg, esp_event_base_t event_base, int32_t event_id, void *event_data)
{
    if (event_base == WIFI_EVENT) {
        switch (event_id) {
        case WIFI_EVENT_STA_START:
            esp_wifi_connect();
            break;
        case WIFI_EVENT_STA_DISCONNECTED:
            ESP_LOGI(TAG, "WiFi断开连接,尝试重连...");
            esp_wifi_connect();
            xEventGroupClearBits(s_wifi_event_group, WIFI_CONNECTED_BIT);
            xEventGroupSetBits(s_wifi_event_group, WIFI_FAIL_BIT);
            break;
        default:
            break;
        }
        return;
    }

    if (event_base == IP_EVENT && event_id == IP_EVENT_STA_GOT_IP) {
        ip_event_got_ip_t *got_ip = (ip_event_got_ip_t *)event_data;
        ESP_LOGI(TAG, "获取到IP地址:" IPSTR, IP2STR(&got_ip->ip_info.ip));
        xEventGroupSetBits(s_wifi_event_group, WIFI_CONNECTED_BIT);
    }
}
// Initialize Wi-Fi in station mode and block until the handler reports
// either WIFI_CONNECTED_BIT or WIFI_FAIL_BIT.
static void init_wifi(void)
{
// Event group through which wifi_event_handler signals the outcome.
s_wifi_event_group = xEventGroupCreate();
ESP_ERROR_CHECK(esp_netif_init());
ESP_ERROR_CHECK(esp_event_loop_create_default());
esp_netif_create_default_wifi_sta();
wifi_init_config_t cfg = WIFI_INIT_CONFIG_DEFAULT();
ESP_ERROR_CHECK(esp_wifi_init(&cfg));
// Route all Wi-Fi events and the got-IP event to wifi_event_handler.
ESP_ERROR_CHECK(esp_event_handler_register(WIFI_EVENT, ESP_EVENT_ANY_ID, &wifi_event_handler, NULL));
ESP_ERROR_CHECK(esp_event_handler_register(IP_EVENT, IP_EVENT_STA_GOT_IP, &wifi_event_handler, NULL));
// Credentials come from the project configuration (CONFIG_WIFI_SSID / CONFIG_WIFI_PASSWORD).
wifi_config_t wifi_config = {
.sta = {
.ssid = CONFIG_WIFI_SSID,
.password = CONFIG_WIFI_PASSWORD,
},
};
ESP_ERROR_CHECK(esp_wifi_set_mode(WIFI_MODE_STA));
ESP_ERROR_CHECK(esp_wifi_set_config(WIFI_IF_STA, &wifi_config));
ESP_ERROR_CHECK(esp_wifi_start());
ESP_LOGI(TAG, "WiFi初始化完成,等待连接...");
// Wait for the Wi-Fi connection result.
// NOTE(review): the handler sets WIFI_FAIL_BIT on every disconnect while also
// retrying, so a single early disconnect ends this wait with the failure branch.
EventBits_t bits = xEventGroupWaitBits(s_wifi_event_group,
WIFI_CONNECTED_BIT | WIFI_FAIL_BIT,
pdFALSE,
pdFALSE,
portMAX_DELAY);
if (bits & WIFI_CONNECTED_BIT) {
ESP_LOGI(TAG, "已连接到WiFi");
} else if (bits & WIFI_FAIL_BIT) {
ESP_LOGE(TAG, "WiFi连接失败");
} else {
ESP_LOGE(TAG, "意外错误");
}
}
// Application entry point: initializes NVS and Wi-Fi, builds the
// [raw] --> [opus] --> [i2s] pipeline, starts the WebSocket client whose
// handler feeds the pipeline, then loops forever processing pipeline events.
void app_main(void)
{
esp_log_level_set("*", ESP_LOG_INFO);
esp_log_level_set(TAG, ESP_LOG_DEBUG);
ESP_LOGI(TAG, "[ 1 ] 初始化 NVS");
esp_err_t err = nvs_flash_init();
// Erase and retry when the NVS partition has no free pages.
if (err == ESP_ERR_NVS_NO_FREE_PAGES) {
ESP_ERROR_CHECK(nvs_flash_erase());
err = nvs_flash_init();
}
ESP_ERROR_CHECK(err);
ESP_LOGI(TAG, "[ 2 ] 初始化外设");
// Peripheral set initialization is disabled; only the log line above runs for this step.
// esp_periph_config_t periph_cfg = DEFAULT_ESP_PERIPH_SET_CONFIG();
// esp_periph_set_handle_t set = esp_periph_set_init(&periph_cfg);
ESP_LOGI(TAG, "[ 3 ] 初始化 Wi-Fi");
init_wifi();
ESP_LOGI(TAG, "[ 4 ] 创建音频管道");
audio_pipeline_cfg_t pipeline_cfg = DEFAULT_AUDIO_PIPELINE_CONFIG();
pipeline = audio_pipeline_init(&pipeline_cfg);
mem_assert(pipeline);
// Create the raw_stream element used as the data entry point
// (the WebSocket handler writes received frames into it).
raw_stream_cfg_t raw_cfg = RAW_STREAM_CFG_DEFAULT();
raw_cfg.type = AUDIO_STREAM_WRITER;
raw_write = raw_stream_init(&raw_cfg);
ESP_LOGI(TAG, "[ 5 ] 创建解码器");
#if (DECODER_TYPE == 1)
// RAW_OPUS_DECODER with frame length prefix
// NOTE(review): confirm the unit of dec_frame_size in raw_opus_decoder.h; if it
// is bytes rather than samples, 60 ms of 16 kHz mono 16-bit PCM would be 1920 -- TODO confirm.
// NOTE(review): with the prefix enabled the sender must prepend a length to every
// frame; presumably 2 bytes, big-endian -- confirm against the decoder documentation.
raw_opus_dec_cfg_t opus_cfg = RAW_OPUS_DEC_CONFIG_DEFAULT();
opus_cfg.sample_rate = SAMPLE_RATE;
opus_cfg.channels = CHANNELS;
opus_cfg.dec_frame_size = 960; // 60ms @ 16kHz = 960 samples
opus_cfg.enable_frame_length_prefix = true; // enable the frame length prefix
opus_decoder = raw_opus_decoder_init(&opus_cfg);
ESP_LOGI(TAG, "使用 RAW_OPUS_DECODER (带帧长度前缀)");
#elif (DECODER_TYPE == 2)
// RAW_OPUS_DECODER without frame length prefix
raw_opus_dec_cfg_t opus_cfg = RAW_OPUS_DEC_CONFIG_DEFAULT();
opus_cfg.sample_rate = SAMPLE_RATE;
opus_cfg.channels = CHANNELS;
opus_cfg.dec_frame_size = 960; // 60ms @ 16kHz = 960 samples
opus_cfg.enable_frame_length_prefix = false; // disable the frame length prefix
opus_cfg.self_delimited = true; // try self-delimited mode
opus_decoder = raw_opus_decoder_init(&opus_cfg);
ESP_LOGI(TAG, "使用 RAW_OPUS_DECODER (不带帧长度前缀)");
#elif (DECODER_TYPE == 3)
// Standard OPUS_DECODER
opus_decoder_cfg_t opus_cfg = DEFAULT_OPUS_DECODER_CONFIG();
opus_decoder = decoder_opus_init(&opus_cfg);
ESP_LOGI(TAG, "使用标准 OPUS_DECODER");
#endif
ESP_LOGI(TAG, "[ 6 ] 创建 I2S 流");
i2s_stream_cfg_t i2s_cfg = I2S_STREAM_CFG_DEFAULT();
i2s_cfg.type = AUDIO_STREAM_WRITER;
// DMA buffer configuration
i2s_cfg.chan_cfg.dma_desc_num = 8; // number of DMA buffers
i2s_cfg.chan_cfg.dma_frame_num = 1024; // length of each DMA buffer
// Sample rate, bit depth and channel mode
i2s_cfg.std_cfg.clk_cfg.sample_rate_hz = SAMPLE_RATE;
i2s_cfg.std_cfg.slot_cfg.data_bit_width = I2S_DATA_BIT_WIDTH_16BIT;
i2s_cfg.std_cfg.slot_cfg.slot_mode = I2S_SLOT_MODE_MONO;
i2s_cfg.out_rb_size = 16 * 1024;
i2s_writer = i2s_stream_init(&i2s_cfg);
ESP_LOGI(TAG, "[ 7 ] 注册所有元素到音频管道");
audio_pipeline_register(pipeline, raw_write, "raw");
audio_pipeline_register(pipeline, opus_decoder, "opus");
audio_pipeline_register(pipeline, i2s_writer, "i2s");
ESP_LOGI(TAG, "[ 8 ] 链接元素: [raw] --> [opus] --> [i2s]");
const char *link_tag[3] = {"raw", "opus", "i2s"};
audio_pipeline_link(pipeline, &link_tag[0], 3);
ESP_LOGI(TAG, "[ 9 ] 设置事件监听器");
audio_event_iface_cfg_t evt_cfg = AUDIO_EVENT_IFACE_DEFAULT_CFG();
evt_cfg.queue_set_size = 20;
audio_event_iface_handle_t evt = audio_event_iface_init(&evt_cfg);
audio_pipeline_set_listener(pipeline, evt);
ESP_LOGI(TAG, "[ 10 ] 启动音频管道");
// The pipeline is started before the WebSocket client so it is running
// by the time the handler begins writing frames.
audio_pipeline_run(pipeline);
ESP_LOGI(TAG, "[ 11 ] 初始化WebSocket客户端");
esp_websocket_client_config_t websocket_cfg = {
.uri = WEBSOCKET_URI,
};
client = esp_websocket_client_init(&websocket_cfg);
esp_websocket_register_events(client, WEBSOCKET_EVENT_ANY, websocket_event_handler, NULL);
ESP_LOGI(TAG, "[ 12 ] 启动WebSocket客户端");
esp_websocket_client_start(client);
ESP_LOGI(TAG, "[ 13 ] 监听事件");
while (1) {
audio_event_iface_msg_t msg;
// Poll for a pipeline event with a 100 ms timeout.
// NOTE(review): if a timeout is reported as a non-OK return, the branch below
// logs an error on every idle poll -- confirm and consider lowering the log level.
esp_err_t ret = audio_event_iface_listen(evt, &msg, 100 / portTICK_PERIOD_MS);
if (ret != ESP_OK) {
ESP_LOGE(TAG, "[ * ] 事件接口错误: %d", ret);
// Short delay to avoid excessive CPU usage
vTaskDelay(pdMS_TO_TICKS(10));
continue;
}
// When the decoder reports stream info, reconfigure the I2S clock to match it.
if (msg.source_type == AUDIO_ELEMENT_TYPE_ELEMENT && msg.source == (void *)opus_decoder
&& msg.cmd == AEL_MSG_CMD_REPORT_MUSIC_INFO) {
audio_element_info_t music_info = {0};
audio_element_getinfo(opus_decoder, &music_info);
ESP_LOGI(TAG, "[ * ] 接收到音频信息,采样率: %d, 通道数: %d",
music_info.sample_rates, music_info.channels);
i2s_stream_set_clk(i2s_writer, music_info.sample_rates, music_info.bits, music_info.channels);
continue;
}
/* Handle pipeline status events: only a finished element is logged. */
if (msg.source_type == AUDIO_ELEMENT_TYPE_ELEMENT && msg.cmd == AEL_MSG_CMD_REPORT_STATUS) {
audio_element_state_t el_state = audio_element_get_state(msg.source);
if (el_state == AEL_STATE_FINISHED) {
ESP_LOGI(TAG, "[ * ] 元素已完成: %s", (char *)msg.data);
}
}
}
// The code below is never reached (the loop above has no exit); kept for reference.
ESP_LOGI(TAG, "[ 14 ] 停止音频管道");
audio_pipeline_stop(pipeline);
audio_pipeline_wait_for_stop(pipeline);
audio_pipeline_terminate(pipeline);
/* Release resources */
// NOTE(review): raw_write is registered above but is neither unregistered nor
// deinitialized in this teardown sequence.
audio_pipeline_unregister(pipeline, opus_decoder);
audio_pipeline_unregister(pipeline, i2s_writer);
audio_pipeline_remove_listener(pipeline);
audio_event_iface_destroy(evt);
audio_pipeline_deinit(pipeline);
audio_element_deinit(opus_decoder);
audio_element_deinit(i2s_writer);
esp_websocket_client_stop(client);
esp_websocket_client_destroy(client);
}