ESP32 Forum

Posted: **Sat May 10, 2025 6:45 am**

On the server side, I load a WAV file and encode it into raw Opus frames with opuslib_next.Encoder; the ESP32 should then decode and play them using raw_opus_decoder.

Problem: Whether or not I add the big-endian length prefix on the server side, and however the decoder parameters are set on the ESP32, the audio does not play.

Details:
- device:esp32s3
- idf: 5.3.2
- adf:2.4

What I've checked:
1. the speaker can play audio in another example: https://github.com/espressif/esp-adf/bl ... /README.md
2. esp32 has received the opus frames
3. Whether or not the big-endian length prefix is added on the server side, and whether opus_cfg.enable_frame_length_prefix on the ESP32 side is set to true or false, the device does not play the audio.
4. in another test, opus encoded by esp-adf raw_opus_encoder can be decoded using opuslib_next.Decoder on server side, so I believe raw_opus_decoder can decode opus frames created by opuslib_next.Encoder

Thanks a lot!

Essential codes:

opus_play_server.py：

Code: Select all

def encode_opus(pcm_data, with_length_prefix=True):
    """Encode 16-bit PCM bytes into a list of raw Opus packets.

    Args:
        pcm_data: interleaved little-endian int16 PCM, as returned by
            ``wave.readframes``.
        with_length_prefix: when true, every packet is preceded by its
            length as a 2-byte big-endian integer.  This has to agree with
            ``opus_cfg.enable_frame_length_prefix`` on the ESP32 side.

    Returns:
        A list of ``bytes`` objects, one per ``FRAME_SIZE``-sample frame.
        Empty input yields an empty list.
    """
    encoder = opuslib_next.Encoder(SAMPLE_RATE, CHANNELS, APPLICATION)

    # bytes to int16
    samples = np.frombuffer(pcm_data, dtype=np.int16)

    # FRAME_SIZE counts samples per channel; interleaved input therefore
    # holds FRAME_SIZE * CHANNELS values per frame.
    values_per_frame = FRAME_SIZE * CHANNELS

    encoded_frames = []
    for start in range(0, len(samples), values_per_frame):
        frame = samples[start:start + values_per_frame]

        # zero padding for the last frame: the encoder only accepts full frames
        if len(frame) < values_per_frame:
            frame = np.pad(frame, (0, values_per_frame - len(frame)), 'constant')

        encoded = encoder.encode(frame.tobytes(), FRAME_SIZE)

        if with_length_prefix:
            # 2-byte big-endian length prefix in front of the packet
            encoded = len(encoded).to_bytes(2, byteorder='big') + encoded

        # The original never appended anything (the only append was
        # commented out), so callers always received an empty list.
        encoded_frames.append(encoded)

    return encoded_frames

esp32:

Code: Select all

// RAW_OPUS_DECODER with length prefix
// Start from the default config macro and override only the stream parameters.
raw_opus_dec_cfg_t opus_cfg = RAW_OPUS_DEC_CONFIG_DEFAULT();
opus_cfg.sample_rate = SAMPLE_RATE;
opus_cfg.channels = CHANNELS;
// Same value as FRAME_SIZE in opus_play_server.py (samples per encoded frame).
opus_cfg.dec_frame_size = 960;  // 60ms @ 16kHz = 960 samples
// NOTE(review): presumably the decoder then expects each packet to be preceded
// by a 2-byte big-endian length, so the server must actually send that prefix --
// confirm against the raw_opus_decoder documentation.
opus_cfg.enable_frame_length_prefix = true; // with length prefix
opus_decoder = raw_opus_decoder_init(&opus_cfg);

Full codes:

opus_play_server.py：

Code: Select all

import os
import random
import wave
import numpy as np
import opuslib_next
import time
import struct
import asyncio
import websockets
import logging
import json
from threading import Thread

# Root logging setup plus the logger used throughout this server.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger("opus_server")

# Where the source recordings live and where the WebSocket server listens.
WAV_DIR = 'recordings'
HOST = '0.0.0.0'
PORT = 8000

# Audio format shared by the WAV validation and the Opus encoder.
SAMPLE_RATE = 16000
CHANNELS = 1
FRAME_SIZE = SAMPLE_RATE * 60 // 1000  # 60ms @ 16kHz = 960 samples per frame
APPLICATION = opuslib_next.APPLICATION_VOIP

# Candidate files are listed once, at import time; only names ending in
# '16000hz.wav' are considered.
wav_files = [name for name in os.listdir(WAV_DIR) if name.endswith('16000hz.wav')]


def read_wav_file(file_path):
    """Load a .wav file and return its raw PCM payload as bytes.

    The file must match the format the encoder is configured for:
    ``SAMPLE_RATE`` Hz, ``CHANNELS`` channel(s) and 16-bit samples (the
    encoder reinterprets the returned bytes as int16).

    Raises:
        ValueError: if the sample rate, channel count or sample width of the
            file does not match.
    """
    with wave.open(file_path, 'rb') as wav_file:
        # check sample rate and channels
        if wav_file.getframerate() != SAMPLE_RATE:
            raise ValueError(f"sample rate doesn't match, expected: {SAMPLE_RATE}Hz")
        if wav_file.getnchannels() != CHANNELS:
            raise ValueError(f"channels doesn't match, expected: {CHANNELS}")

        # 8/24/32-bit files would otherwise be silently misread as int16
        sample_width = wav_file.getsampwidth()
        if sample_width != 2:
            raise ValueError(
                f"sample width doesn't match, expected: 16 bit, got: {sample_width * 8} bit"
            )

        # read every audio frame in the file
        return wav_file.readframes(wav_file.getnframes())


def encode_opus(pcm_data, with_length_prefix=True):
    """Encode 16-bit PCM bytes into a list of raw Opus packets.

    Args:
        pcm_data: interleaved little-endian int16 PCM, as returned by
            ``read_wav_file``.
        with_length_prefix: when true, every packet is preceded by its
            length as a 2-byte big-endian integer.  This has to agree with
            ``opus_cfg.enable_frame_length_prefix`` on the ESP32 side
            (DECODER_TYPE 1 enables it, DECODER_TYPE 2 disables it).

    Returns:
        A list of ``bytes`` objects, one per ``FRAME_SIZE``-sample frame.
        Empty input yields an empty list.
    """
    encoder = opuslib_next.Encoder(SAMPLE_RATE, CHANNELS, APPLICATION)

    # bytes to int16
    samples = np.frombuffer(pcm_data, dtype=np.int16)

    # FRAME_SIZE counts samples per channel; interleaved input therefore
    # holds FRAME_SIZE * CHANNELS values per frame.
    values_per_frame = FRAME_SIZE * CHANNELS

    encoded_frames = []
    for start in range(0, len(samples), values_per_frame):
        frame = samples[start:start + values_per_frame]

        # zero padding for the last frame: the encoder only accepts full frames
        if len(frame) < values_per_frame:
            frame = np.pad(frame, (0, values_per_frame - len(frame)), 'constant')

        encoded = encoder.encode(frame.tobytes(), FRAME_SIZE)

        if with_length_prefix:
            # 2-byte big-endian length prefix in front of the packet
            encoded = len(encoded).to_bytes(2, byteorder='big') + encoded

        # The original never appended anything (the only append was
        # commented out), so the send loop had no frames to transmit.
        encoded_frames.append(encoded)

    return encoded_frames


async def handle_client(websocket):
    """handle WebSocket client connections"""
    client_ip = websocket.remote_address[0]
    logger.info(f"client connected: {client_ip}")
    
    try:
        # send hello
        await websocket.send(json.dumps({
            "type": "hello",
            "status": "ok",
            "sample_rate": SAMPLE_RATE,
            "channels": CHANNELS
        }))
        
        while True:
            # get local wav files
            if not wav_files:
                logger.warning("no wav files are found")
                await websocket.send(json.dumps({
                    "type": "error",
                    "message": "no available wav files"
                }))
                await asyncio.sleep(5)
                continue
            
            # pick a wav file
            wav_file = random.choice(wav_files)
            wav_path = os.path.join(WAV_DIR, wav_file)
            logger.info(f"sending file: {wav_file}")
            
            await websocket.send(json.dumps({
                "type": "file_start",
                "filename": wav_file
            }))
            
            try:
                # load and encode
                pcm_data = read_wav_file(wav_path)
                encoded_frames = encode_opus(pcm_data)
                
                # send encoded frames
                for frame in encoded_frames:
                    logger.info(len(frame))
                    await websocket.send(frame)
                    await asyncio.sleep(0.02)  # send a frame every 20ms
                
                logger.info(f"done sending {wav_file}")
                
                # send file end
                await websocket.send(json.dumps({
                    "type": "file_end",
                    "filename": wav_file

esp32:

Code: Select all

/* test different settings of opus decoder to decode audio from WebSocket server */

#include <string.h>
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/event_groups.h"
#include "esp_log.h"
#include "esp_wifi.h"
#include "nvs_flash.h"
#include "sdkconfig.h"
#include "audio_element.h"
#include "audio_pipeline.h"
#include "audio_event_iface.h"
#include "audio_common.h"
#include "esp_peripherals.h"
#include "periph_wifi.h"
#include "board.h"
#include "i2s_stream.h"
#include "raw_stream.h"
#include "filter_resample.h"
#include "ringbuf.h"

// Compile-time selection of the decoder element under test.
// select different type of opus decoder
// 1 - RAW_OPUS_DECODER (with length prefix)
// 2 - RAW_OPUS_DECODER (without length prefix)
// 3 - OPUS_DECODER
#define DECODER_TYPE 1

// Each variant pulls in the matching decoder header and defines its own log
// TAG, so the log output shows which configuration was built.
#if (DECODER_TYPE == 1)
#include "raw_opus_decoder.h"
static const char *TAG = "RAW_OPUS_WITH_PREFIX";
#elif (DECODER_TYPE == 2)
#include "raw_opus_decoder.h"
static const char *TAG = "RAW_OPUS_NO_PREFIX";
#elif (DECODER_TYPE == 3)
#include "opus_decoder.h"
static const char *TAG = "OPUS_DECODER";
#else
// Any other value is rejected at build time.
#error "select a valid decoder first"
#endif

#include "esp_websocket_client.h"

// Address of the Python WebSocket server (PORT 8000 in opus_play_server.py).
#define WEBSOCKET_URI "ws://192.168.31.132:8000"
// NOTE(review): BUFFER_SIZE is not referenced anywhere in the code shown here.
#define BUFFER_SIZE 1024

// Pipeline and its three elements: raw (input) -> opus (decoder) -> i2s (output).
static audio_pipeline_handle_t pipeline;
static audio_element_handle_t raw_write;
static audio_element_handle_t opus_decoder;
static audio_element_handle_t i2s_writer;
static esp_websocket_client_handle_t client;

// NOTE(review): neither ring buffer handle is used in the code shown here, and
// both have external linkage unlike the other globals -- confirm they are needed.
ringbuf_handle_t raw_in_rb;
ringbuf_handle_t opus_in_rb;

// Event group used by wifi_event_handler() to report the link state to init_wifi().
static EventGroupHandle_t s_wifi_event_group = NULL;
#define WIFI_CONNECTED_BIT BIT0
#define WIFI_FAIL_BIT      BIT1

// Audio format; has to match SAMPLE_RATE / CHANNELS on the server side.
#define SAMPLE_RATE     16000
#define CHANNELS        1

/**
 * WebSocket client event callback.
 *
 * Binary frames are forwarded unchanged into the raw_stream element that feeds
 * the Opus decoder; text frames (the server's JSON control messages) are only
 * logged. Every other opcode is ignored, so frames such as ping/pong/close are
 * no longer reported as "text data".
 *
 * @param handler_args  unused
 * @param base          unused
 * @param event_id      one of the WEBSOCKET_EVENT_* ids
 * @param event_data    esp_websocket_event_data_t describing the event
 */
static void websocket_event_handler(void *handler_args, esp_event_base_t base, int32_t event_id, void *event_data)
{
    /* Frame opcodes from RFC 6455 section 5.2, replacing the magic number 2. */
    enum {
        WS_OP_TEXT   = 0x01,
        WS_OP_BINARY = 0x02,
    };

    esp_websocket_event_data_t *data = (esp_websocket_event_data_t *)event_data;

    switch (event_id) {
        case WEBSOCKET_EVENT_CONNECTED:
            ESP_LOGI(TAG, "WEBSOCKET_EVENT_CONNECTED");
            break;
        case WEBSOCKET_EVENT_DISCONNECTED:
            ESP_LOGI(TAG, "WEBSOCKET_EVENT_DISCONNECTED");
            break;
        case WEBSOCKET_EVENT_DATA:
            if (data->op_code == WS_OP_BINARY) {
                ESP_LOGI(TAG, "receive binary data, len=%d", data->data_len);

                // print the first 8 bytes to debug; read them as unsigned so a
                // byte >= 0x80 can never be sign-extended by the varargs promotion
                if (data->data_len >= 8) {
                    const unsigned char *bytes = (const unsigned char *)data->data_ptr;
                    ESP_LOGI(TAG, "frist 8 bytes: 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x",
                        bytes[0], bytes[1], bytes[2], bytes[3],
                        bytes[4], bytes[5], bytes[6], bytes[7]);
                }

                // write into opus decoder
                if (data->data_len > 0) {
                    int bytes_written = raw_stream_write(raw_write, (char *)data->data_ptr, data->data_len);
                    ESP_LOGI(TAG, "write to raw_stream, written: %d bytes", bytes_written);
                    // a short (or negative) result means not everything was accepted
                    if (bytes_written < data->data_len) {
                        ESP_LOGE(TAG, "error writing data into raw_stream，maybe the buffer is full");
                    }
                }
            } else if (data->op_code == WS_OP_TEXT) {
                // text data: data_ptr is not NUL-terminated, hence the %.*s
                ESP_LOGI(TAG, "receive text data, len=%d, content: %.*s",
                         data->data_len, data->data_len, (char *)data->data_ptr);
            } else {
                // neither text nor binary: nothing to play, nothing to print
                ESP_LOGD(TAG, "ignoring frame, opcode=%d, len=%d", data->op_code, data->data_len);
            }
            break;
        case WEBSOCKET_EVENT_ERROR:
            // an error deserves error level, it was logged as info before
            ESP_LOGE(TAG, "WEBSOCKET_EVENT_ERROR");
            break;
        default:
            break;
    }
}

/**
 * Wi-Fi / IP event callback.
 *
 * Starts the first connection attempt when the station starts, reconnects on
 * every disconnect (flagging the failure in s_wifi_event_group), and flags the
 * connection as up once an IP address has been obtained.
 */
static void wifi_event_handler(void *arg, esp_event_base_t event_base, int32_t event_id, void *event_data)
{
    if (event_base == WIFI_EVENT) {
        switch (event_id) {
            case WIFI_EVENT_STA_START:
                esp_wifi_connect();
                break;
            case WIFI_EVENT_STA_DISCONNECTED:
                ESP_LOGI(TAG, "WiFi disconnected，trying to reconnect...");
                esp_wifi_connect();
                // connected -> cleared, fail -> set, while the retry is already running
                xEventGroupClearBits(s_wifi_event_group, WIFI_CONNECTED_BIT);
                xEventGroupSetBits(s_wifi_event_group, WIFI_FAIL_BIT);
                break;
            default:
                break;
        }
        return;
    }

    if (event_base == IP_EVENT && event_id == IP_EVENT_STA_GOT_IP) {
        ip_event_got_ip_t *got_ip = (ip_event_got_ip_t *)event_data;
        ESP_LOGI(TAG, "Got IP address:" IPSTR, IP2STR(&got_ip->ip_info.ip));
        xEventGroupSetBits(s_wifi_event_group, WIFI_CONNECTED_BIT);
    }
}

/**
 * Bring up Wi-Fi in station mode and block until the first result is known.
 *
 * Creates the event group, initializes netif / the default event loop / the
 * Wi-Fi driver, registers wifi_event_handler() for Wi-Fi and got-IP events,
 * applies the SSID/password from CONFIG_WIFI_SSID / CONFIG_WIFI_PASSWORD and
 * starts the driver. Any failing init call aborts through ESP_ERROR_CHECK.
 * The outcome of the wait is only logged; nothing is returned to the caller.
 */
static void init_wifi(void)
{
    // must exist before the handler can run, the handler sets bits in it
    s_wifi_event_group = xEventGroupCreate();
    
    ESP_ERROR_CHECK(esp_netif_init());
    ESP_ERROR_CHECK(esp_event_loop_create_default());
    // NOTE(review): the returned netif handle is neither checked nor kept
    esp_netif_create_default_wifi_sta();
    
    wifi_init_config_t cfg = WIFI_INIT_CONFIG_DEFAULT();
    ESP_ERROR_CHECK(esp_wifi_init(&cfg));
    
    // one handler for all Wi-Fi events and for the got-IP event
    ESP_ERROR_CHECK(esp_event_handler_register(WIFI_EVENT, ESP_EVENT_ANY_ID, &wifi_event_handler, NULL));
    ESP_ERROR_CHECK(esp_event_handler_register(IP_EVENT, IP_EVENT_STA_GOT_IP, &wifi_event_handler, NULL));
    
    // credentials come from the project configuration (sdkconfig)
    wifi_config_t wifi_config = {
        .sta = {
            .ssid = CONFIG_WIFI_SSID,
            .password = CONFIG_WIFI_PASSWORD,
        },
    };
    
    ESP_ERROR_CHECK(esp_wifi_set_mode(WIFI_MODE_STA));
    ESP_ERROR_CHECK(esp_wifi_set_config(WIFI_IF_STA, &wifi_config));
    ESP_ERROR_CHECK(esp_wifi_start());
    
    ESP_LOGI(TAG, "WiFi initialized, waiting for connections...");
    
    // Block without timeout until either bit is set; the bits are not cleared
    // on exit and it is enough for one of them to be set.
    // NOTE(review): wifi_event_handler() sets WIFI_FAIL_BIT on the first
    // disconnect while it keeps retrying, so this can report a failure even
    // though the connection succeeds shortly afterwards.
    EventBits_t bits = xEventGroupWaitBits(s_wifi_event_group,
                                          WIFI_CONNECTED_BIT | WIFI_FAIL_BIT,
                                          pdFALSE,
                                          pdFALSE,
                                          portMAX_DELAY);
    
    if (bits & WIFI_CONNECTED_BIT) {
        ESP_LOGI(TAG, "WiFi Connected");
    } else if (bits & WIFI_FAIL_BIT) {
        ESP_LOGE(TAG, "fail to connect to WiFi");
    } else {
        ESP_LOGE(TAG, "unexpected error");
    }
}

/**
 * Application entry point.
 *
 * Initializes NVS and Wi-Fi, builds the pipeline [raw] --> [opus] --> [i2s]
 * with the decoder selected by DECODER_TYPE, starts the WebSocket client that
 * feeds the pipeline, and then services pipeline events forever.
 */
void app_main(void)
{
    esp_log_level_set("*", ESP_LOG_INFO);
    esp_log_level_set(TAG, ESP_LOG_DEBUG);

    ESP_LOGI(TAG, "[ 1 ] initializing NVS");
    esp_err_t err = nvs_flash_init();
    if (err == ESP_ERR_NVS_NO_FREE_PAGES) {
        // the NVS partition is full: erase it and retry once
        ESP_ERROR_CHECK(nvs_flash_erase());
        err = nvs_flash_init();
    }
    ESP_ERROR_CHECK(err);

    ESP_LOGI(TAG, "[ 2 ] initializing peripherals");
    // esp_periph_config_t periph_cfg = DEFAULT_ESP_PERIPH_SET_CONFIG();
    // esp_periph_set_handle_t set = esp_periph_set_init(&periph_cfg);
    // NOTE(review): "board.h" is included but nothing here starts an audio
    // codec (no audio_board_init() / audio_hal_ctrl_codec()). If the board
    // drives its speaker through a codec chip, playback stays silent no matter
    // how the decoder is configured -- confirm for the hardware in use.

    ESP_LOGI(TAG, "[ 3 ] initializing Wi-Fi");
    init_wifi();

    ESP_LOGI(TAG, "[ 4 ] setup audio pipeline");
    audio_pipeline_cfg_t pipeline_cfg = DEFAULT_AUDIO_PIPELINE_CONFIG();
    pipeline = audio_pipeline_init(&pipeline_cfg);
    mem_assert(pipeline);

    // setup raw_stream as a data entry: the WebSocket handler writes into it
    raw_stream_cfg_t raw_cfg = RAW_STREAM_CFG_DEFAULT();
    raw_cfg.type = AUDIO_STREAM_WRITER;
    raw_write = raw_stream_init(&raw_cfg);

    ESP_LOGI(TAG, "[ 5 ] setup decoder");
#if (DECODER_TYPE == 1)
    // RAW_OPUS_DECODER with length prefix
    // NOTE(review): the server has to send the matching per-packet length
    // prefix for this variant -- confirm the sender actually adds it.
    raw_opus_dec_cfg_t opus_cfg = RAW_OPUS_DEC_CONFIG_DEFAULT();
    opus_cfg.sample_rate = SAMPLE_RATE;
    opus_cfg.channels = CHANNELS;
    opus_cfg.dec_frame_size = 960;  // 60ms @ 16kHz = 960 samples
    opus_cfg.enable_frame_length_prefix = true; // with length prefix
    opus_decoder = raw_opus_decoder_init(&opus_cfg);
    ESP_LOGI(TAG, "using RAW_OPUS_DECODER (with length prefix)");
#elif (DECODER_TYPE == 2)
    // RAW_OPUS_DECODER without length prefix
    // NOTE(review): self_delimited is enabled here; verify that the sender
    // really produces self-delimited packets, otherwise the decoder has no way
    // to find packet boundaries in the byte stream.
    raw_opus_dec_cfg_t opus_cfg = RAW_OPUS_DEC_CONFIG_DEFAULT();
    opus_cfg.sample_rate = SAMPLE_RATE;
    opus_cfg.channels = CHANNELS;
    opus_cfg.dec_frame_size = 960;  // 60ms @ 16kHz = 960 samples
    opus_cfg.enable_frame_length_prefix = false; // disable length prefix
    opus_cfg.self_delimited = true;
    opus_decoder = raw_opus_decoder_init(&opus_cfg);
    ESP_LOGI(TAG, "using RAW_OPUS_DECODER (without length prefix)");
#elif (DECODER_TYPE == 3)
    // standard OPUS_DECODER(ogg)
    opus_decoder_cfg_t opus_cfg = DEFAULT_OPUS_DECODER_CONFIG();
    opus_decoder = decoder_opus_init(&opus_cfg);
    ESP_LOGI(TAG, "using standard OPUS_DECODER(ogg)");
#endif

    ESP_LOGI(TAG, "[ 6 ] setup i2s stream");
    i2s_stream_cfg_t i2s_cfg = I2S_STREAM_CFG_DEFAULT();
    i2s_cfg.type = AUDIO_STREAM_WRITER;
    // dma buffer setup
    i2s_cfg.chan_cfg.dma_desc_num = 8;
    i2s_cfg.chan_cfg.dma_frame_num = 1024;
    // sample rate, bit width, channel
    i2s_cfg.std_cfg.clk_cfg.sample_rate_hz = SAMPLE_RATE;
    i2s_cfg.std_cfg.slot_cfg.data_bit_width = I2S_DATA_BIT_WIDTH_16BIT;
    i2s_cfg.std_cfg.slot_cfg.slot_mode = I2S_SLOT_MODE_MONO;
    i2s_cfg.out_rb_size = 16 * 1024;
    i2s_writer = i2s_stream_init(&i2s_cfg);

    ESP_LOGI(TAG, "[ 7 ] registering all elements to pipeline");
    audio_pipeline_register(pipeline, raw_write, "raw");
    audio_pipeline_register(pipeline, opus_decoder, "opus");
    audio_pipeline_register(pipeline, i2s_writer, "i2s");

    ESP_LOGI(TAG, "[ 8 ] linking elements: [raw] --> [opus] --> [i2s]");
    const char *link_tag[3] = {"raw", "opus", "i2s"};
    audio_pipeline_link(pipeline, &link_tag[0], 3);

    ESP_LOGI(TAG, "[ 9 ] setup event listener");
    audio_event_iface_cfg_t evt_cfg = AUDIO_EVENT_IFACE_DEFAULT_CFG();
    evt_cfg.queue_set_size = 20;
    audio_event_iface_handle_t evt = audio_event_iface_init(&evt_cfg);
    audio_pipeline_set_listener(pipeline, evt);

    ESP_LOGI(TAG, "[ 10 ] launch the audio pipeline");
    audio_pipeline_run(pipeline);

    ESP_LOGI(TAG, "[ 11 ] initializing WebSocket client");
    esp_websocket_client_config_t websocket_cfg = {
        .uri = WEBSOCKET_URI,
    };
    client = esp_websocket_client_init(&websocket_cfg);
    esp_websocket_register_events(client, WEBSOCKET_EVENT_ANY, websocket_event_handler, NULL);

    ESP_LOGI(TAG, "[ 12 ] starting WebSocket client");
    esp_websocket_client_start(client);

    ESP_LOGI(TAG, "[ 13 ] listen for events");
    while (1) {
        audio_event_iface_msg_t msg;
        // Block until an event arrives. With the former 100 ms timeout every
        // idle period came back as a failure and was logged as an error,
        // flooding the log while nothing was wrong.
        esp_err_t ret = audio_event_iface_listen(evt, &msg, portMAX_DELAY);
        if (ret != ESP_OK) {
            ESP_LOGE(TAG, "[ * ] event iface error: %d", ret);
            // small delay to prevent cpu overload
            vTaskDelay(pdMS_TO_TICKS(10));
            continue;
        }

        // the decoder reports the stream format: re-clock the I2S output to it
        if (msg.source_type == AUDIO_ELEMENT_TYPE_ELEMENT && msg.source == (void *)opus_decoder
            && msg.cmd == AEL_MSG_CMD_REPORT_MUSIC_INFO) {
            audio_element_info_t music_info = {0};
            audio_element_getinfo(opus_decoder, &music_info);
            ESP_LOGI(TAG, "[ * ] audio info received，sample rate: %d, channels: %d", 
                     music_info.sample_rates, music_info.channels);
            i2s_stream_set_clk(i2s_writer, music_info.sample_rates, music_info.bits, music_info.channels);
            continue;
        }

        /* handle pipeline event */
        if (msg.source_type == AUDIO_ELEMENT_TYPE_ELEMENT && msg.cmd == AEL_MSG_CMD_REPORT_STATUS) {
            audio_element_state_t el_state = audio_element_get_state(msg.source);
            if (el_state == AEL_STATE_FINISHED) {
                // For status reports msg.data carries the status value, not a
                // string; printing it with %s dereferenced an invalid pointer.
                // Log the element's registered tag instead.
                const char *el_tag = audio_element_get_tag((audio_element_handle_t)msg.source);
                ESP_LOGI(TAG, "[ * ] element done processing: %s", el_tag ? el_tag : "?");
            }
        }
    }

}

Posted: **Thu May 15, 2025 11:21 am**

Thanks for sharing! I’m running into the same issue with raw_opus_decoder on ESP32S3 — server sends frames fine, ESP receives them, but no audio plays. Tried both with and without length prefix. Curious if you’ve solved this — any tips would be much appreciated!

Posted: **Mon May 19, 2025 2:54 am**

Thanks for sharing! I’m running into the same issue with raw_opus_decoder on ESP32S3 — server sends frames fine, ESP receives them, but no audio plays. Tried both with and without length prefix. Curious if you’ve solved this — any tips would be much appreciated!

I didn't solve it, but I found a workaround.

reference: https://github.com/espressif/esp-adf/bl ... mple.c#L64

Key idea: convert the WAV file to an Opus file using pydub (which relies on ffmpeg), then stream it to the ESP32, which uses opus_decoder (rather than raw_opus_decoder) to decode and play it.

server:

Code: Select all

from pydub import AudioSegment
import os

def wav_to_opus(input_file, output_file=None, bitrate='64k'):
    """
    Transcode a wav file to opus with pydub and return the output path.

    When ``output_file`` is omitted, the result is written next to the input
    with the extension replaced by ``.opus``. Raises ``FileNotFoundError`` if
    ``input_file`` does not exist.
    """
    if not os.path.exists(input_file):
        raise FileNotFoundError(f"file not found: {input_file}")

    # default destination: the input path with an .opus extension
    if output_file is None:
        stem, _ = os.path.splitext(input_file)
        output_file = f"{stem}.opus"

    # load the wav and hand it straight to the exporter
    export_options = {'format': 'opus', 'codec': 'libopus', 'bitrate': bitrate}
    AudioSegment.from_wav(input_file).export(output_file, **export_options)

    return output_file
    
async def stream_wav_file(self, websocket, filename):
        """Convert a wav file to opus and stream the result to the client.

        Sends a ``file_start`` message, the opus file in 4096-byte binary
        chunks, then a ``file_end`` message.  If the wav file is missing or
        anything fails along the way, an ``error`` message is sent instead.

        The temporary opus file is removed in a ``finally`` block, so it is
        also cleaned up when the coroutine is cancelled (a cancellation is
        not an ``Exception`` and used to skip both cleanup paths).

        Args:
            websocket: connection object providing an awaitable ``send``.
            filename: name of the wav file inside ``self.wav_dir``.
        """
        wav_path = os.path.join(self.wav_dir, filename)

        if not os.path.exists(wav_path):
            await websocket.send(json.dumps({
                "type": "error",
                "message": f"file not found: {filename}"
            }))
            return

        # temp opus path, computed once so the cleanup below can rely on it
        base_name = os.path.splitext(filename)[0]
        opus_filename = f"{base_name}.opus"
        opus_path = os.path.join(self.tmp_dir, opus_filename)

        try:
            await websocket.send(json.dumps({
                "type": "file_start",
                "filename": filename
            }))

            # informational only: log the source format
            with wave.open(wav_path, 'rb') as wav_file:
                channels = wav_file.getnchannels()
                sample_rate = wav_file.getframerate()
                sample_width = wav_file.getsampwidth()

                logger.info(f"WAV info: channels={channels}, sample_rate={sample_rate}, sample_width={sample_width*8}")

            logger.info(f"convert wav to  opus: {wav_path} -> {opus_path}")
            wav_to_opus(wav_path, opus_path, bitrate='64k')
            logger.info(f"convertion done: {opus_path}")

            with open(opus_path, 'rb') as opus_file:
                opus_data = opus_file.read()

            logger.info(f"Opus Read Successfully: {opus_filename}, size: {len(opus_data)} bytes")

            # send opus in chunks
            chunk_size = 4096
            for i in range(0, len(opus_data), chunk_size):
                chunk = opus_data[i:i+chunk_size]
                await websocket.send(chunk)
                await asyncio.sleep(0.05)  # small delay

            await websocket.send(json.dumps({
                "type": "file_end",
                "filename": filename
            }))

            logger.info(f"done streaming file: {filename}")

        except Exception as e:
            logger.error(f"error streaming {filename}: {e}", exc_info=True)
            await websocket.send(json.dumps({
                "type": "error",
                "message": f"error streaming: {str(e)}"
            }))

        finally:
            # single cleanup path for success, failure and cancellation
            if os.path.exists(opus_path):
                os.remove(opus_path)
                logger.info(f"temp file deleted: {opus_path}")

esp32:

Code: Select all

#include "opus_decoder.h"
#include "audio_element.h"
#include "audio_pipeline.h"

// Log tag for this file.
// NOTE(review): TAG is not used by the code shown in this snippet.
static char const* TAG = "AUDIO_PLAY";

// Pipeline and element handles are defined in another translation unit;
// init_play_pipeline() below assigns all four of them.
extern audio_pipeline_handle_t play_pipeline;
extern audio_element_handle_t raw_write, i2s_stream_writer, opus_decoder;

/**
 * Build and start the playback pipeline [raw] --> [opus_decode] --> [i2s_write].
 *
 * The handles are stored in the globals play_pipeline, raw_write, opus_decoder
 * and i2s_stream_writer.
 *
 * @return ESP_OK when the pipeline is running, ESP_FAIL when any element could
 *         not be created or the pipeline could not be started (everything
 *         created up to that point is released again).
 */
esp_err_t init_play_pipeline(void)
{
    /* All configurations start from their defaults. */
    audio_pipeline_cfg_t pipeline_cfg = DEFAULT_AUDIO_PIPELINE_CONFIG();
    raw_stream_cfg_t raw_cfg = RAW_STREAM_CFG_DEFAULT();
    opus_decoder_cfg_t opus_cfg = DEFAULT_OPUS_DECODER_CONFIG();
    i2s_stream_cfg_t i2s_cfg = I2S_STREAM_CFG_DEFAULT();

    play_pipeline = audio_pipeline_init(&pipeline_cfg);
    if (!play_pipeline) {
        return ESP_FAIL;
    }

    /* Input: raw stream with a 128 KiB output ring buffer. */
    raw_cfg.type = AUDIO_STREAM_WRITER;
    raw_cfg.out_rb_size = 128 * 1024;
    raw_write = raw_stream_init(&raw_cfg);
    if (!raw_write) {
        goto fail_pipeline;
    }

    /* Decoder: opus_decoder with its default configuration. */
    opus_decoder = decoder_opus_init(&opus_cfg);
    if (!opus_decoder) {
        goto fail_raw;
    }

    /* Output: I2S writer, 48 kHz / 16 bit / mono. */
    i2s_cfg.type = AUDIO_STREAM_WRITER;
    i2s_cfg.chan_cfg.dma_desc_num = 8;
    i2s_cfg.chan_cfg.dma_frame_num = 1024;
    i2s_cfg.std_cfg.clk_cfg.sample_rate_hz = 48000;
    i2s_cfg.std_cfg.slot_cfg.data_bit_width = I2S_DATA_BIT_WIDTH_16BIT;
    i2s_cfg.std_cfg.slot_cfg.slot_mode = I2S_SLOT_MODE_MONO;
    i2s_stream_writer = i2s_stream_init(&i2s_cfg);
    if (!i2s_stream_writer) {
        goto fail_opus;
    }

    audio_pipeline_register(play_pipeline, raw_write, "raw");
    audio_pipeline_register(play_pipeline, opus_decoder, "opus_decode");
    audio_pipeline_register(play_pipeline, i2s_stream_writer, "i2s_write");

    char const* link_tag[3] = {"raw", "opus_decode", "i2s_write"};
    audio_pipeline_link(play_pipeline, &link_tag[0], 3);

    /* Once the elements are registered, only the pipeline is torn down. */
    if (audio_pipeline_run(play_pipeline) != ESP_OK) {
        goto fail_pipeline;
    }

    return ESP_OK;

    /* Cleanup falls through from the newest object to the oldest. */
fail_opus:
    audio_element_deinit(opus_decoder);
fail_raw:
    audio_element_deinit(raw_write);
fail_pipeline:
    audio_pipeline_deinit(play_pipeline);
    return ESP_FAIL;
}

ESP32 Forum

How to configure raw_opus_decoder to play audio from a simple Python server?

How to configure raw_opus_decoder to play audio from a simple Python server?

Re: Need help: How to configure raw_opus_decoder to communicate with the simplest python server and play audio

Re: Need help: How to configure raw_opus_decoder to communicate with the simplest python server and play audio