From 7fccab935cba0c2b667513d0ae9c3aa449fda3d0 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Fri, 20 Feb 2026 23:49:31 -0800
Subject: [PATCH] Fix #2793: convert stereo/multi-channel audio to mono in extract_fbank

---
 funasr/utils/load_utils.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/funasr/utils/load_utils.py b/funasr/utils/load_utils.py
index d208f7dc0..86bbaa466 100644
--- a/funasr/utils/load_utils.py
+++ b/funasr/utils/load_utils.py
@@ -200,10 +200,14 @@ def extract_fbank(data, data_len=None, data_type: str = "sound", frontend=None,
         data = torch.from_numpy(data)
         if len(data.shape) < 2:
             data = data[None, :] # data: [batch, N]
+        elif data.shape[0] > 1:
+            data = data.mean(dim=0, keepdim=True) # convert stereo/multi-channel to mono
         data_len = [data.shape[1]] if data_len is None else data_len
     elif isinstance(data, torch.Tensor):
         if len(data.shape) < 2:
             data = data[None, :] # data: [batch, N]
+        elif data.shape[0] > 1:
+            data = data.mean(dim=0, keepdim=True) # convert stereo/multi-channel to mono
         data_len = [data.shape[1]] if data_len is None else data_len
     elif isinstance(data, (list, tuple)):
         data_list, data_len = [], []