//! 音频解码模块 //! //! 使用 hound 库解码 WAV 文件 use anyhow::{Context, Result}; use std::path::Path; use tracing::info; use crate::audio::AudioData; /// 解码 WAV 文件 /// /// 目前仅支持 WAV 格式 /// 返回解码后的音频数据 pub fn decode_audio(path: &Path) -> Result { info!("解码音频文件:{:?}", path); let reader = hound::WavReader::open(path) .with_context(|| format!("无法打开文件:{:?}", path))?; let spec = reader.spec(); let len = reader.len() as usize; info!( "音频信息:采样率={}, 声道={}, 位深={:?}, 样本数={}", spec.sample_rate, spec.channels, spec.sample_format, len ); let samples: Vec = match spec.sample_format { hound::SampleFormat::Int => { let bits = spec.bits_per_sample as u32; let max_val = (1 << (bits - 1)) as f32; if bits <= 16 { reader .into_samples::() .filter_map(|s| s.ok()) .map(|s| s as f32 / max_val) .collect() } else { reader .into_samples::() .filter_map(|s| s.ok()) .map(|s| s as f32 / max_val) .collect() } } hound::SampleFormat::Float => { reader .into_samples::() .filter_map(|s| s.ok()) .collect() } }; Ok(AudioData::new(samples, spec.sample_rate, spec.channels)) } /// 解码音频并转换为 ASR 所需的格式 /// /// 转换为单声道、16kHz 采样率 pub fn decode_audio_for_asr(path: &Path) -> Result { let mut audio = decode_audio(path)?; // 转换为单声道 let mono_samples = audio.to_mono(); // 重采样到 16kHz (如果需要) if audio.sample_rate != 16000 { audio.samples = resample_to_16k(&mono_samples, audio.sample_rate)?; audio.sample_rate = 16000; audio.channels = 1; } else { audio.samples = mono_samples; audio.channels = 1; } Ok(audio) } /// 重采样到 16kHz fn resample_to_16k(samples: &[f32], original_rate: u32) -> Result> { if original_rate == 16000 { return Ok(samples.to_vec()); } // 简单的线性插值重采样 let ratio = 16000.0 / original_rate as f32; let new_len = (samples.len() as f32 * ratio) as usize; let mut resampled = Vec::with_capacity(new_len); for i in 0..new_len { let src_idx = i as f32 / ratio; let src_idx_floor = src_idx.floor() as usize; let src_idx_ceil = src_idx.ceil() as usize; if src_idx_ceil >= samples.len() { resampled.push(*samples.last().unwrap_or(&0.0)); } else { let frac = src_idx - src_idx_floor as f32; let sample = samples[src_idx_floor] * (1.0 - frac) + samples[src_idx_ceil] * frac; resampled.push(sample); } } Ok(resampled) }