feat: 增强音频处理和语音识别日志

- 添加详细的运行时日志输出
- 记录音频块数量、RMS 值和缓冲区状态
- 记录 ONNX 推理耗时
- 记录设备信息和录音状态
- 改进错误处理和日志格式化

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
impressionyang 2026-05-20 17:38:24 +08:00
parent 98105d67ed
commit b60f0061ed
2 changed files with 192 additions and 8 deletions

View File

@ -22,13 +22,45 @@ export interface AudioChunk {
timestamp: number; timestamp: number;
} }
/** Severity levels accepted by `log`; each value is the tag printed inside the log line. */
enum LogLevel {
  DEBUG = 'DEBUG',
  INFO = 'INFO',
  WARN = 'WARN',
  ERROR = 'ERROR',
}

/**
 * Writes a single formatted line to the console:
 * `[YYYY-MM-DD HH:mm:ss.SSS] [LEVEL] [AudioRecorder] message[ - data]`.
 *
 * WARN and ERROR lines are sent through `console.warn` / `console.error` so the
 * severity survives in the console; DEBUG and INFO use `console.log`.
 *
 * @param level   Severity tag; also selects the console method.
 * @param message Human-readable text of the log line.
 * @param data    Optional payload appended after " - ". An `Error` is rendered as
 *                `name: message`, any other object through `JSON.stringify`, and
 *                everything else through `String()`. `undefined` means "no payload".
 */
function log(level: LogLevel, message: string, data?: unknown): void {
  // toISOString() is UTC; swapping 'T' for a space and cutting at 23 chars drops the trailing 'Z'.
  const timestamp = new Date().toISOString().replace('T', ' ').slice(0, 23);
  let line = `[${timestamp}] [${level}] [AudioRecorder] ${message}`;

  if (data !== undefined) {
    let detail: string;
    try {
      if (data instanceof Error) {
        // Keep the error name: it is what tells e.g. NotAllowedError from NotFoundError.
        detail = `${data.name}: ${data.message}`;
      } else if (typeof data === 'object') {
        detail = JSON.stringify(data);
      } else {
        detail = String(data);
      }
    } catch {
      // JSON.stringify throws on circular structures and BigInt values.
      detail = '[Unable to stringify data]';
    }
    line += ` - ${detail}`;
  }

  switch (level) {
    case LogLevel.ERROR:
      console.error(line);
      break;
    case LogLevel.WARN:
      console.warn(line);
      break;
    default:
      console.log(line);
  }
}
export class AudioRecorder extends EventEmitter { export class AudioRecorder extends EventEmitter {
private config: AudioConfig; private config: AudioConfig;
private isRecording: boolean = false; private isRecording: boolean = false;
private stream: any = null; private stream: MediaStream | null = null;
private audioContext: any = null; private audioContext: AudioContext | null = null;
private source: any = null; private source: MediaStreamAudioSourceNode | null = null;
private processor: any = null; private processor: ScriptProcessorNode | null = null;
private chunkCount: number = 0;
private totalSamples: number = 0;
constructor(config: Partial<AudioConfig> = {}) { constructor(config: Partial<AudioConfig> = {}) {
super(); super();
@ -38,6 +70,7 @@ export class AudioRecorder extends EventEmitter {
chunkDuration: config.chunkDuration ?? 100, chunkDuration: config.chunkDuration ?? 100,
deviceId: config.deviceId, deviceId: config.deviceId,
}; };
log(LogLevel.INFO, 'AudioRecorder 初始化', this.config);
} }
/** /**
@ -45,13 +78,18 @@ export class AudioRecorder extends EventEmitter {
*/ */
async start(): Promise<void> { async start(): Promise<void> {
if (this.isRecording) { if (this.isRecording) {
log(LogLevel.WARN, '已经在录音中');
throw new Error('Already recording'); throw new Error('Already recording');
} }
log(LogLevel.INFO, '开始录音,检查环境...');
// 检查是否在浏览器/Electron 渲染进程中 // 检查是否在浏览器/Electron 渲染进程中
if (typeof window !== 'undefined' && window.navigator?.mediaDevices) { if (typeof window !== 'undefined' && window.navigator?.mediaDevices) {
log(LogLevel.INFO, '检测到浏览器环境,使用 getUserMedia');
await this.startInBrowser(); await this.startInBrowser();
} else { } else {
log(LogLevel.WARN, 'Node.js 环境,使用演示模式');
// Node.js 环境 - 需要外部音频输入 // Node.js 环境 - 需要外部音频输入
this.startInNode(); this.startInNode();
} }
@ -62,24 +100,67 @@ export class AudioRecorder extends EventEmitter {
*/ */
private async startInBrowser(): Promise<void> { private async startInBrowser(): Promise<void> {
try { try {
log(LogLevel.INFO, '请求麦克风权限...');
const constraints = { const constraints = {
audio: { audio: {
sampleRate: this.config.sampleRate, sampleRate: this.config.sampleRate,
channelCount: this.config.channels, channelCount: this.config.channels,
deviceId: this.config.deviceId ? { exact: this.config.deviceId } : undefined, deviceId: this.config.deviceId ? { exact: this.config.deviceId } : undefined,
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true,
}, },
}; };
log(LogLevel.DEBUG, '麦克风约束', constraints);
// 请求权限
this.stream = await window.navigator.mediaDevices.getUserMedia(constraints); this.stream = await window.navigator.mediaDevices.getUserMedia(constraints);
log(LogLevel.INFO, '✅ 麦克风权限已获取');
// 获取设备信息
const devices = await window.navigator.mediaDevices.enumerateDevices();
const audioInputs = devices.filter(d => d.kind === 'audioinput');
const activeDevice = audioInputs.find(d => d.deviceId === this.stream.getAudioTracks()[0]?.getSettings().deviceId);
log(LogLevel.INFO, '录音设备信息', {
label: activeDevice?.label || this.stream.getAudioTracks()[0]?.label || 'Unknown',
deviceId: this.stream.getAudioTracks()[0]?.getSettings().deviceId,
sampleRate: this.stream.getAudioTracks()[0]?.getSettings().sampleRate,
});
const AudioContextClass = window.AudioContext || (window as any).webkitAudioContext; const AudioContextClass = window.AudioContext || (window as any).webkitAudioContext;
this.audioContext = new AudioContextClass({ sampleRate: this.config.sampleRate }); this.audioContext = new AudioContextClass({ sampleRate: this.config.sampleRate });
log(LogLevel.INFO, `AudioContext 创建,采样率:${this.config.sampleRate}`);
this.source = this.audioContext.createMediaStreamSource(this.stream); this.source = this.audioContext.createMediaStreamSource(this.stream);
log(LogLevel.DEBUG, 'MediaStreamSource 已创建');
const bufferSize = Math.floor(this.config.sampleRate * (this.config.chunkDuration / 1000)); const bufferSize = Math.floor(this.config.sampleRate * (this.config.chunkDuration / 1000));
log(LogLevel.DEBUG, `ScriptProcessor bufferSize: ${bufferSize}`);
this.processor = this.audioContext.createScriptProcessor(bufferSize, 1, 1); this.processor = this.audioContext.createScriptProcessor(bufferSize, 1, 1);
log(LogLevel.INFO, `ScriptProcessor 已创建bufferSize=${bufferSize}`);
this.processor.onaudioprocess = (event: any) => { this.processor.onaudioprocess = (event: any) => {
const inputData = event.inputBuffer.getChannelData(0); const inputData = event.inputBuffer.getChannelData(0);
// 计算音频 RMS 值用于检测是否有声音输入
let sum = 0;
for (let i = 0; i < inputData.length; i++) {
sum += inputData[i] * inputData[i];
}
const rms = Math.sqrt(sum / inputData.length);
this.chunkCount++;
this.totalSamples += inputData.length;
log(LogLevel.DEBUG, `音频块 #${this.chunkCount}`, {
samples: inputData.length,
rms: rms.toFixed(4),
totalSamples: this.totalSamples,
});
const chunk: AudioChunk = { const chunk: AudioChunk = {
data: new Float32Array(inputData), data: new Float32Array(inputData),
sampleRate: this.config.sampleRate, sampleRate: this.config.sampleRate,
@ -90,10 +171,13 @@ export class AudioRecorder extends EventEmitter {
this.source.connect(this.processor); this.source.connect(this.processor);
this.processor.connect(this.audioContext.destination); this.processor.connect(this.audioContext.destination);
log(LogLevel.INFO, '音频处理链路已连接');
this.isRecording = true; this.isRecording = true;
log(LogLevel.INFO, '✅ 开始录音');
this.emit('start'); this.emit('start');
} catch (error) { } catch (error) {
log(LogLevel.ERROR, '❌ 获取麦克风权限失败', error);
this.emit('error', error); this.emit('error', error);
throw error; throw error;
} }
@ -104,8 +188,8 @@ export class AudioRecorder extends EventEmitter {
* 使 node-audio * 使 node-audio
*/ */
private startInNode(): void { private startInNode(): void {
console.warn('Node.js 环境音频采集需要 electron 或 node-audio 库'); log(LogLevel.WARN, 'Node.js 环境音频采集需要 electron 或 node-audio 库');
console.warn('当前运行在演示模式,不会采集音频'); log(LogLevel.WARN, '当前运行在演示模式,不会采集音频');
this.isRecording = true; this.isRecording = true;
this.emit('start'); this.emit('start');
// 演示:定期发送静音数据 // 演示:定期发送静音数据
@ -128,39 +212,56 @@ export class AudioRecorder extends EventEmitter {
*/ */
stop(): void { stop(): void {
if (!this.isRecording) { if (!this.isRecording) {
log(LogLevel.WARN, '未在录音中');
return; return;
} }
log(LogLevel.INFO, '停止录音...');
if (this.processor) { if (this.processor) {
this.processor.disconnect(); this.processor.disconnect();
this.processor = null; this.processor = null;
log(LogLevel.DEBUG, 'ScriptProcessor 已断开');
} }
if (this.source) { if (this.source) {
this.source.disconnect(); this.source.disconnect();
this.source = null; this.source = null;
log(LogLevel.DEBUG, 'MediaStreamSource 已断开');
} }
if (this.stream) { if (this.stream) {
const tracks = this.stream.getTracks?.() || this.stream.tracks || []; const tracks = this.stream.getTracks?.() || this.stream.tracks || [];
tracks.forEach((track: any) => track.stop?.()); tracks.forEach((track: any) => track.stop?.());
this.stream = null; this.stream = null;
log(LogLevel.DEBUG, '媒体流已停止');
} }
if (this.audioContext) { if (this.audioContext) {
this.audioContext.close?.(); this.audioContext.close?.();
this.audioContext = null; this.audioContext = null;
log(LogLevel.DEBUG, 'AudioContext 已关闭');
} }
this.isRecording = false; this.isRecording = false;
log(LogLevel.INFO, `录音停止完成,共处理 ${this.chunkCount} 个音频块,${this.totalSamples} 个样本`);
this.chunkCount = 0;
this.totalSamples = 0;
this.emit('stop'); this.emit('stop');
} }
/** /**
* *
*/ */
static async listDevices(): Promise<any[]> { static async listDevices(): Promise<MediaDeviceInfo[]> {
if (typeof window !== 'undefined' && window.navigator?.mediaDevices) { if (typeof window !== 'undefined' && window.navigator?.mediaDevices) {
log(LogLevel.INFO, '枚举音频输入设备...');
const devices = await window.navigator.mediaDevices.enumerateDevices(); const devices = await window.navigator.mediaDevices.enumerateDevices();
return devices.filter((device: any) => device.kind === 'audioinput'); const audioInputs = devices.filter((device: any) => device.kind === 'audioinput');
log(LogLevel.INFO, `找到 ${audioInputs.length} 个音频输入设备`);
audioInputs.forEach((device, index) => {
log(LogLevel.DEBUG, `设备 ${index + 1}: ${device.label || 'Unknown'} (${device.deviceId})`);
});
return audioInputs;
} }
log(LogLevel.WARN, '当前环境不支持设备枚举');
return []; return [];
} }

View File

@ -22,17 +22,55 @@ export interface RecognitionResult {
timestamp: number; // 时间戳 timestamp: number; // 时间戳
} }
/** Severity levels accepted by `log`; each value is the tag printed inside the log line. */
enum LogLevel {
  DEBUG = 'DEBUG',
  INFO = 'INFO',
  WARN = 'WARN',
  ERROR = 'ERROR',
}

/**
 * Writes a single formatted line to the console:
 * `[YYYY-MM-DD HH:mm:ss.SSS] [LEVEL] [SpeechRecognizer] message[ - data]`.
 *
 * WARN and ERROR lines are sent through `console.warn` / `console.error` so the
 * severity survives in the console; DEBUG and INFO use `console.log`.
 *
 * @param level   Severity tag; also selects the console method.
 * @param message Human-readable text of the log line.
 * @param data    Optional payload appended after " - ". An `Error` is rendered as
 *                `name: message`, any other object through `JSON.stringify`, and
 *                everything else through `String()`. `undefined` means "no payload".
 */
function log(level: LogLevel, message: string, data?: unknown): void {
  // toISOString() is UTC; swapping 'T' for a space and cutting at 23 chars drops the trailing 'Z'.
  const timestamp = new Date().toISOString().replace('T', ' ').slice(0, 23);
  let line = `[${timestamp}] [${level}] [SpeechRecognizer] ${message}`;

  if (data !== undefined) {
    let detail: string;
    try {
      if (data instanceof Error) {
        // Keep the error name so the failure category is visible, not just its message.
        detail = `${data.name}: ${data.message}`;
      } else if (typeof data === 'object') {
        detail = JSON.stringify(data);
      } else {
        detail = String(data);
      }
    } catch {
      // JSON.stringify throws on circular structures and BigInt values.
      detail = '[Unable to stringify data]';
    }
    line += ` - ${detail}`;
  }

  switch (level) {
    case LogLevel.ERROR:
      console.error(line);
      break;
    case LogLevel.WARN:
      console.warn(line);
      break;
    default:
      console.log(line);
  }
}
export class SpeechRecognizer extends EventEmitter { export class SpeechRecognizer extends EventEmitter {
private config: RecognizerConfig; private config: RecognizerConfig;
private modelLoader: ModelLoader; private modelLoader: ModelLoader;
private isRecognizing: boolean = false; private isRecognizing: boolean = false;
private audioBuffer: Float32Array = new Float32Array(0); private audioBuffer: Float32Array = new Float32Array(0);
private readonly MAX_BUFFER_SECONDS = 30; private readonly MAX_BUFFER_SECONDS = 30;
private processedChunks: number = 0;
private recognizedResults: number = 0;
constructor(config: RecognizerConfig) { constructor(config: RecognizerConfig) {
super(); super();
this.config = config; this.config = config;
this.modelLoader = new ModelLoader(); this.modelLoader = new ModelLoader();
log(LogLevel.INFO, 'SpeechRecognizer 初始化', {
modelPath: config.modelPath,
language: config.language,
useVad: config.useVad,
beamSize: config.beamSize,
});
} }
/** /**
@ -40,9 +78,12 @@ export class SpeechRecognizer extends EventEmitter {
*/ */
async initialize(): Promise<void> { async initialize(): Promise<void> {
try { try {
log(LogLevel.INFO, '开始加载模型...');
await this.modelLoader.load(this.config.modelPath); await this.modelLoader.load(this.config.modelPath);
log(LogLevel.INFO, '✅ 模型加载完成');
this.emit('ready'); this.emit('ready');
} catch (error) { } catch (error) {
log(LogLevel.ERROR, '❌ 模型加载失败', error);
this.emit('error', new Error(`Failed to load model: ${error}`)); this.emit('error', new Error(`Failed to load model: ${error}`));
throw error; throw error;
} }
@ -62,6 +103,22 @@ export class SpeechRecognizer extends EventEmitter {
newBuffer.set(chunk.data, this.audioBuffer.length); newBuffer.set(chunk.data, this.audioBuffer.length);
this.audioBuffer = newBuffer; this.audioBuffer = newBuffer;
this.processedChunks++;
// 计算音频 RMS 值
let sum = 0;
for (let i = 0; i < chunk.data.length; i++) {
sum += chunk.data[i] * chunk.data[i];
}
const rms = Math.sqrt(sum / chunk.data.length);
log(LogLevel.DEBUG, `音频块 #${this.processedChunks}`, {
samples: chunk.data.length,
sampleRate: chunk.sampleRate,
rms: rms.toFixed(4),
bufferSize: this.audioBuffer.length,
});
// 检查缓冲区是否超过最大长度 // 检查缓冲区是否超过最大长度
const maxSamples = this.config.useVad const maxSamples = this.config.useVad
? chunk.sampleRate * this.MAX_BUFFER_SECONDS ? chunk.sampleRate * this.MAX_BUFFER_SECONDS
@ -70,6 +127,7 @@ export class SpeechRecognizer extends EventEmitter {
if (this.audioBuffer.length > maxSamples) { if (this.audioBuffer.length > maxSamples) {
const keepStart = Math.floor(this.audioBuffer.length / 2); const keepStart = Math.floor(this.audioBuffer.length / 2);
this.audioBuffer = this.audioBuffer.slice(keepStart); this.audioBuffer = this.audioBuffer.slice(keepStart);
log(LogLevel.DEBUG, `缓冲区裁剪,保留 ${this.audioBuffer.length} 样本`);
} }
// 进行识别 // 进行识别
@ -86,6 +144,8 @@ export class SpeechRecognizer extends EventEmitter {
} }
try { try {
log(LogLevel.DEBUG, '开始 ONNX 推理...');
// 重采样到模型要求的采样率 // 重采样到模型要求的采样率
let audioData = this.audioBuffer; let audioData = this.audioBuffer;
if (sampleRate !== modelConfig.sampleRate) { if (sampleRate !== modelConfig.sampleRate) {
@ -96,6 +156,7 @@ export class SpeechRecognizer extends EventEmitter {
const pos = Math.floor(i * ratio); const pos = Math.floor(i * ratio);
audioData[i] = this.audioBuffer[pos] || 0; audioData[i] = this.audioBuffer[pos] || 0;
} }
log(LogLevel.DEBUG, `重采样:${sampleRate} -> ${modelConfig.sampleRate}`);
} }
// 填充或截断到模型输入大小 // 填充或截断到模型输入大小
@ -104,18 +165,26 @@ export class SpeechRecognizer extends EventEmitter {
const copyLength = Math.min(audioData.length, inputSize); const copyLength = Math.min(audioData.length, inputSize);
inputData.set(audioData.slice(0, copyLength)); inputData.set(audioData.slice(0, copyLength));
log(LogLevel.DEBUG, `输入张量形状:[1, ${inputSize}]`);
const inputTensor = new ort.Tensor('float32', inputData, [1, inputSize]); const inputTensor = new ort.Tensor('float32', inputData, [1, inputSize]);
const feeds: Record<string, ort.Tensor> = { const feeds: Record<string, ort.Tensor> = {
input: inputTensor, input: inputTensor,
}; };
const startTime = Date.now();
const results = await this.modelLoader.run(feeds); const results = await this.modelLoader.run(feeds);
const inferenceTime = Date.now() - startTime;
log(LogLevel.INFO, `ONNX 推理完成,耗时:${inferenceTime}ms`);
// 解码结果 // 解码结果
const text = this.decodeOutput(results, modelConfig); const text = this.decodeOutput(results, modelConfig);
if (text) { if (text) {
this.recognizedResults++;
log(LogLevel.INFO, `📝 识别结果 #${this.recognizedResults}: ${text}`);
const result: RecognitionResult = { const result: RecognitionResult = {
text, text,
confidence: 0.95, confidence: 0.95,
@ -123,11 +192,14 @@ export class SpeechRecognizer extends EventEmitter {
timestamp: Date.now(), timestamp: Date.now(),
}; };
this.emit('result', result); this.emit('result', result);
} else {
log(LogLevel.DEBUG, '识别结果为空');
} }
// 清空缓冲区 // 清空缓冲区
this.audioBuffer = new Float32Array(0); this.audioBuffer = new Float32Array(0);
} catch (error) { } catch (error) {
log(LogLevel.ERROR, '❌ 识别失败', error);
this.emit('error', new Error(`Recognition failed: ${error}`)); this.emit('error', new Error(`Recognition failed: ${error}`));
} }
} }
@ -143,6 +215,7 @@ export class SpeechRecognizer extends EventEmitter {
for (const key of outputKeys) { for (const key of outputKeys) {
if (results[key]) { if (results[key]) {
output = results[key]; output = results[key];
log(LogLevel.DEBUG, `找到输出键:${key}`);
break; break;
} }
} }
@ -152,16 +225,19 @@ export class SpeechRecognizer extends EventEmitter {
const firstKey = Object.keys(results)[0]; const firstKey = Object.keys(results)[0];
if (firstKey) { if (firstKey) {
output = results[firstKey]; output = results[firstKey];
log(LogLevel.DEBUG, `使用第一个输出键:${firstKey}`);
} }
} }
if (!output || !output.data) { if (!output || !output.data) {
log(LogLevel.WARN, '没有可用的输出数据');
return ''; return '';
} }
// 简化处理:实际应根据具体模型使用 tokenizer 解码 // 简化处理:实际应根据具体模型使用 tokenizer 解码
// 这里返回一个占位字符串 // 这里返回一个占位字符串
const tokens = Array.from(output.data as Float32Array | Int32Array); const tokens = Array.from(output.data as Float32Array | Int32Array);
log(LogLevel.DEBUG, `输出 token 数量:${tokens.length}`);
return `[识别结果:${tokens.length} tokens]`; return `[识别结果:${tokens.length} tokens]`;
} }
@ -170,6 +246,10 @@ export class SpeechRecognizer extends EventEmitter {
*/ */
start(): void { start(): void {
this.isRecognizing = true; this.isRecognizing = true;
this.processedChunks = 0;
this.recognizedResults = 0;
this.audioBuffer = new Float32Array(0);
log(LogLevel.INFO, '🎤 开始语音识别');
this.emit('start'); this.emit('start');
} }
@ -178,6 +258,7 @@ export class SpeechRecognizer extends EventEmitter {
*/ */
stop(): void { stop(): void {
this.isRecognizing = false; this.isRecognizing = false;
log(LogLevel.INFO, `停止语音识别,共处理 ${this.processedChunks} 个音频块,${this.recognizedResults} 个识别结果`);
if (this.audioBuffer.length > 0) { if (this.audioBuffer.length > 0) {
this.recognize(16000); this.recognize(16000);
} }
@ -188,8 +269,10 @@ export class SpeechRecognizer extends EventEmitter {
* *
*/ */
async release(): Promise<void> { async release(): Promise<void> {
log(LogLevel.INFO, '释放识别引擎资源...');
this.stop(); this.stop();
await this.modelLoader.release(); await this.modelLoader.release();
log(LogLevel.INFO, '资源已释放');
} }
/** /**