diff --git a/src/core/audio-recorder.ts b/src/core/audio-recorder.ts index 67dc064..b721d0e 100644 --- a/src/core/audio-recorder.ts +++ b/src/core/audio-recorder.ts @@ -22,13 +22,45 @@ export interface AudioChunk { timestamp: number; } +// 日志级别 +enum LogLevel { + DEBUG = 'DEBUG', + INFO = 'INFO', + WARN = 'WARN', + ERROR = 'ERROR', +} + +// 简单日志函数 +function log(level: LogLevel, message: string, data?: unknown): void { + const timestamp = new Date().toISOString().replace('T', ' ').slice(0, 23); + const formatted = `[${timestamp}] [${level}] [AudioRecorder] ${message}`; + + if (data !== undefined) { + try { + if (data instanceof Error) { + console.log(`${formatted} - ${data.message}`); + } else if (typeof data === 'object') { + console.log(`${formatted} - ${JSON.stringify(data)}`); + } else { + console.log(`${formatted} - ${data}`); + } + } catch (e) { + console.log(`${formatted} - [Unable to stringify data]`); + } + } else { + console.log(formatted); + } +} + export class AudioRecorder extends EventEmitter { private config: AudioConfig; private isRecording: boolean = false; - private stream: any = null; - private audioContext: any = null; - private source: any = null; - private processor: any = null; + private stream: MediaStream | null = null; + private audioContext: AudioContext | null = null; + private source: MediaStreamAudioSourceNode | null = null; + private processor: ScriptProcessorNode | null = null; + private chunkCount: number = 0; + private totalSamples: number = 0; constructor(config: Partial = {}) { super(); @@ -38,6 +70,7 @@ export class AudioRecorder extends EventEmitter { chunkDuration: config.chunkDuration ?? 100, deviceId: config.deviceId, }; + log(LogLevel.INFO, 'AudioRecorder 初始化', this.config); } /** @@ -45,13 +78,18 @@ export class AudioRecorder extends EventEmitter { */ async start(): Promise { if (this.isRecording) { + log(LogLevel.WARN, '已经在录音中'); throw new Error('Already recording'); } + log(LogLevel.INFO, '开始录音,检查环境...'); + // 检查是否在浏览器/Electron 渲染进程中 if (typeof window !== 'undefined' && window.navigator?.mediaDevices) { + log(LogLevel.INFO, '检测到浏览器环境,使用 getUserMedia'); await this.startInBrowser(); } else { + log(LogLevel.WARN, 'Node.js 环境,使用演示模式'); // Node.js 环境 - 需要外部音频输入 this.startInNode(); } @@ -62,24 +100,67 @@ export class AudioRecorder extends EventEmitter { */ private async startInBrowser(): Promise { try { + log(LogLevel.INFO, '请求麦克风权限...'); + const constraints = { audio: { sampleRate: this.config.sampleRate, channelCount: this.config.channels, deviceId: this.config.deviceId ? { exact: this.config.deviceId } : undefined, + echoCancellation: true, + noiseSuppression: true, + autoGainControl: true, }, }; + log(LogLevel.DEBUG, '麦克风约束', constraints); + + // 请求权限 this.stream = await window.navigator.mediaDevices.getUserMedia(constraints); + log(LogLevel.INFO, '✅ 麦克风权限已获取'); + + // 获取设备信息 + const devices = await window.navigator.mediaDevices.enumerateDevices(); + const audioInputs = devices.filter(d => d.kind === 'audioinput'); + const activeDevice = audioInputs.find(d => d.deviceId === this.stream.getAudioTracks()[0]?.getSettings().deviceId); + log(LogLevel.INFO, '录音设备信息', { + label: activeDevice?.label || this.stream.getAudioTracks()[0]?.label || 'Unknown', + deviceId: this.stream.getAudioTracks()[0]?.getSettings().deviceId, + sampleRate: this.stream.getAudioTracks()[0]?.getSettings().sampleRate, + }); + const AudioContextClass = window.AudioContext || (window as any).webkitAudioContext; this.audioContext = new AudioContextClass({ sampleRate: this.config.sampleRate }); + log(LogLevel.INFO, `AudioContext 创建,采样率:${this.config.sampleRate}`); + this.source = this.audioContext.createMediaStreamSource(this.stream); + log(LogLevel.DEBUG, 'MediaStreamSource 已创建'); const bufferSize = Math.floor(this.config.sampleRate * (this.config.chunkDuration / 1000)); + log(LogLevel.DEBUG, `ScriptProcessor bufferSize: ${bufferSize}`); + this.processor = this.audioContext.createScriptProcessor(bufferSize, 1, 1); + log(LogLevel.INFO, `ScriptProcessor 已创建,bufferSize=${bufferSize}`); this.processor.onaudioprocess = (event: any) => { const inputData = event.inputBuffer.getChannelData(0); + + // 计算音频 RMS 值用于检测是否有声音输入 + let sum = 0; + for (let i = 0; i < inputData.length; i++) { + sum += inputData[i] * inputData[i]; + } + const rms = Math.sqrt(sum / inputData.length); + + this.chunkCount++; + this.totalSamples += inputData.length; + + log(LogLevel.DEBUG, `音频块 #${this.chunkCount}`, { + samples: inputData.length, + rms: rms.toFixed(4), + totalSamples: this.totalSamples, + }); + const chunk: AudioChunk = { data: new Float32Array(inputData), sampleRate: this.config.sampleRate, @@ -90,10 +171,13 @@ export class AudioRecorder extends EventEmitter { this.source.connect(this.processor); this.processor.connect(this.audioContext.destination); + log(LogLevel.INFO, '音频处理链路已连接'); this.isRecording = true; + log(LogLevel.INFO, '✅ 开始录音'); this.emit('start'); } catch (error) { + log(LogLevel.ERROR, '❌ 获取麦克风权限失败', error); this.emit('error', error); throw error; } @@ -104,8 +188,8 @@ export class AudioRecorder extends EventEmitter { * 实际使用需要 node-audio 等库 */ private startInNode(): void { - console.warn('Node.js 环境音频采集需要 electron 或 node-audio 库'); - console.warn('当前运行在演示模式,不会采集音频'); + log(LogLevel.WARN, 'Node.js 环境音频采集需要 electron 或 node-audio 库'); + log(LogLevel.WARN, '当前运行在演示模式,不会采集音频'); this.isRecording = true; this.emit('start'); // 演示:定期发送静音数据 @@ -128,39 +212,56 @@ export class AudioRecorder extends EventEmitter { */ stop(): void { if (!this.isRecording) { + log(LogLevel.WARN, '未在录音中'); return; } + log(LogLevel.INFO, '停止录音...'); + if (this.processor) { this.processor.disconnect(); this.processor = null; + log(LogLevel.DEBUG, 'ScriptProcessor 已断开'); } if (this.source) { this.source.disconnect(); this.source = null; + log(LogLevel.DEBUG, 'MediaStreamSource 已断开'); } if (this.stream) { const tracks = this.stream.getTracks?.() || this.stream.tracks || []; tracks.forEach((track: any) => track.stop?.()); this.stream = null; + log(LogLevel.DEBUG, '媒体流已停止'); } if (this.audioContext) { this.audioContext.close?.(); this.audioContext = null; + log(LogLevel.DEBUG, 'AudioContext 已关闭'); } this.isRecording = false; + log(LogLevel.INFO, `录音停止完成,共处理 ${this.chunkCount} 个音频块,${this.totalSamples} 个样本`); + this.chunkCount = 0; + this.totalSamples = 0; this.emit('stop'); } /** * 获取可用音频设备列表(仅浏览器环境) */ - static async listDevices(): Promise { + static async listDevices(): Promise { if (typeof window !== 'undefined' && window.navigator?.mediaDevices) { + log(LogLevel.INFO, '枚举音频输入设备...'); const devices = await window.navigator.mediaDevices.enumerateDevices(); - return devices.filter((device: any) => device.kind === 'audioinput'); + const audioInputs = devices.filter((device: any) => device.kind === 'audioinput'); + log(LogLevel.INFO, `找到 ${audioInputs.length} 个音频输入设备`); + audioInputs.forEach((device, index) => { + log(LogLevel.DEBUG, `设备 ${index + 1}: ${device.label || 'Unknown'} (${device.deviceId})`); + }); + return audioInputs; } + log(LogLevel.WARN, '当前环境不支持设备枚举'); return []; } diff --git a/src/core/speech-recognizer.ts b/src/core/speech-recognizer.ts index 2c31835..680b256 100644 --- a/src/core/speech-recognizer.ts +++ b/src/core/speech-recognizer.ts @@ -22,17 +22,55 @@ export interface RecognitionResult { timestamp: number; // 时间戳 } +// 日志级别 +enum LogLevel { + DEBUG = 'DEBUG', + INFO = 'INFO', + WARN = 'WARN', + ERROR = 'ERROR', +} + +// 简单日志函数 +function log(level: LogLevel, message: string, data?: unknown): void { + const timestamp = new Date().toISOString().replace('T', ' ').slice(0, 23); + const formatted = `[${timestamp}] [${level}] [SpeechRecognizer] ${message}`; + + if (data !== undefined) { + try { + if (data instanceof Error) { + console.log(`${formatted} - ${data.message}`); + } else if (typeof data === 'object') { + console.log(`${formatted} - ${JSON.stringify(data)}`); + } else { + console.log(`${formatted} - ${data}`); + } + } catch (e) { + console.log(`${formatted} - [Unable to stringify data]`); + } + } else { + console.log(formatted); + } +} + export class SpeechRecognizer extends EventEmitter { private config: RecognizerConfig; private modelLoader: ModelLoader; private isRecognizing: boolean = false; private audioBuffer: Float32Array = new Float32Array(0); private readonly MAX_BUFFER_SECONDS = 30; + private processedChunks: number = 0; + private recognizedResults: number = 0; constructor(config: RecognizerConfig) { super(); this.config = config; this.modelLoader = new ModelLoader(); + log(LogLevel.INFO, 'SpeechRecognizer 初始化', { + modelPath: config.modelPath, + language: config.language, + useVad: config.useVad, + beamSize: config.beamSize, + }); } /** @@ -40,9 +78,12 @@ export class SpeechRecognizer extends EventEmitter { */ async initialize(): Promise { try { + log(LogLevel.INFO, '开始加载模型...'); await this.modelLoader.load(this.config.modelPath); + log(LogLevel.INFO, '✅ 模型加载完成'); this.emit('ready'); } catch (error) { + log(LogLevel.ERROR, '❌ 模型加载失败', error); this.emit('error', new Error(`Failed to load model: ${error}`)); throw error; } @@ -62,6 +103,22 @@ export class SpeechRecognizer extends EventEmitter { newBuffer.set(chunk.data, this.audioBuffer.length); this.audioBuffer = newBuffer; + this.processedChunks++; + + // 计算音频 RMS 值 + let sum = 0; + for (let i = 0; i < chunk.data.length; i++) { + sum += chunk.data[i] * chunk.data[i]; + } + const rms = Math.sqrt(sum / chunk.data.length); + + log(LogLevel.DEBUG, `音频块 #${this.processedChunks}`, { + samples: chunk.data.length, + sampleRate: chunk.sampleRate, + rms: rms.toFixed(4), + bufferSize: this.audioBuffer.length, + }); + // 检查缓冲区是否超过最大长度 const maxSamples = this.config.useVad ? chunk.sampleRate * this.MAX_BUFFER_SECONDS @@ -70,6 +127,7 @@ export class SpeechRecognizer extends EventEmitter { if (this.audioBuffer.length > maxSamples) { const keepStart = Math.floor(this.audioBuffer.length / 2); this.audioBuffer = this.audioBuffer.slice(keepStart); + log(LogLevel.DEBUG, `缓冲区裁剪,保留 ${this.audioBuffer.length} 样本`); } // 进行识别 @@ -86,6 +144,8 @@ export class SpeechRecognizer extends EventEmitter { } try { + log(LogLevel.DEBUG, '开始 ONNX 推理...'); + // 重采样到模型要求的采样率 let audioData = this.audioBuffer; if (sampleRate !== modelConfig.sampleRate) { @@ -96,6 +156,7 @@ export class SpeechRecognizer extends EventEmitter { const pos = Math.floor(i * ratio); audioData[i] = this.audioBuffer[pos] || 0; } + log(LogLevel.DEBUG, `重采样:${sampleRate} -> ${modelConfig.sampleRate}`); } // 填充或截断到模型输入大小 @@ -104,18 +165,26 @@ export class SpeechRecognizer extends EventEmitter { const copyLength = Math.min(audioData.length, inputSize); inputData.set(audioData.slice(0, copyLength)); + log(LogLevel.DEBUG, `输入张量形状:[1, ${inputSize}]`); + const inputTensor = new ort.Tensor('float32', inputData, [1, inputSize]); const feeds: Record = { input: inputTensor, }; + const startTime = Date.now(); const results = await this.modelLoader.run(feeds); + const inferenceTime = Date.now() - startTime; + + log(LogLevel.INFO, `ONNX 推理完成,耗时:${inferenceTime}ms`); // 解码结果 const text = this.decodeOutput(results, modelConfig); if (text) { + this.recognizedResults++; + log(LogLevel.INFO, `📝 识别结果 #${this.recognizedResults}: ${text}`); const result: RecognitionResult = { text, confidence: 0.95, @@ -123,11 +192,14 @@ export class SpeechRecognizer extends EventEmitter { timestamp: Date.now(), }; this.emit('result', result); + } else { + log(LogLevel.DEBUG, '识别结果为空'); } // 清空缓冲区 this.audioBuffer = new Float32Array(0); } catch (error) { + log(LogLevel.ERROR, '❌ 识别失败', error); this.emit('error', new Error(`Recognition failed: ${error}`)); } } @@ -143,6 +215,7 @@ export class SpeechRecognizer extends EventEmitter { for (const key of outputKeys) { if (results[key]) { output = results[key]; + log(LogLevel.DEBUG, `找到输出键:${key}`); break; } } @@ -152,16 +225,19 @@ export class SpeechRecognizer extends EventEmitter { const firstKey = Object.keys(results)[0]; if (firstKey) { output = results[firstKey]; + log(LogLevel.DEBUG, `使用第一个输出键:${firstKey}`); } } if (!output || !output.data) { + log(LogLevel.WARN, '没有可用的输出数据'); return ''; } // 简化处理:实际应根据具体模型使用 tokenizer 解码 // 这里返回一个占位字符串 const tokens = Array.from(output.data as Float32Array | Int32Array); + log(LogLevel.DEBUG, `输出 token 数量:${tokens.length}`); return `[识别结果:${tokens.length} tokens]`; } @@ -170,6 +246,10 @@ export class SpeechRecognizer extends EventEmitter { */ start(): void { this.isRecognizing = true; + this.processedChunks = 0; + this.recognizedResults = 0; + this.audioBuffer = new Float32Array(0); + log(LogLevel.INFO, '🎤 开始语音识别'); this.emit('start'); } @@ -178,6 +258,7 @@ export class SpeechRecognizer extends EventEmitter { */ stop(): void { this.isRecognizing = false; + log(LogLevel.INFO, `停止语音识别,共处理 ${this.processedChunks} 个音频块,${this.recognizedResults} 个识别结果`); if (this.audioBuffer.length > 0) { this.recognize(16000); } @@ -188,8 +269,10 @@ export class SpeechRecognizer extends EventEmitter { * 卸载模型释放资源 */ async release(): Promise { + log(LogLevel.INFO, '释放识别引擎资源...'); this.stop(); await this.modelLoader.release(); + log(LogLevel.INFO, '资源已释放'); } /**