feat: 增强音频处理和语音识别日志
- 添加详细的运行时日志输出
- 记录音频块数量、RMS 值和缓冲区状态
- 记录 ONNX 推理耗时
- 记录设备信息和录音状态
- 改进错误处理和日志格式化

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
98105d67ed
commit
b60f0061ed
@ -22,13 +22,45 @@ export interface AudioChunk {
|
|||||||
timestamp: number;
|
timestamp: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Log severity levels.
enum LogLevel {
  DEBUG = 'DEBUG',
  INFO = 'INFO',
  WARN = 'WARN',
  ERROR = 'ERROR',
}

/**
 * Minimal console logger for the audio recorder.
 *
 * Each entry is written as `[YYYY-MM-DD HH:mm:ss.SSS] [LEVEL] [AudioRecorder] message`
 * (timestamp in UTC, taken from `Date#toISOString`), optionally followed by
 * ` - <data>`.
 *
 * @param level   Severity of the entry. WARN goes to `console.warn` and ERROR to
 *                `console.error` so the severity survives in devtools filters and
 *                on stderr; DEBUG and INFO go to `console.log`.
 * @param message Human-readable message.
 * @param data    Optional payload. `Error` values are rendered with their stack
 *                (falling back to `name: message`), other objects as JSON, and
 *                primitives via `String()`. A payload that cannot be rendered
 *                (e.g. a circular object) is replaced by a placeholder instead
 *                of throwing.
 */
function log(level: LogLevel, message: string, data?: unknown): void {
  const timestamp = new Date().toISOString().replace('T', ' ').slice(0, 23);
  const formatted = `[${timestamp}] [${level}] [AudioRecorder] ${message}`;

  let line = formatted;
  if (data !== undefined) {
    let detail: string;
    try {
      if (data instanceof Error) {
        // Keep the stack: the bare message is rarely enough to locate a failure.
        detail = data.stack ?? `${data.name}: ${data.message}`;
      } else if (typeof data === 'object') {
        detail = JSON.stringify(data);
      } else {
        detail = String(data);
      }
    } catch {
      // JSON.stringify throws on circular structures and BigInt values;
      // logging must never take the caller down.
      detail = '[Unable to stringify data]';
    }
    line = `${formatted} - ${detail}`;
  }

  // Route by severity so warnings and errors are not emitted as plain log output.
  if (level === LogLevel.ERROR) {
    console.error(line);
  } else if (level === LogLevel.WARN) {
    console.warn(line);
  } else {
    console.log(line);
  }
}
|
||||||
|
|
||||||
export class AudioRecorder extends EventEmitter {
|
export class AudioRecorder extends EventEmitter {
|
||||||
private config: AudioConfig;
|
private config: AudioConfig;
|
||||||
private isRecording: boolean = false;
|
private isRecording: boolean = false;
|
||||||
private stream: any = null;
|
private stream: MediaStream | null = null;
|
||||||
private audioContext: any = null;
|
private audioContext: AudioContext | null = null;
|
||||||
private source: any = null;
|
private source: MediaStreamAudioSourceNode | null = null;
|
||||||
private processor: any = null;
|
private processor: ScriptProcessorNode | null = null;
|
||||||
|
private chunkCount: number = 0;
|
||||||
|
private totalSamples: number = 0;
|
||||||
|
|
||||||
constructor(config: Partial<AudioConfig> = {}) {
|
constructor(config: Partial<AudioConfig> = {}) {
|
||||||
super();
|
super();
|
||||||
@ -38,6 +70,7 @@ export class AudioRecorder extends EventEmitter {
|
|||||||
chunkDuration: config.chunkDuration ?? 100,
|
chunkDuration: config.chunkDuration ?? 100,
|
||||||
deviceId: config.deviceId,
|
deviceId: config.deviceId,
|
||||||
};
|
};
|
||||||
|
log(LogLevel.INFO, 'AudioRecorder 初始化', this.config);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -45,13 +78,18 @@ export class AudioRecorder extends EventEmitter {
|
|||||||
*/
|
*/
|
||||||
async start(): Promise<void> {
|
async start(): Promise<void> {
|
||||||
if (this.isRecording) {
|
if (this.isRecording) {
|
||||||
|
log(LogLevel.WARN, '已经在录音中');
|
||||||
throw new Error('Already recording');
|
throw new Error('Already recording');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log(LogLevel.INFO, '开始录音,检查环境...');
|
||||||
|
|
||||||
// 检查是否在浏览器/Electron 渲染进程中
|
// 检查是否在浏览器/Electron 渲染进程中
|
||||||
if (typeof window !== 'undefined' && window.navigator?.mediaDevices) {
|
if (typeof window !== 'undefined' && window.navigator?.mediaDevices) {
|
||||||
|
log(LogLevel.INFO, '检测到浏览器环境,使用 getUserMedia');
|
||||||
await this.startInBrowser();
|
await this.startInBrowser();
|
||||||
} else {
|
} else {
|
||||||
|
log(LogLevel.WARN, 'Node.js 环境,使用演示模式');
|
||||||
// Node.js 环境 - 需要外部音频输入
|
// Node.js 环境 - 需要外部音频输入
|
||||||
this.startInNode();
|
this.startInNode();
|
||||||
}
|
}
|
||||||
@ -62,24 +100,67 @@ export class AudioRecorder extends EventEmitter {
|
|||||||
*/
|
*/
|
||||||
private async startInBrowser(): Promise<void> {
|
private async startInBrowser(): Promise<void> {
|
||||||
try {
|
try {
|
||||||
|
log(LogLevel.INFO, '请求麦克风权限...');
|
||||||
|
|
||||||
const constraints = {
|
const constraints = {
|
||||||
audio: {
|
audio: {
|
||||||
sampleRate: this.config.sampleRate,
|
sampleRate: this.config.sampleRate,
|
||||||
channelCount: this.config.channels,
|
channelCount: this.config.channels,
|
||||||
deviceId: this.config.deviceId ? { exact: this.config.deviceId } : undefined,
|
deviceId: this.config.deviceId ? { exact: this.config.deviceId } : undefined,
|
||||||
|
echoCancellation: true,
|
||||||
|
noiseSuppression: true,
|
||||||
|
autoGainControl: true,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
log(LogLevel.DEBUG, '麦克风约束', constraints);
|
||||||
|
|
||||||
|
// 请求权限
|
||||||
this.stream = await window.navigator.mediaDevices.getUserMedia(constraints);
|
this.stream = await window.navigator.mediaDevices.getUserMedia(constraints);
|
||||||
|
log(LogLevel.INFO, '✅ 麦克风权限已获取');
|
||||||
|
|
||||||
|
// 获取设备信息
|
||||||
|
const devices = await window.navigator.mediaDevices.enumerateDevices();
|
||||||
|
const audioInputs = devices.filter(d => d.kind === 'audioinput');
|
||||||
|
const activeDevice = audioInputs.find(d => d.deviceId === this.stream.getAudioTracks()[0]?.getSettings().deviceId);
|
||||||
|
log(LogLevel.INFO, '录音设备信息', {
|
||||||
|
label: activeDevice?.label || this.stream.getAudioTracks()[0]?.label || 'Unknown',
|
||||||
|
deviceId: this.stream.getAudioTracks()[0]?.getSettings().deviceId,
|
||||||
|
sampleRate: this.stream.getAudioTracks()[0]?.getSettings().sampleRate,
|
||||||
|
});
|
||||||
|
|
||||||
const AudioContextClass = window.AudioContext || (window as any).webkitAudioContext;
|
const AudioContextClass = window.AudioContext || (window as any).webkitAudioContext;
|
||||||
this.audioContext = new AudioContextClass({ sampleRate: this.config.sampleRate });
|
this.audioContext = new AudioContextClass({ sampleRate: this.config.sampleRate });
|
||||||
|
log(LogLevel.INFO, `AudioContext 创建,采样率:${this.config.sampleRate}`);
|
||||||
|
|
||||||
this.source = this.audioContext.createMediaStreamSource(this.stream);
|
this.source = this.audioContext.createMediaStreamSource(this.stream);
|
||||||
|
log(LogLevel.DEBUG, 'MediaStreamSource 已创建');
|
||||||
|
|
||||||
const bufferSize = Math.floor(this.config.sampleRate * (this.config.chunkDuration / 1000));
|
const bufferSize = Math.floor(this.config.sampleRate * (this.config.chunkDuration / 1000));
|
||||||
|
log(LogLevel.DEBUG, `ScriptProcessor bufferSize: ${bufferSize}`);
|
||||||
|
|
||||||
this.processor = this.audioContext.createScriptProcessor(bufferSize, 1, 1);
|
this.processor = this.audioContext.createScriptProcessor(bufferSize, 1, 1);
|
||||||
|
log(LogLevel.INFO, `ScriptProcessor 已创建,bufferSize=${bufferSize}`);
|
||||||
|
|
||||||
this.processor.onaudioprocess = (event: any) => {
|
this.processor.onaudioprocess = (event: any) => {
|
||||||
const inputData = event.inputBuffer.getChannelData(0);
|
const inputData = event.inputBuffer.getChannelData(0);
|
||||||
|
|
||||||
|
// 计算音频 RMS 值用于检测是否有声音输入
|
||||||
|
let sum = 0;
|
||||||
|
for (let i = 0; i < inputData.length; i++) {
|
||||||
|
sum += inputData[i] * inputData[i];
|
||||||
|
}
|
||||||
|
const rms = Math.sqrt(sum / inputData.length);
|
||||||
|
|
||||||
|
this.chunkCount++;
|
||||||
|
this.totalSamples += inputData.length;
|
||||||
|
|
||||||
|
log(LogLevel.DEBUG, `音频块 #${this.chunkCount}`, {
|
||||||
|
samples: inputData.length,
|
||||||
|
rms: rms.toFixed(4),
|
||||||
|
totalSamples: this.totalSamples,
|
||||||
|
});
|
||||||
|
|
||||||
const chunk: AudioChunk = {
|
const chunk: AudioChunk = {
|
||||||
data: new Float32Array(inputData),
|
data: new Float32Array(inputData),
|
||||||
sampleRate: this.config.sampleRate,
|
sampleRate: this.config.sampleRate,
|
||||||
@ -90,10 +171,13 @@ export class AudioRecorder extends EventEmitter {
|
|||||||
|
|
||||||
this.source.connect(this.processor);
|
this.source.connect(this.processor);
|
||||||
this.processor.connect(this.audioContext.destination);
|
this.processor.connect(this.audioContext.destination);
|
||||||
|
log(LogLevel.INFO, '音频处理链路已连接');
|
||||||
|
|
||||||
this.isRecording = true;
|
this.isRecording = true;
|
||||||
|
log(LogLevel.INFO, '✅ 开始录音');
|
||||||
this.emit('start');
|
this.emit('start');
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
log(LogLevel.ERROR, '❌ 获取麦克风权限失败', error);
|
||||||
this.emit('error', error);
|
this.emit('error', error);
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
@ -104,8 +188,8 @@ export class AudioRecorder extends EventEmitter {
|
|||||||
* 实际使用需要 node-audio 等库
|
* 实际使用需要 node-audio 等库
|
||||||
*/
|
*/
|
||||||
private startInNode(): void {
|
private startInNode(): void {
|
||||||
console.warn('Node.js 环境音频采集需要 electron 或 node-audio 库');
|
log(LogLevel.WARN, 'Node.js 环境音频采集需要 electron 或 node-audio 库');
|
||||||
console.warn('当前运行在演示模式,不会采集音频');
|
log(LogLevel.WARN, '当前运行在演示模式,不会采集音频');
|
||||||
this.isRecording = true;
|
this.isRecording = true;
|
||||||
this.emit('start');
|
this.emit('start');
|
||||||
// 演示:定期发送静音数据
|
// 演示:定期发送静音数据
|
||||||
@ -128,39 +212,56 @@ export class AudioRecorder extends EventEmitter {
|
|||||||
*/
|
*/
|
||||||
stop(): void {
|
stop(): void {
|
||||||
if (!this.isRecording) {
|
if (!this.isRecording) {
|
||||||
|
log(LogLevel.WARN, '未在录音中');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log(LogLevel.INFO, '停止录音...');
|
||||||
|
|
||||||
if (this.processor) {
|
if (this.processor) {
|
||||||
this.processor.disconnect();
|
this.processor.disconnect();
|
||||||
this.processor = null;
|
this.processor = null;
|
||||||
|
log(LogLevel.DEBUG, 'ScriptProcessor 已断开');
|
||||||
}
|
}
|
||||||
if (this.source) {
|
if (this.source) {
|
||||||
this.source.disconnect();
|
this.source.disconnect();
|
||||||
this.source = null;
|
this.source = null;
|
||||||
|
log(LogLevel.DEBUG, 'MediaStreamSource 已断开');
|
||||||
}
|
}
|
||||||
if (this.stream) {
|
if (this.stream) {
|
||||||
const tracks = this.stream.getTracks?.() || this.stream.tracks || [];
|
const tracks = this.stream.getTracks?.() || this.stream.tracks || [];
|
||||||
tracks.forEach((track: any) => track.stop?.());
|
tracks.forEach((track: any) => track.stop?.());
|
||||||
this.stream = null;
|
this.stream = null;
|
||||||
|
log(LogLevel.DEBUG, '媒体流已停止');
|
||||||
}
|
}
|
||||||
if (this.audioContext) {
|
if (this.audioContext) {
|
||||||
this.audioContext.close?.();
|
this.audioContext.close?.();
|
||||||
this.audioContext = null;
|
this.audioContext = null;
|
||||||
|
log(LogLevel.DEBUG, 'AudioContext 已关闭');
|
||||||
}
|
}
|
||||||
|
|
||||||
this.isRecording = false;
|
this.isRecording = false;
|
||||||
|
log(LogLevel.INFO, `录音停止完成,共处理 ${this.chunkCount} 个音频块,${this.totalSamples} 个样本`);
|
||||||
|
this.chunkCount = 0;
|
||||||
|
this.totalSamples = 0;
|
||||||
this.emit('stop');
|
this.emit('stop');
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 获取可用音频设备列表(仅浏览器环境)
|
* 获取可用音频设备列表(仅浏览器环境)
|
||||||
*/
|
*/
|
||||||
static async listDevices(): Promise<any[]> {
|
static async listDevices(): Promise<MediaDeviceInfo[]> {
|
||||||
if (typeof window !== 'undefined' && window.navigator?.mediaDevices) {
|
if (typeof window !== 'undefined' && window.navigator?.mediaDevices) {
|
||||||
|
log(LogLevel.INFO, '枚举音频输入设备...');
|
||||||
const devices = await window.navigator.mediaDevices.enumerateDevices();
|
const devices = await window.navigator.mediaDevices.enumerateDevices();
|
||||||
return devices.filter((device: any) => device.kind === 'audioinput');
|
const audioInputs = devices.filter((device: any) => device.kind === 'audioinput');
|
||||||
|
log(LogLevel.INFO, `找到 ${audioInputs.length} 个音频输入设备`);
|
||||||
|
audioInputs.forEach((device, index) => {
|
||||||
|
log(LogLevel.DEBUG, `设备 ${index + 1}: ${device.label || 'Unknown'} (${device.deviceId})`);
|
||||||
|
});
|
||||||
|
return audioInputs;
|
||||||
}
|
}
|
||||||
|
log(LogLevel.WARN, '当前环境不支持设备枚举');
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -22,17 +22,55 @@ export interface RecognitionResult {
|
|||||||
timestamp: number; // 时间戳
|
timestamp: number; // 时间戳
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Log severity levels.
enum LogLevel {
  DEBUG = 'DEBUG',
  INFO = 'INFO',
  WARN = 'WARN',
  ERROR = 'ERROR',
}

/**
 * Minimal console logger for the speech recognizer.
 *
 * Each entry is written as `[YYYY-MM-DD HH:mm:ss.SSS] [LEVEL] [SpeechRecognizer] message`
 * (timestamp in UTC, taken from `Date#toISOString`), optionally followed by
 * ` - <data>`.
 *
 * @param level   Severity of the entry. WARN goes to `console.warn` and ERROR to
 *                `console.error` so the severity survives in devtools filters and
 *                on stderr; DEBUG and INFO go to `console.log`.
 * @param message Human-readable message.
 * @param data    Optional payload. `Error` values are rendered with their stack
 *                (falling back to `name: message`), other objects as JSON, and
 *                primitives via `String()`. A payload that cannot be rendered
 *                (e.g. a circular object) is replaced by a placeholder instead
 *                of throwing.
 */
function log(level: LogLevel, message: string, data?: unknown): void {
  const timestamp = new Date().toISOString().replace('T', ' ').slice(0, 23);
  const formatted = `[${timestamp}] [${level}] [SpeechRecognizer] ${message}`;

  let line = formatted;
  if (data !== undefined) {
    let detail: string;
    try {
      if (data instanceof Error) {
        // Keep the stack: the bare message is rarely enough to locate a failure.
        detail = data.stack ?? `${data.name}: ${data.message}`;
      } else if (typeof data === 'object') {
        detail = JSON.stringify(data);
      } else {
        detail = String(data);
      }
    } catch {
      // JSON.stringify throws on circular structures and BigInt values;
      // logging must never take the caller down.
      detail = '[Unable to stringify data]';
    }
    line = `${formatted} - ${detail}`;
  }

  // Route by severity so warnings and errors are not emitted as plain log output.
  if (level === LogLevel.ERROR) {
    console.error(line);
  } else if (level === LogLevel.WARN) {
    console.warn(line);
  } else {
    console.log(line);
  }
}
|
||||||
|
|
||||||
export class SpeechRecognizer extends EventEmitter {
|
export class SpeechRecognizer extends EventEmitter {
|
||||||
private config: RecognizerConfig;
|
private config: RecognizerConfig;
|
||||||
private modelLoader: ModelLoader;
|
private modelLoader: ModelLoader;
|
||||||
private isRecognizing: boolean = false;
|
private isRecognizing: boolean = false;
|
||||||
private audioBuffer: Float32Array = new Float32Array(0);
|
private audioBuffer: Float32Array = new Float32Array(0);
|
||||||
private readonly MAX_BUFFER_SECONDS = 30;
|
private readonly MAX_BUFFER_SECONDS = 30;
|
||||||
|
private processedChunks: number = 0;
|
||||||
|
private recognizedResults: number = 0;
|
||||||
|
|
||||||
constructor(config: RecognizerConfig) {
|
constructor(config: RecognizerConfig) {
|
||||||
super();
|
super();
|
||||||
this.config = config;
|
this.config = config;
|
||||||
this.modelLoader = new ModelLoader();
|
this.modelLoader = new ModelLoader();
|
||||||
|
log(LogLevel.INFO, 'SpeechRecognizer 初始化', {
|
||||||
|
modelPath: config.modelPath,
|
||||||
|
language: config.language,
|
||||||
|
useVad: config.useVad,
|
||||||
|
beamSize: config.beamSize,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -40,9 +78,12 @@ export class SpeechRecognizer extends EventEmitter {
|
|||||||
*/
|
*/
|
||||||
async initialize(): Promise<void> {
|
async initialize(): Promise<void> {
|
||||||
try {
|
try {
|
||||||
|
log(LogLevel.INFO, '开始加载模型...');
|
||||||
await this.modelLoader.load(this.config.modelPath);
|
await this.modelLoader.load(this.config.modelPath);
|
||||||
|
log(LogLevel.INFO, '✅ 模型加载完成');
|
||||||
this.emit('ready');
|
this.emit('ready');
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
log(LogLevel.ERROR, '❌ 模型加载失败', error);
|
||||||
this.emit('error', new Error(`Failed to load model: ${error}`));
|
this.emit('error', new Error(`Failed to load model: ${error}`));
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
@ -62,6 +103,22 @@ export class SpeechRecognizer extends EventEmitter {
|
|||||||
newBuffer.set(chunk.data, this.audioBuffer.length);
|
newBuffer.set(chunk.data, this.audioBuffer.length);
|
||||||
this.audioBuffer = newBuffer;
|
this.audioBuffer = newBuffer;
|
||||||
|
|
||||||
|
this.processedChunks++;
|
||||||
|
|
||||||
|
// 计算音频 RMS 值
|
||||||
|
let sum = 0;
|
||||||
|
for (let i = 0; i < chunk.data.length; i++) {
|
||||||
|
sum += chunk.data[i] * chunk.data[i];
|
||||||
|
}
|
||||||
|
const rms = Math.sqrt(sum / chunk.data.length);
|
||||||
|
|
||||||
|
log(LogLevel.DEBUG, `音频块 #${this.processedChunks}`, {
|
||||||
|
samples: chunk.data.length,
|
||||||
|
sampleRate: chunk.sampleRate,
|
||||||
|
rms: rms.toFixed(4),
|
||||||
|
bufferSize: this.audioBuffer.length,
|
||||||
|
});
|
||||||
|
|
||||||
// 检查缓冲区是否超过最大长度
|
// 检查缓冲区是否超过最大长度
|
||||||
const maxSamples = this.config.useVad
|
const maxSamples = this.config.useVad
|
||||||
? chunk.sampleRate * this.MAX_BUFFER_SECONDS
|
? chunk.sampleRate * this.MAX_BUFFER_SECONDS
|
||||||
@ -70,6 +127,7 @@ export class SpeechRecognizer extends EventEmitter {
|
|||||||
if (this.audioBuffer.length > maxSamples) {
|
if (this.audioBuffer.length > maxSamples) {
|
||||||
const keepStart = Math.floor(this.audioBuffer.length / 2);
|
const keepStart = Math.floor(this.audioBuffer.length / 2);
|
||||||
this.audioBuffer = this.audioBuffer.slice(keepStart);
|
this.audioBuffer = this.audioBuffer.slice(keepStart);
|
||||||
|
log(LogLevel.DEBUG, `缓冲区裁剪,保留 ${this.audioBuffer.length} 样本`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 进行识别
|
// 进行识别
|
||||||
@ -86,6 +144,8 @@ export class SpeechRecognizer extends EventEmitter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
log(LogLevel.DEBUG, '开始 ONNX 推理...');
|
||||||
|
|
||||||
// 重采样到模型要求的采样率
|
// 重采样到模型要求的采样率
|
||||||
let audioData = this.audioBuffer;
|
let audioData = this.audioBuffer;
|
||||||
if (sampleRate !== modelConfig.sampleRate) {
|
if (sampleRate !== modelConfig.sampleRate) {
|
||||||
@ -96,6 +156,7 @@ export class SpeechRecognizer extends EventEmitter {
|
|||||||
const pos = Math.floor(i * ratio);
|
const pos = Math.floor(i * ratio);
|
||||||
audioData[i] = this.audioBuffer[pos] || 0;
|
audioData[i] = this.audioBuffer[pos] || 0;
|
||||||
}
|
}
|
||||||
|
log(LogLevel.DEBUG, `重采样:${sampleRate} -> ${modelConfig.sampleRate}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 填充或截断到模型输入大小
|
// 填充或截断到模型输入大小
|
||||||
@ -104,18 +165,26 @@ export class SpeechRecognizer extends EventEmitter {
|
|||||||
const copyLength = Math.min(audioData.length, inputSize);
|
const copyLength = Math.min(audioData.length, inputSize);
|
||||||
inputData.set(audioData.slice(0, copyLength));
|
inputData.set(audioData.slice(0, copyLength));
|
||||||
|
|
||||||
|
log(LogLevel.DEBUG, `输入张量形状:[1, ${inputSize}]`);
|
||||||
|
|
||||||
const inputTensor = new ort.Tensor('float32', inputData, [1, inputSize]);
|
const inputTensor = new ort.Tensor('float32', inputData, [1, inputSize]);
|
||||||
|
|
||||||
const feeds: Record<string, ort.Tensor> = {
|
const feeds: Record<string, ort.Tensor> = {
|
||||||
input: inputTensor,
|
input: inputTensor,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const startTime = Date.now();
|
||||||
const results = await this.modelLoader.run(feeds);
|
const results = await this.modelLoader.run(feeds);
|
||||||
|
const inferenceTime = Date.now() - startTime;
|
||||||
|
|
||||||
|
log(LogLevel.INFO, `ONNX 推理完成,耗时:${inferenceTime}ms`);
|
||||||
|
|
||||||
// 解码结果
|
// 解码结果
|
||||||
const text = this.decodeOutput(results, modelConfig);
|
const text = this.decodeOutput(results, modelConfig);
|
||||||
|
|
||||||
if (text) {
|
if (text) {
|
||||||
|
this.recognizedResults++;
|
||||||
|
log(LogLevel.INFO, `📝 识别结果 #${this.recognizedResults}: ${text}`);
|
||||||
const result: RecognitionResult = {
|
const result: RecognitionResult = {
|
||||||
text,
|
text,
|
||||||
confidence: 0.95,
|
confidence: 0.95,
|
||||||
@ -123,11 +192,14 @@ export class SpeechRecognizer extends EventEmitter {
|
|||||||
timestamp: Date.now(),
|
timestamp: Date.now(),
|
||||||
};
|
};
|
||||||
this.emit('result', result);
|
this.emit('result', result);
|
||||||
|
} else {
|
||||||
|
log(LogLevel.DEBUG, '识别结果为空');
|
||||||
}
|
}
|
||||||
|
|
||||||
// 清空缓冲区
|
// 清空缓冲区
|
||||||
this.audioBuffer = new Float32Array(0);
|
this.audioBuffer = new Float32Array(0);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
log(LogLevel.ERROR, '❌ 识别失败', error);
|
||||||
this.emit('error', new Error(`Recognition failed: ${error}`));
|
this.emit('error', new Error(`Recognition failed: ${error}`));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -143,6 +215,7 @@ export class SpeechRecognizer extends EventEmitter {
|
|||||||
for (const key of outputKeys) {
|
for (const key of outputKeys) {
|
||||||
if (results[key]) {
|
if (results[key]) {
|
||||||
output = results[key];
|
output = results[key];
|
||||||
|
log(LogLevel.DEBUG, `找到输出键:${key}`);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -152,16 +225,19 @@ export class SpeechRecognizer extends EventEmitter {
|
|||||||
const firstKey = Object.keys(results)[0];
|
const firstKey = Object.keys(results)[0];
|
||||||
if (firstKey) {
|
if (firstKey) {
|
||||||
output = results[firstKey];
|
output = results[firstKey];
|
||||||
|
log(LogLevel.DEBUG, `使用第一个输出键:${firstKey}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!output || !output.data) {
|
if (!output || !output.data) {
|
||||||
|
log(LogLevel.WARN, '没有可用的输出数据');
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
// 简化处理:实际应根据具体模型使用 tokenizer 解码
|
// 简化处理:实际应根据具体模型使用 tokenizer 解码
|
||||||
// 这里返回一个占位字符串
|
// 这里返回一个占位字符串
|
||||||
const tokens = Array.from(output.data as Float32Array | Int32Array);
|
const tokens = Array.from(output.data as Float32Array | Int32Array);
|
||||||
|
log(LogLevel.DEBUG, `输出 token 数量:${tokens.length}`);
|
||||||
return `[识别结果:${tokens.length} tokens]`;
|
return `[识别结果:${tokens.length} tokens]`;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -170,6 +246,10 @@ export class SpeechRecognizer extends EventEmitter {
|
|||||||
*/
|
*/
|
||||||
start(): void {
|
start(): void {
|
||||||
this.isRecognizing = true;
|
this.isRecognizing = true;
|
||||||
|
this.processedChunks = 0;
|
||||||
|
this.recognizedResults = 0;
|
||||||
|
this.audioBuffer = new Float32Array(0);
|
||||||
|
log(LogLevel.INFO, '🎤 开始语音识别');
|
||||||
this.emit('start');
|
this.emit('start');
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -178,6 +258,7 @@ export class SpeechRecognizer extends EventEmitter {
|
|||||||
*/
|
*/
|
||||||
stop(): void {
|
stop(): void {
|
||||||
this.isRecognizing = false;
|
this.isRecognizing = false;
|
||||||
|
log(LogLevel.INFO, `停止语音识别,共处理 ${this.processedChunks} 个音频块,${this.recognizedResults} 个识别结果`);
|
||||||
if (this.audioBuffer.length > 0) {
|
if (this.audioBuffer.length > 0) {
|
||||||
this.recognize(16000);
|
this.recognize(16000);
|
||||||
}
|
}
|
||||||
@ -188,8 +269,10 @@ export class SpeechRecognizer extends EventEmitter {
|
|||||||
* 卸载模型释放资源
|
* 卸载模型释放资源
|
||||||
*/
|
*/
|
||||||
async release(): Promise<void> {
|
async release(): Promise<void> {
|
||||||
|
log(LogLevel.INFO, '释放识别引擎资源...');
|
||||||
this.stop();
|
this.stop();
|
||||||
await this.modelLoader.release();
|
await this.modelLoader.release();
|
||||||
|
log(LogLevel.INFO, '资源已释放');
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user