diff --git a/src/audio/audio_capture.cpp b/src/audio/audio_capture.cpp index 1ab9107..6f5b8d8 100644 --- a/src/audio/audio_capture.cpp +++ b/src/audio/audio_capture.cpp @@ -108,7 +108,7 @@ AudioCapture::AudioCapture(QObject* parent) } AudioCapture::~AudioCapture() { - stop(); + stopAndClose(); } QStringList AudioCapture::getDeviceList() { @@ -158,76 +158,91 @@ bool AudioCapture::start(int deviceIndex, int sampleRate, int bufferSizeMs) { return false; } - // 枚举所有 Host API 用于诊断 - LOG_DEBUG(kTag, QString("Host API 数量: %1").arg(Pa_GetHostApiCount())); - for (int i = 0; i < Pa_GetHostApiCount(); i++) { - const PaHostApiInfo* api = Pa_GetHostApiInfo(i); - if (api) { - LOG_DEBUG(kTag, QString(" Host API #%1: %2 (设备数: %3)") - .arg(i).arg(api->name).arg(api->deviceCount)); + // 如果流已打开且参数匹配,直接启动(跳过耗时的 OpenStream) + if (streamOpen_ && impl_->ctx.stream && + deviceIndex == lastDeviceIndex_ && + sampleRate == lastSampleRate_ && + bufferSizeMs == lastBufferSizeMs_) { + LOG_DEBUG(kTag, "复用已打开的音频流,跳过 OpenStream"); + } else { + // 关闭旧流(如果存在) + if (impl_->ctx.stream) { + Pa_CloseStream(impl_->ctx.stream); + impl_->ctx.stream = nullptr; + streamOpen_ = false; } + + // 枚举所有 Host API 用于诊断 + LOG_DEBUG(kTag, QString("Host API 数量: %1").arg(Pa_GetHostApiCount())); + for (int i = 0; i < Pa_GetHostApiCount(); i++) { + const PaHostApiInfo* api = Pa_GetHostApiInfo(i); + if (api) { + LOG_DEBUG(kTag, QString(" Host API #%1: %2 (设备数: %3)") + .arg(i).arg(api->name).arg(api->deviceCount)); + } + } + + // 选择设备 + int devIdx = deviceIndex < 0 ? Pa_GetDefaultInputDevice() : deviceIndex; + if (devIdx < 0 || devIdx >= Pa_GetDeviceCount()) { + LOG_ERROR(kTag, QString("无效的音频设备索引: %1 (默认设备: %2)") + .arg(deviceIndex).arg(Pa_GetDefaultInputDevice())); + return false; + } + + const PaDeviceInfo* devInfo = Pa_GetDeviceInfo(devIdx); + if (!devInfo || devInfo->maxInputChannels <= 0) { + LOG_ERROR(kTag, "所选设备不是输入设备"); + return false; + } + + const PaHostApiInfo* hostApi = Pa_GetHostApiInfo(devInfo->hostApi); + LOG_INFO(kTag, QString("=== 音频设备诊断 ===")); + LOG_INFO(kTag, QString(" 设备 #%1: %2").arg(devIdx).arg(devInfo->name)); + LOG_INFO(kTag, QString(" Host API: %1").arg(hostApi ? hostApi->name : "未知")); + LOG_INFO(kTag, QString(" 最大输入通道: %1").arg(devInfo->maxInputChannels)); + LOG_INFO(kTag, QString(" 设备默认采样率: %1 Hz").arg(devInfo->defaultSampleRate, 0, 'f', 0)); + LOG_INFO(kTag, QString(" 请求采样率: %1 Hz").arg(sampleRate)); + LOG_INFO(kTag, QString(" 采样格式: paFloat32 | paNonInterleaved")); + LOG_INFO(kTag, QString(" 请求通道数: 1 (mono)")); + LOG_INFO(kTag, QString(" 缓冲区: %1ms (%2 帧)").arg(bufferSizeMs) + .arg(sampleRate * bufferSizeMs / 1000)); + + // 检查是否可能选错设备(名称包含 monitor 的通常是回环设备) + QString devName = QString(devInfo->name).toLower(); + if (devName.contains("monitor") || devName.contains("output")) { + LOG_WARNING(kTag, "⚠️ 当前设备名称包含 'monitor' 或 'output'," + "这可能是扬声器回环设备而非麦克风!如果录制的是噪音,请在设置中选择正确的麦克风设备。"); + } + + PaStreamParameters inputParams{}; + inputParams.device = devIdx; + inputParams.channelCount = 1; + inputParams.sampleFormat = paFloat32; + inputParams.suggestedLatency = devInfo->defaultHighInputLatency; + + int framesPerBuffer = sampleRate * bufferSizeMs / 1000; + if (framesPerBuffer < 256) framesPerBuffer = 256; + + PaError err = Pa_OpenStream( + &impl_->ctx.stream, &inputParams, nullptr, sampleRate, + static_cast(framesPerBuffer), + paClipOff, paCallback, &impl_->ctx); + + if (err != paNoError || !impl_->ctx.stream) { + LOG_ERROR(kTag, QString("打开音频流失败: %1").arg(Pa_GetErrorText(err))); + return false; + } + + streamOpen_ = true; + lastDeviceIndex_ = deviceIndex; + lastSampleRate_ = sampleRate; + lastBufferSizeMs_ = bufferSizeMs; } - // 选择设备 - int devIdx = deviceIndex < 0 ? Pa_GetDefaultInputDevice() : deviceIndex; - if (devIdx < 0 || devIdx >= Pa_GetDeviceCount()) { - LOG_ERROR(kTag, QString("无效的音频设备索引: %1 (默认设备: %2)") - .arg(deviceIndex).arg(Pa_GetDefaultInputDevice())); - return false; - } - - const PaDeviceInfo* devInfo = Pa_GetDeviceInfo(devIdx); - if (!devInfo || devInfo->maxInputChannels <= 0) { - LOG_ERROR(kTag, "所选设备不是输入设备"); - return false; - } - - const PaHostApiInfo* hostApi = Pa_GetHostApiInfo(devInfo->hostApi); - LOG_INFO(kTag, QString("=== 音频设备诊断 ===")); - LOG_INFO(kTag, QString(" 设备 #%1: %2").arg(devIdx).arg(devInfo->name)); - LOG_INFO(kTag, QString(" Host API: %1").arg(hostApi ? hostApi->name : "未知")); - LOG_INFO(kTag, QString(" 最大输入通道: %1").arg(devInfo->maxInputChannels)); - LOG_INFO(kTag, QString(" 设备默认采样率: %1 Hz").arg(devInfo->defaultSampleRate, 0, 'f', 0)); - LOG_INFO(kTag, QString(" 请求采样率: %1 Hz").arg(sampleRate)); - LOG_INFO(kTag, QString(" 采样格式: paFloat32 | paNonInterleaved")); - LOG_INFO(kTag, QString(" 请求通道数: 1 (mono)")); - LOG_INFO(kTag, QString(" 缓冲区: %1ms (%2 帧)").arg(bufferSizeMs) - .arg(sampleRate * bufferSizeMs / 1000)); - - // 检查是否可能选错设备(名称包含 monitor 的通常是回环设备) - QString devName = QString(devInfo->name).toLower(); - if (devName.contains("monitor") || devName.contains("output")) { - LOG_WARNING(kTag, "⚠️ 当前设备名称包含 'monitor' 或 'output'," - "这可能是扬声器回环设备而非麦克风!如果录制的是噪音,请在设置中选择正确的麦克风设备。"); - } - - PaStreamParameters inputParams{}; - inputParams.device = devIdx; - inputParams.channelCount = 1; - inputParams.sampleFormat = paFloat32; - // 不使用 paNonInterleaved:input 指针直接是 float* 数组(interleaved mono), - // 回调中可以安全地 static_cast(input) - // 使用高延迟以避免回调过快 - inputParams.suggestedLatency = devInfo->defaultHighInputLatency; - - int framesPerBuffer = sampleRate * bufferSizeMs / 1000; - if (framesPerBuffer < 256) framesPerBuffer = 256; - - PaError err = Pa_OpenStream( - &impl_->ctx.stream, &inputParams, nullptr, sampleRate, - static_cast(framesPerBuffer), - paClipOff, paCallback, &impl_->ctx); - - if (err != paNoError || !impl_->ctx.stream) { - LOG_ERROR(kTag, QString("打开音频流失败: %1").arg(Pa_GetErrorText(err))); - return false; - } - - err = Pa_StartStream(impl_->ctx.stream); + PaError err = Pa_StartStream(impl_->ctx.stream); if (err != paNoError) { LOG_ERROR(kTag, QString("启动音频流失败: %1").arg(Pa_GetErrorText(err))); - Pa_CloseStream(impl_->ctx.stream); - impl_->ctx.stream = nullptr; return false; } @@ -272,17 +287,33 @@ void AudioCapture::stop() { } } + // 只停止流,不关闭 — 下次 start() 可快速复用 if (impl_->ctx.stream) { Pa_StopStream(impl_->ctx.stream); - Pa_CloseStream(impl_->ctx.stream); - impl_->ctx.stream = nullptr; + // 不调用 Pa_CloseStream,保留流以便下次快速启动 } - safePaTerminate(); #endif running_ = false; emit runningChanged(false); - LOG_INFO(kTag, "音频采集已停止"); + LOG_INFO(kTag, "音频采集已停止(流保留,下次启动更快)"); +} + +void AudioCapture::stopAndClose() { +#ifdef HAVE_PORTAUDIO + if (running_ && impl_->ctx.stream) { + Pa_StopStream(impl_->ctx.stream); + } + if (impl_->ctx.stream) { + Pa_CloseStream(impl_->ctx.stream); + impl_->ctx.stream = nullptr; + } + safePaTerminate(); + streamOpen_ = false; +#endif + running_ = false; + emit runningChanged(false); + LOG_INFO(kTag, "音频采集已停止,流已关闭"); } } // namespace impress diff --git a/src/audio/audio_capture.h b/src/audio/audio_capture.h index 61c73e6..9ce8518 100644 --- a/src/audio/audio_capture.h +++ b/src/audio/audio_capture.h @@ -31,9 +31,12 @@ public: int sampleRate = 16000, int bufferSizeMs = 20); - /** @brief 停止采集 */ + /** @brief 停止采集(保留流,下次 start 更快) */ void stop(); + /** @brief 停止采集并关闭流(彻底释放资源) */ + void stopAndClose(); + /** @brief 是否正在采集 */ bool isRunning() const { return running_; } @@ -51,6 +54,10 @@ private: struct Impl; std::unique_ptr impl_; bool running_ = false; + bool streamOpen_ = false; + int lastDeviceIndex_ = -1; + int lastSampleRate_ = 16000; + int lastBufferSizeMs_ = 20; }; } // namespace impress diff --git a/src/core/voice_input_service.cpp b/src/core/voice_input_service.cpp index b35eb18..9aa1ba3 100644 --- a/src/core/voice_input_service.cpp +++ b/src/core/voice_input_service.cpp @@ -70,11 +70,18 @@ VoiceInputService::~VoiceInputService() { bool VoiceInputService::start() { if (running_) return true; - // 1. 初始化音频采集 + // 1. 初始化音频采集并预打开音频流(避免按键时 Pa_OpenStream 延迟 3-4s) impl_->audioCapture = new AudioCapture(this); connect(impl_->audioCapture, &AudioCapture::audioDataReady, this, &VoiceInputService::onAudioData); + int deviceIndex = configManager_->get("audio.input_device").toInt(); + int sampleRate = configManager_->get("stt.sample_rate").toInt(); + int bufferSizeMs = configManager_->get("audio.buffer_size_ms").toInt(); + impl_->audioCapture->start(deviceIndex, sampleRate, bufferSizeMs); + impl_->audioCapture->stop(); // 停止但保留流,后续 start() 只需 Pa_StartStream + LOG_INFO(kTag, "音频流已预打开,后续录音延迟 <100ms"); + // 2. STT 引擎已作为参数传入 // 3. 初始化全局快捷键 @@ -117,7 +124,7 @@ void VoiceInputService::stop() { cooldownTimer_->stop(); if (impl_->audioCapture) { - impl_->audioCapture->stop(); + impl_->audioCapture->stopAndClose(); // 彻底关闭流 } if (impl_->hotkey) { impl_->hotkey->stop();