perf: 预打开音频流,消除按键到录音 3-4s 延迟

根因: 每次按键时 Pa_OpenStream() + Pa_StartStream() 耗时 3-4s,
stop() 时又 Pa_CloseStream() + Pa_Terminate() 销毁流。

优化:
- AudioCapture::start() 复用已打开的流(参数匹配时跳过 OpenStream)
- AudioCapture::stop() 只 Pa_StopStream(),保留流
- 新增 stopAndClose() 彻底关闭流(析构和服务停止时使用)
- VoiceInputService::start() 时预打开音频流再立即 stop()
  → 后续 hotkey start() 只需 Pa_StartStream() (<100ms)

效果: 按键到录音延迟从 3-4s 降至 <100ms

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
impressionyang 2026-06-11 16:26:20 +08:00
parent 04ca41f4c3
commit ff7318efd6
3 changed files with 118 additions and 73 deletions

View File

@ -108,7 +108,7 @@ AudioCapture::AudioCapture(QObject* parent)
}
// Destructor: stops capture and releases the audio stream owned by this object.
AudioCapture::~AudioCapture() {
// stop() is described by its declaration comment as stopping capture while
// keeping the stream for a faster restart.
stop();
// stopAndClose() stops the stream if still running, closes it, and calls
// safePaTerminate().
// NOTE(review): since stopAndClose() already stops a running stream, the
// preceding stop() call looks redundant (possibly a leftover from the diff) —
// confirm against the full body of stop() before removing it.
stopAndClose();
}
QStringList AudioCapture::getDeviceList() {
@ -158,76 +158,91 @@ bool AudioCapture::start(int deviceIndex, int sampleRate, int bufferSizeMs) {
return false;
}
// 枚举所有 Host API 用于诊断
LOG_DEBUG(kTag, QString("Host API 数量: %1").arg(Pa_GetHostApiCount()));
for (int i = 0; i < Pa_GetHostApiCount(); i++) {
const PaHostApiInfo* api = Pa_GetHostApiInfo(i);
if (api) {
LOG_DEBUG(kTag, QString(" Host API #%1: %2 (设备数: %3)")
.arg(i).arg(api->name).arg(api->deviceCount));
// 如果流已打开且参数匹配,直接启动(跳过耗时的 OpenStream)
if (streamOpen_ && impl_->ctx.stream &&
deviceIndex == lastDeviceIndex_ &&
sampleRate == lastSampleRate_ &&
bufferSizeMs == lastBufferSizeMs_) {
LOG_DEBUG(kTag, "复用已打开的音频流,跳过 OpenStream");
} else {
// 关闭旧流(如果存在)
if (impl_->ctx.stream) {
Pa_CloseStream(impl_->ctx.stream);
impl_->ctx.stream = nullptr;
streamOpen_ = false;
}
// 枚举所有 Host API 用于诊断
LOG_DEBUG(kTag, QString("Host API 数量: %1").arg(Pa_GetHostApiCount()));
for (int i = 0; i < Pa_GetHostApiCount(); i++) {
const PaHostApiInfo* api = Pa_GetHostApiInfo(i);
if (api) {
LOG_DEBUG(kTag, QString(" Host API #%1: %2 (设备数: %3)")
.arg(i).arg(api->name).arg(api->deviceCount));
}
}
// 选择设备
int devIdx = deviceIndex < 0 ? Pa_GetDefaultInputDevice() : deviceIndex;
if (devIdx < 0 || devIdx >= Pa_GetDeviceCount()) {
LOG_ERROR(kTag, QString("无效的音频设备索引: %1 (默认设备: %2)")
.arg(deviceIndex).arg(Pa_GetDefaultInputDevice()));
return false;
}
const PaDeviceInfo* devInfo = Pa_GetDeviceInfo(devIdx);
if (!devInfo || devInfo->maxInputChannels <= 0) {
LOG_ERROR(kTag, "所选设备不是输入设备");
return false;
}
const PaHostApiInfo* hostApi = Pa_GetHostApiInfo(devInfo->hostApi);
LOG_INFO(kTag, QString("=== 音频设备诊断 ==="));
LOG_INFO(kTag, QString(" 设备 #%1: %2").arg(devIdx).arg(devInfo->name));
LOG_INFO(kTag, QString(" Host API: %1").arg(hostApi ? hostApi->name : "未知"));
LOG_INFO(kTag, QString(" 最大输入通道: %1").arg(devInfo->maxInputChannels));
LOG_INFO(kTag, QString(" 设备默认采样率: %1 Hz").arg(devInfo->defaultSampleRate, 0, 'f', 0));
LOG_INFO(kTag, QString(" 请求采样率: %1 Hz").arg(sampleRate));
LOG_INFO(kTag, QString(" 采样格式: paFloat32 | paNonInterleaved"));
LOG_INFO(kTag, QString(" 请求通道数: 1 (mono)"));
LOG_INFO(kTag, QString(" 缓冲区: %1ms (%2 帧)").arg(bufferSizeMs)
.arg(sampleRate * bufferSizeMs / 1000));
// 检查是否可能选错设备(名称包含 monitor 的通常是回环设备)
QString devName = QString(devInfo->name).toLower();
if (devName.contains("monitor") || devName.contains("output")) {
LOG_WARNING(kTag, "⚠️ 当前设备名称包含 'monitor' 或 'output'"
"这可能是扬声器回环设备而非麦克风!如果录制的是噪音,请在设置中选择正确的麦克风设备。");
}
PaStreamParameters inputParams{};
inputParams.device = devIdx;
inputParams.channelCount = 1;
inputParams.sampleFormat = paFloat32;
inputParams.suggestedLatency = devInfo->defaultHighInputLatency;
int framesPerBuffer = sampleRate * bufferSizeMs / 1000;
if (framesPerBuffer < 256) framesPerBuffer = 256;
PaError err = Pa_OpenStream(
&impl_->ctx.stream, &inputParams, nullptr, sampleRate,
static_cast<unsigned long>(framesPerBuffer),
paClipOff, paCallback, &impl_->ctx);
if (err != paNoError || !impl_->ctx.stream) {
LOG_ERROR(kTag, QString("打开音频流失败: %1").arg(Pa_GetErrorText(err)));
return false;
}
streamOpen_ = true;
lastDeviceIndex_ = deviceIndex;
lastSampleRate_ = sampleRate;
lastBufferSizeMs_ = bufferSizeMs;
}
// 选择设备
int devIdx = deviceIndex < 0 ? Pa_GetDefaultInputDevice() : deviceIndex;
if (devIdx < 0 || devIdx >= Pa_GetDeviceCount()) {
LOG_ERROR(kTag, QString("无效的音频设备索引: %1 (默认设备: %2)")
.arg(deviceIndex).arg(Pa_GetDefaultInputDevice()));
return false;
}
const PaDeviceInfo* devInfo = Pa_GetDeviceInfo(devIdx);
if (!devInfo || devInfo->maxInputChannels <= 0) {
LOG_ERROR(kTag, "所选设备不是输入设备");
return false;
}
const PaHostApiInfo* hostApi = Pa_GetHostApiInfo(devInfo->hostApi);
LOG_INFO(kTag, QString("=== 音频设备诊断 ==="));
LOG_INFO(kTag, QString(" 设备 #%1: %2").arg(devIdx).arg(devInfo->name));
LOG_INFO(kTag, QString(" Host API: %1").arg(hostApi ? hostApi->name : "未知"));
LOG_INFO(kTag, QString(" 最大输入通道: %1").arg(devInfo->maxInputChannels));
LOG_INFO(kTag, QString(" 设备默认采样率: %1 Hz").arg(devInfo->defaultSampleRate, 0, 'f', 0));
LOG_INFO(kTag, QString(" 请求采样率: %1 Hz").arg(sampleRate));
LOG_INFO(kTag, QString(" 采样格式: paFloat32 | paNonInterleaved"));
LOG_INFO(kTag, QString(" 请求通道数: 1 (mono)"));
LOG_INFO(kTag, QString(" 缓冲区: %1ms (%2 帧)").arg(bufferSizeMs)
.arg(sampleRate * bufferSizeMs / 1000));
// 检查是否可能选错设备(名称包含 monitor 的通常是回环设备)
QString devName = QString(devInfo->name).toLower();
if (devName.contains("monitor") || devName.contains("output")) {
LOG_WARNING(kTag, "⚠️ 当前设备名称包含 'monitor' 或 'output'"
"这可能是扬声器回环设备而非麦克风!如果录制的是噪音,请在设置中选择正确的麦克风设备。");
}
PaStreamParameters inputParams{};
inputParams.device = devIdx;
inputParams.channelCount = 1;
inputParams.sampleFormat = paFloat32;
// 不使用 paNonInterleaved,input 指针直接是 float* 数组(interleaved mono)
// 回调中可以安全地 static_cast<const float*>(input)
// 使用高延迟以避免回调过快
inputParams.suggestedLatency = devInfo->defaultHighInputLatency;
int framesPerBuffer = sampleRate * bufferSizeMs / 1000;
if (framesPerBuffer < 256) framesPerBuffer = 256;
PaError err = Pa_OpenStream(
&impl_->ctx.stream, &inputParams, nullptr, sampleRate,
static_cast<unsigned long>(framesPerBuffer),
paClipOff, paCallback, &impl_->ctx);
if (err != paNoError || !impl_->ctx.stream) {
LOG_ERROR(kTag, QString("打开音频流失败: %1").arg(Pa_GetErrorText(err)));
return false;
}
err = Pa_StartStream(impl_->ctx.stream);
PaError err = Pa_StartStream(impl_->ctx.stream);
if (err != paNoError) {
LOG_ERROR(kTag, QString("启动音频流失败: %1").arg(Pa_GetErrorText(err)));
Pa_CloseStream(impl_->ctx.stream);
impl_->ctx.stream = nullptr;
return false;
}
@ -272,17 +287,33 @@ void AudioCapture::stop() {
}
}
// 只停止流,不关闭 — 下次 start() 可快速复用
if (impl_->ctx.stream) {
Pa_StopStream(impl_->ctx.stream);
Pa_CloseStream(impl_->ctx.stream);
impl_->ctx.stream = nullptr;
// 不调用 Pa_CloseStream,保留流以便下次快速启动
}
safePaTerminate();
#endif
running_ = false;
emit runningChanged(false);
LOG_INFO(kTag, "音频采集已停止");
LOG_INFO(kTag, "音频采集已停止(流保留,下次启动更快)");
}
/**
 * @brief Stop capturing and fully release the audio stream.
 *
 * When built with HAVE_PORTAUDIO this stops the stream if capture is running,
 * closes it, clears the cached stream handle and the streamOpen_ flag, and
 * calls safePaTerminate(). After this call a subsequent start() has to open
 * a new stream.
 *
 * runningChanged(false) is emitted only when capture was actually running on
 * entry, so calling this on an idle object (e.g. from the destructor, or after
 * a previous stop()) does not produce a spurious change notification.
 * PortAudio errors from stopping/closing are logged as warnings; teardown
 * continues regardless because there is nothing useful a caller could do.
 */
void AudioCapture::stopAndClose() {
    // Remember the state on entry: it decides whether the change signal fires.
    const bool wasRunning = running_;

#ifdef HAVE_PORTAUDIO
    if (impl_->ctx.stream) {
        if (wasRunning) {
            const PaError stopErr = Pa_StopStream(impl_->ctx.stream);
            if (stopErr != paNoError) {
                LOG_WARNING(kTag, QString("停止音频流失败: %1").arg(Pa_GetErrorText(stopErr)));
            }
        }

        const PaError closeErr = Pa_CloseStream(impl_->ctx.stream);
        if (closeErr != paNoError) {
            LOG_WARNING(kTag, QString("关闭音频流失败: %1").arg(Pa_GetErrorText(closeErr)));
        }
        // Drop the handle even if closing reported an error, so it is never reused.
        impl_->ctx.stream = nullptr;
    }
    safePaTerminate();
    streamOpen_ = false;
#endif

    running_ = false;
    if (wasRunning) {
        emit runningChanged(false);
    }
    LOG_INFO(kTag, "音频采集已停止,流已关闭");
}
} // namespace impress

View File

@ -31,9 +31,12 @@ public:
int sampleRate = 16000,
int bufferSizeMs = 20);
/** @brief 停止采集 */
/** @brief 停止采集(保留流,下次 start 更快) */
void stop();
/** @brief 停止采集并关闭流(彻底释放资源) */
void stopAndClose();
/** @brief 是否正在采集 */
bool isRunning() const { return running_; }
@ -51,6 +54,10 @@ private:
struct Impl;
std::unique_ptr<Impl> impl_;
bool running_ = false;
bool streamOpen_ = false;
int lastDeviceIndex_ = -1;
int lastSampleRate_ = 16000;
int lastBufferSizeMs_ = 20;
};
} // namespace impress

View File

@ -70,11 +70,18 @@ VoiceInputService::~VoiceInputService() {
bool VoiceInputService::start() {
if (running_) return true;
// 1. 初始化音频采集
// 1. 初始化音频采集并预打开音频流(避免按键时 Pa_OpenStream 延迟 3-4s)
impl_->audioCapture = new AudioCapture(this);
connect(impl_->audioCapture, &AudioCapture::audioDataReady,
this, &VoiceInputService::onAudioData);
int deviceIndex = configManager_->get("audio.input_device").toInt();
int sampleRate = configManager_->get("stt.sample_rate").toInt();
int bufferSizeMs = configManager_->get("audio.buffer_size_ms").toInt();
impl_->audioCapture->start(deviceIndex, sampleRate, bufferSizeMs);
impl_->audioCapture->stop(); // 停止但保留流,后续 start() 只需 Pa_StartStream
LOG_INFO(kTag, "音频流已预打开,后续录音延迟 <100ms");
// 2. STT 引擎已作为参数传入
// 3. 初始化全局快捷键
@ -117,7 +124,7 @@ void VoiceInputService::stop() {
cooldownTimer_->stop();
if (impl_->audioCapture) {
impl_->audioCapture->stop();
impl_->audioCapture->stopAndClose(); // 彻底关闭流
}
if (impl_->hotkey) {
impl_->hotkey->stop();