From dc4ebab47c355f49628a498799aa8cbed0f6c12d Mon Sep 17 00:00:00 2001 From: impressionyang Date: Wed, 13 May 2026 15:16:56 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E9=9F=B3=E9=A2=91?= =?UTF-8?q?=E8=BE=93=E5=85=A5=E8=AE=BE=E5=A4=87=E9=80=89=E6=8B=A9=E5=99=A8?= =?UTF-8?q?=E4=B8=8E=E9=9F=B3=E9=A2=91=E7=94=B5=E5=B9=B3=E8=AF=8A=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - audio_capture 启动时输出详细设备信息(名称、Host API、采样率) - 录音停止时输出 RMS 电平和峰值,帮助诊断音频质量问题 - 设置页面新增音频输入设备下拉选择,支持从 PortAudio 设备列表中手动选择 - 语音输入服务使用配置的音频设备和采样率参数 - 检测 monitor/output 类型设备时发出警告,避免选错回环设备 Co-Authored-By: Claude Opus 4.6 --- src/audio/audio_capture.cpp | 92 ++++++++++++++++++++++++++++++-- src/audio/audio_capture.h | 3 ++ src/core/voice_input_service.cpp | 6 ++- src/ui/settings_page.cpp | 48 +++++++++++++++++ src/ui/settings_page.h | 4 ++ 5 files changed, 146 insertions(+), 7 deletions(-) diff --git a/src/audio/audio_capture.cpp b/src/audio/audio_capture.cpp index e5d1f7d..5119d88 100644 --- a/src/audio/audio_capture.cpp +++ b/src/audio/audio_capture.cpp @@ -3,6 +3,7 @@ #ifdef HAVE_PORTAUDIO #include +#include #endif static const char* const kTag = "AudioCapture"; @@ -41,6 +42,10 @@ struct CallbackContext { float buffer[kMaxBufferSize]; #endif int sampleRate = 16000; + // 音频电平诊断 + float peakLevel = 0.0f; + double sumSquares = 0.0; + int sampleCount = 0; }; struct AudioCapture::Impl { @@ -62,11 +67,22 @@ static int paCallback(const void* input, void* /*output*/, unsigned long count = frameCount; if (count > kMaxBufferSize) count = kMaxBufferSize; - // 拷贝到预分配缓冲区 + // 拷贝到预分配缓冲区 + 计算音频电平 + float peak = 0.0f; + double ss = 0.0; for (unsigned long i = 0; i < count; i++) { - ctx->buffer[i] = samples[i]; + float s = samples[i]; + ctx->buffer[i] = s; + float absS = std::fabs(s); + if (absS > peak) peak = absS; + ss += s * s; } + // 更新诊断数据 + if (peak > ctx->peakLevel) ctx->peakLevel = peak; + ctx->sumSquares += ss; + ctx->sampleCount += count; + // 发射信号(Qt 使用 QueuedConnection,线程安全) std::vector data(ctx->buffer, ctx->buffer + count); emit ctx->owner->audioDataReady(data, ctx->sampleRate); @@ -101,14 +117,26 @@ QStringList AudioCapture::getDeviceList() { for (int i = 0; i < count; ++i) { const PaDeviceInfo* info = Pa_GetDeviceInfo(i); if (info && info->maxInputChannels > 0) { - devices << QString("%1 (CH:%2, SR:%3)").arg( - info->name).arg(info->maxInputChannels).arg(info->defaultSampleRate); + const PaHostApiInfo* hostApi = Pa_GetHostApiInfo(info->hostApi); + QString hostApiName = hostApi ? hostApi->name : "未知"; + devices << QString("[%1] %2 (CH:%3, SR:%4, %5)") + .arg(i).arg(info->name).arg(info->maxInputChannels) + .arg(info->defaultSampleRate).arg(hostApiName); } } #endif return devices; } +int AudioCapture::getDefaultDeviceIndex() { +#ifdef HAVE_PORTAUDIO + if (!ensurePaInitialized()) return -1; + return Pa_GetDefaultInputDevice(); +#else + return -1; +#endif +} + bool AudioCapture::start(int deviceIndex, int sampleRate, int bufferSizeMs) { if (running_) { LOG_WARNING(kTag, "已在运行中"); @@ -121,9 +149,21 @@ bool AudioCapture::start(int deviceIndex, int sampleRate, int bufferSizeMs) { return false; } + // 枚举所有 Host API 用于诊断 + LOG_DEBUG(kTag, QString("Host API 数量: %1").arg(Pa_GetHostApiCount())); + for (int i = 0; i < Pa_GetHostApiCount(); i++) { + const PaHostApiInfo* api = Pa_GetHostApiInfo(i); + if (api) { + LOG_DEBUG(kTag, QString(" Host API #%1: %2 (设备数: %3)") + .arg(i).arg(api->name).arg(api->deviceCount)); + } + } + + // 选择设备 int devIdx = deviceIndex < 0 ? Pa_GetDefaultInputDevice() : deviceIndex; if (devIdx < 0 || devIdx >= Pa_GetDeviceCount()) { - LOG_ERROR(kTag, QString("无效的音频设备索引: %1 (默认设备: %2)").arg(deviceIndex).arg(Pa_GetDefaultInputDevice())); + LOG_ERROR(kTag, QString("无效的音频设备索引: %1 (默认设备: %2)") + .arg(deviceIndex).arg(Pa_GetDefaultInputDevice())); return false; } @@ -133,6 +173,25 @@ bool AudioCapture::start(int deviceIndex, int sampleRate, int bufferSizeMs) { return false; } + const PaHostApiInfo* hostApi = Pa_GetHostApiInfo(devInfo->hostApi); + LOG_INFO(kTag, QString("=== 音频设备诊断 ===")); + LOG_INFO(kTag, QString(" 设备 #%1: %2").arg(devIdx).arg(devInfo->name)); + LOG_INFO(kTag, QString(" Host API: %1").arg(hostApi ? hostApi->name : "未知")); + LOG_INFO(kTag, QString(" 最大输入通道: %1").arg(devInfo->maxInputChannels)); + LOG_INFO(kTag, QString(" 设备默认采样率: %1 Hz").arg(devInfo->defaultSampleRate, 0, 'f', 0)); + LOG_INFO(kTag, QString(" 请求采样率: %1 Hz").arg(sampleRate)); + LOG_INFO(kTag, QString(" 采样格式: paFloat32 | paNonInterleaved")); + LOG_INFO(kTag, QString(" 请求通道数: 1 (mono)")); + LOG_INFO(kTag, QString(" 缓冲区: %1ms (%2 帧)").arg(bufferSizeMs) + .arg(sampleRate * bufferSizeMs / 1000)); + + // 检查是否可能选错设备(名称包含 monitor 的通常是回环设备) + QString devName = QString(devInfo->name).toLower(); + if (devName.contains("monitor") || devName.contains("output")) { + LOG_WARNING(kTag, "⚠️ 当前设备名称包含 'monitor' 或 'output'," + "这可能是扬声器回环设备而非麦克风!如果录制的是噪音,请在设置中选择正确的麦克风设备。"); + } + PaStreamParameters inputParams{}; inputParams.device = devIdx; inputParams.channelCount = 1; @@ -161,6 +220,11 @@ bool AudioCapture::start(int deviceIndex, int sampleRate, int bufferSizeMs) { return false; } + // 重置诊断计数器 + impl_->ctx.peakLevel = 0.0f; + impl_->ctx.sumSquares = 0.0; + impl_->ctx.sampleCount = 0; + impl_->ctx.sampleRate = sampleRate; running_ = true; emit runningChanged(true); @@ -178,6 +242,24 @@ void AudioCapture::stop() { if (!running_) return; #ifdef HAVE_PORTAUDIO + // 输出音频电平诊断信息 + if (impl_->ctx.sampleCount > 0) { + double rms = std::sqrt(impl_->ctx.sumSquares / impl_->ctx.sampleCount); + LOG_INFO(kTag, QString("=== 音频电平诊断 ===")); + LOG_INFO(kTag, QString(" 总样本数: %1").arg(impl_->ctx.sampleCount)); + LOG_INFO(kTag, QString(" RMS 电平: %1").arg(rms, 0, 'f', 6)); + LOG_INFO(kTag, QString(" 峰值: %1").arg(impl_->ctx.peakLevel, 0, 'f', 4)); + if (rms < 0.001) { + LOG_WARNING(kTag, "⚠️ 音频信号过弱,可能是静音或设备未正确采集"); + } else if (rms > 0.5) { + LOG_WARNING(kTag, "⚠️ 音频信号过强,可能存在削波"); + } else if (impl_->ctx.peakLevel > 0.9f) { + LOG_INFO(kTag, " 信号幅度正常"); + } else { + LOG_WARNING(kTag, "⚠️ 信号幅度偏低,请检查设备选择"); + } + } + if (impl_->ctx.stream) { Pa_StopStream(impl_->ctx.stream); Pa_CloseStream(impl_->ctx.stream); diff --git a/src/audio/audio_capture.h b/src/audio/audio_capture.h index a9e4c93..61c73e6 100644 --- a/src/audio/audio_capture.h +++ b/src/audio/audio_capture.h @@ -23,6 +23,9 @@ public: /** @brief 获取可用输入设备列表 */ static QStringList getDeviceList(); + /** @brief 获取默认输入设备索引 (-1 表示无默认设备) */ + static int getDefaultDeviceIndex(); + /** @brief 开始采集 */ bool start(int deviceIndex = -1, int sampleRate = 16000, diff --git a/src/core/voice_input_service.cpp b/src/core/voice_input_service.cpp index 6ef0910..5241e81 100644 --- a/src/core/voice_input_service.cpp +++ b/src/core/voice_input_service.cpp @@ -126,8 +126,10 @@ void VoiceInputService::onHotkeyActivated() { longPressTimer_->start(longPressThreshold_); // 开始音频采集(后台预采集) - int deviceIndex = -1; // 默认设备 - impl_->audioCapture->start(deviceIndex, 16000, 20); + int deviceIndex = configManager_->get("audio.input_device").toInt(); + int sampleRate = configManager_->get("stt.sample_rate").toInt(); + int bufferSizeMs = configManager_->get("audio.buffer_size_ms").toInt(); + impl_->audioCapture->start(deviceIndex, sampleRate, bufferSizeMs); emit statusChanged("等待长按确认..."); } diff --git a/src/ui/settings_page.cpp b/src/ui/settings_page.cpp index e41d2df..934738b 100644 --- a/src/ui/settings_page.cpp +++ b/src/ui/settings_page.cpp @@ -1,5 +1,6 @@ #include "settings_page.h" #include "app/config_manager.h" +#include "audio/audio_capture.h" #include "widgets/hotkey_recorder.h" #include "utils/logger.h" @@ -114,6 +115,11 @@ void SettingsPage::setupUI() { auto* audioGroup = new QGroupBox("音频设置", this); auto* audioLayout = new QFormLayout(audioGroup); + // 音频输入设备选择器 + audioDeviceCombo_ = new QComboBox(this); + populateAudioDevices(); + audioLayout->addRow("输入设备:", audioDeviceCombo_); + bufferSizeSpin_ = new QSpinBox(this); bufferSizeSpin_->setRange(10, 100); bufferSizeSpin_->setValue(20); @@ -197,6 +203,10 @@ void SettingsPage::loadFromConfig() { chunkDurationSpin_->setValue(configManager_->get("audio.chunk_duration_ms").toInt()); paddingSpin_->setValue(configManager_->get("audio.padding_ms").toInt()); + // 恢复音频设备选择 + int savedDevice = configManager_->get("audio.input_device").toInt(); + selectAudioDevice(savedDevice); + themeCombo_->setCurrentText(configManager_->get("ui.theme").toString()); fontSizeSpin_->setValue(configManager_->get("ui.font_size").toInt()); showWaveformCheck_->setChecked(configManager_->get("ui.show_waveform").toBool()); @@ -218,6 +228,7 @@ void SettingsPage::saveToConfig() { batch["stt.beam_size"] = beamSizeSpin_->value(); batch["stt.temperature"] = temperatureSpin_->value(); batch["shortcuts.voice_hotkey"] = hotkeyRecorder_->hotkeyText(); + batch["audio.input_device"] = getSelectedAudioDeviceIndex(); batch["audio.buffer_size_ms"] = bufferSizeSpin_->value(); batch["audio.chunk_duration_ms"] = chunkDurationSpin_->value(); batch["audio.padding_ms"] = paddingSpin_->value(); @@ -237,6 +248,43 @@ void SettingsPage::onBrowseModelPath() { } } +void SettingsPage::populateAudioDevices() { + audioDeviceCombo_->clear(); + audioDeviceCombo_->addItem("默认设备", -1); + +#ifdef HAVE_PORTAUDIO + // 直接使用 PortAudio 枚举所有输入设备 + QStringList devices = AudioCapture::getDeviceList(); + // 跳过第一个 "默认设备"(已手动添加) + for (int i = 1; i < devices.size(); i++) { + audioDeviceCombo_->addItem(devices[i], i - 1); // display text, PortAudio index + } +#else + audioDeviceCombo_->addItem("PortAudio 未启用", -1); +#endif +} + +void SettingsPage::selectAudioDevice(int deviceIndex) { + // deviceIndex == -1 表示默认设备,对应 combo 的第一项(index 0) + if (deviceIndex < 0) { + audioDeviceCombo_->setCurrentIndex(0); + } else { + // 在 combo 中查找 data == deviceIndex 的项 + for (int i = 0; i < audioDeviceCombo_->count(); i++) { + if (audioDeviceCombo_->itemData(i).toInt() == deviceIndex) { + audioDeviceCombo_->setCurrentIndex(i); + return; + } + } + // 如果没找到,使用默认设备 + audioDeviceCombo_->setCurrentIndex(0); + } +} + +int SettingsPage::getSelectedAudioDeviceIndex() const { + return audioDeviceCombo_->currentData().toInt(); +} + void SettingsPage::onBrowseTokensPath() { QString path = QFileDialog::getOpenFileName(this, "选择词表文件", "", "词表文件 (tokens.txt);;所有文件 (*.*)"); diff --git a/src/ui/settings_page.h b/src/ui/settings_page.h index aa28d37..f0f9043 100644 --- a/src/ui/settings_page.h +++ b/src/ui/settings_page.h @@ -38,6 +38,9 @@ private: void setupUI(); void loadFromConfig(); void saveToConfig(); + void populateAudioDevices(); + void selectAudioDevice(int deviceIndex); + int getSelectedAudioDeviceIndex() const; ConfigManager* configManager_; @@ -58,6 +61,7 @@ private: QDoubleSpinBox* temperatureSpin_; // 音频设置 + QComboBox* audioDeviceCombo_; QSpinBox* bufferSizeSpin_; QSpinBox* chunkDurationSpin_; QSpinBox* paddingSpin_;