feat: 添加音频输入设备选择器与音频电平诊断

- audio_capture 启动时输出详细设备信息(名称、Host API、采样率)
- 录音停止时输出 RMS 电平和峰值,帮助诊断音频质量问题
- 设置页面新增音频输入设备下拉选择,支持从 PortAudio 设备列表中手动选择
- 语音输入服务使用配置的音频设备和采样率参数
- 检测 monitor/output 类型设备时发出警告,避免选错回环设备

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Alvin Young 2026-05-13 15:16:56 +08:00
parent a2b216092f
commit dc4ebab47c
5 changed files with 146 additions and 7 deletions

View File

@ -3,6 +3,7 @@
#ifdef HAVE_PORTAUDIO
#include <portaudio.h>
#include <cmath>
#endif
static const char* const kTag = "AudioCapture";
@ -41,6 +42,10 @@ struct CallbackContext {
float buffer[kMaxBufferSize];
#endif
int sampleRate = 16000;
// 音频电平诊断
float peakLevel = 0.0f;
double sumSquares = 0.0;
int sampleCount = 0;
};
struct AudioCapture::Impl {
@ -62,11 +67,22 @@ static int paCallback(const void* input, void* /*output*/,
unsigned long count = frameCount;
if (count > kMaxBufferSize) count = kMaxBufferSize;
// 拷贝到预分配缓冲区
// 拷贝到预分配缓冲区 + 计算音频电平
float peak = 0.0f;
double ss = 0.0;
for (unsigned long i = 0; i < count; i++) {
ctx->buffer[i] = samples[i];
float s = samples[i];
ctx->buffer[i] = s;
float absS = std::fabs(s);
if (absS > peak) peak = absS;
ss += s * s;
}
// 更新诊断数据
if (peak > ctx->peakLevel) ctx->peakLevel = peak;
ctx->sumSquares += ss;
ctx->sampleCount += count;
// 发射信号Qt 使用 QueuedConnection线程安全
std::vector<float> data(ctx->buffer, ctx->buffer + count);
emit ctx->owner->audioDataReady(data, ctx->sampleRate);
@ -101,14 +117,26 @@ QStringList AudioCapture::getDeviceList() {
for (int i = 0; i < count; ++i) {
const PaDeviceInfo* info = Pa_GetDeviceInfo(i);
if (info && info->maxInputChannels > 0) {
devices << QString("%1 (CH:%2, SR:%3)").arg(
info->name).arg(info->maxInputChannels).arg(info->defaultSampleRate);
const PaHostApiInfo* hostApi = Pa_GetHostApiInfo(info->hostApi);
QString hostApiName = hostApi ? hostApi->name : "未知";
devices << QString("[%1] %2 (CH:%3, SR:%4, %5)")
.arg(i).arg(info->name).arg(info->maxInputChannels)
.arg(info->defaultSampleRate).arg(hostApiName);
}
}
#endif
return devices;
}
// Returns the index PortAudio reports as the default input device, or -1 when
// PortAudio support is compiled out or ensurePaInitialized() fails.
int AudioCapture::getDefaultDeviceIndex() {
#ifndef HAVE_PORTAUDIO
    return -1;
#else
    return ensurePaInitialized() ? Pa_GetDefaultInputDevice() : -1;
#endif
}
bool AudioCapture::start(int deviceIndex, int sampleRate, int bufferSizeMs) {
if (running_) {
LOG_WARNING(kTag, "已在运行中");
@ -121,9 +149,21 @@ bool AudioCapture::start(int deviceIndex, int sampleRate, int bufferSizeMs) {
return false;
}
// 枚举所有 Host API 用于诊断
LOG_DEBUG(kTag, QString("Host API 数量: %1").arg(Pa_GetHostApiCount()));
for (int i = 0; i < Pa_GetHostApiCount(); i++) {
const PaHostApiInfo* api = Pa_GetHostApiInfo(i);
if (api) {
LOG_DEBUG(kTag, QString(" Host API #%1: %2 (设备数: %3)")
.arg(i).arg(api->name).arg(api->deviceCount));
}
}
// 选择设备
int devIdx = deviceIndex < 0 ? Pa_GetDefaultInputDevice() : deviceIndex;
if (devIdx < 0 || devIdx >= Pa_GetDeviceCount()) {
LOG_ERROR(kTag, QString("无效的音频设备索引: %1 (默认设备: %2)").arg(deviceIndex).arg(Pa_GetDefaultInputDevice()));
LOG_ERROR(kTag, QString("无效的音频设备索引: %1 (默认设备: %2)")
.arg(deviceIndex).arg(Pa_GetDefaultInputDevice()));
return false;
}
@ -133,6 +173,25 @@ bool AudioCapture::start(int deviceIndex, int sampleRate, int bufferSizeMs) {
return false;
}
const PaHostApiInfo* hostApi = Pa_GetHostApiInfo(devInfo->hostApi);
LOG_INFO(kTag, QString("=== 音频设备诊断 ==="));
LOG_INFO(kTag, QString(" 设备 #%1: %2").arg(devIdx).arg(devInfo->name));
LOG_INFO(kTag, QString(" Host API: %1").arg(hostApi ? hostApi->name : "未知"));
LOG_INFO(kTag, QString(" 最大输入通道: %1").arg(devInfo->maxInputChannels));
LOG_INFO(kTag, QString(" 设备默认采样率: %1 Hz").arg(devInfo->defaultSampleRate, 0, 'f', 0));
LOG_INFO(kTag, QString(" 请求采样率: %1 Hz").arg(sampleRate));
LOG_INFO(kTag, QString(" 采样格式: paFloat32 | paNonInterleaved"));
LOG_INFO(kTag, QString(" 请求通道数: 1 (mono)"));
LOG_INFO(kTag, QString(" 缓冲区: %1ms (%2 帧)").arg(bufferSizeMs)
.arg(sampleRate * bufferSizeMs / 1000));
// 检查是否可能选错设备(名称包含 monitor 的通常是回环设备)
QString devName = QString(devInfo->name).toLower();
if (devName.contains("monitor") || devName.contains("output")) {
LOG_WARNING(kTag, "⚠️ 当前设备名称包含 'monitor' 或 'output'"
"这可能是扬声器回环设备而非麦克风!如果录制的是噪音,请在设置中选择正确的麦克风设备。");
}
PaStreamParameters inputParams{};
inputParams.device = devIdx;
inputParams.channelCount = 1;
@ -161,6 +220,11 @@ bool AudioCapture::start(int deviceIndex, int sampleRate, int bufferSizeMs) {
return false;
}
// 重置诊断计数器
impl_->ctx.peakLevel = 0.0f;
impl_->ctx.sumSquares = 0.0;
impl_->ctx.sampleCount = 0;
impl_->ctx.sampleRate = sampleRate;
running_ = true;
emit runningChanged(true);
@ -178,6 +242,24 @@ void AudioCapture::stop() {
if (!running_) return;
#ifdef HAVE_PORTAUDIO
// Log the level diagnostics accumulated in impl_->ctx (sample count, RMS, peak)
// and classify the signal. Skipped entirely when no samples were counted, which
// also keeps the RMS division below from dividing by zero.
// NOTE(review): peakLevel / sumSquares / sampleCount are updated inside
// paCallback, and they are read here before the Pa_StopStream() call further
// down. Presumably the callback can still be running on the audio thread at
// this point — confirm whether the read should happen after the stream stops.
if (impl_->ctx.sampleCount > 0) {
// RMS over every sample counted since the counters were last reset.
double rms = std::sqrt(impl_->ctx.sumSquares / impl_->ctx.sampleCount);
LOG_INFO(kTag, QString("=== 音频电平诊断 ==="));
LOG_INFO(kTag, QString(" 总样本数: %1").arg(impl_->ctx.sampleCount));
LOG_INFO(kTag, QString(" RMS 电平: %1").arg(rms, 0, 'f', 6));
LOG_INFO(kTag, QString(" 峰值: %1").arg(impl_->ctx.peakLevel, 0, 'f', 4));
// Classification: RMS below 0.001 is reported as too weak, RMS above 0.5 as
// too strong; everything in between is decided by the peak value.
if (rms < 0.001) {
LOG_WARNING(kTag, "⚠️ 音频信号过弱,可能是静音或设备未正确采集");
} else if (rms > 0.5) {
LOG_WARNING(kTag, "⚠️ 音频信号过强,可能存在削波");
// NOTE(review): only a peak above 0.9 is logged as "normal"; every other
// in-range signal falls through to the "amplitude low" warning. This
// threshold/branch order looks suspicious — confirm the intended rule.
} else if (impl_->ctx.peakLevel > 0.9f) {
LOG_INFO(kTag, " 信号幅度正常");
} else {
LOG_WARNING(kTag, "⚠️ 信号幅度偏低,请检查设备选择");
}
}
if (impl_->ctx.stream) {
Pa_StopStream(impl_->ctx.stream);
Pa_CloseStream(impl_->ctx.stream);

View File

@ -23,6 +23,9 @@ public:
/** @brief 获取可用输入设备列表 */
static QStringList getDeviceList();
/** @brief 获取默认输入设备索引 (-1 表示无默认设备) */
static int getDefaultDeviceIndex();
/** @brief 开始采集 */
bool start(int deviceIndex = -1,
int sampleRate = 16000,

View File

@ -126,8 +126,10 @@ void VoiceInputService::onHotkeyActivated() {
longPressTimer_->start(longPressThreshold_);
// 开始音频采集(后台预采集)
int deviceIndex = -1; // 默认设备
impl_->audioCapture->start(deviceIndex, 16000, 20);
int deviceIndex = configManager_->get("audio.input_device").toInt();
int sampleRate = configManager_->get("stt.sample_rate").toInt();
int bufferSizeMs = configManager_->get("audio.buffer_size_ms").toInt();
impl_->audioCapture->start(deviceIndex, sampleRate, bufferSizeMs);
emit statusChanged("等待长按确认...");
}

View File

@ -1,5 +1,6 @@
#include "settings_page.h"
#include "app/config_manager.h"
#include "audio/audio_capture.h"
#include "widgets/hotkey_recorder.h"
#include "utils/logger.h"
@ -114,6 +115,11 @@ void SettingsPage::setupUI() {
auto* audioGroup = new QGroupBox("音频设置", this);
auto* audioLayout = new QFormLayout(audioGroup);
// 音频输入设备选择器
audioDeviceCombo_ = new QComboBox(this);
populateAudioDevices();
audioLayout->addRow("输入设备:", audioDeviceCombo_);
bufferSizeSpin_ = new QSpinBox(this);
bufferSizeSpin_->setRange(10, 100);
bufferSizeSpin_->setValue(20);
@ -197,6 +203,10 @@ void SettingsPage::loadFromConfig() {
chunkDurationSpin_->setValue(configManager_->get("audio.chunk_duration_ms").toInt());
paddingSpin_->setValue(configManager_->get("audio.padding_ms").toInt());
// 恢复音频设备选择
int savedDevice = configManager_->get("audio.input_device").toInt();
selectAudioDevice(savedDevice);
themeCombo_->setCurrentText(configManager_->get("ui.theme").toString());
fontSizeSpin_->setValue(configManager_->get("ui.font_size").toInt());
showWaveformCheck_->setChecked(configManager_->get("ui.show_waveform").toBool());
@ -218,6 +228,7 @@ void SettingsPage::saveToConfig() {
batch["stt.beam_size"] = beamSizeSpin_->value();
batch["stt.temperature"] = temperatureSpin_->value();
batch["shortcuts.voice_hotkey"] = hotkeyRecorder_->hotkeyText();
batch["audio.input_device"] = getSelectedAudioDeviceIndex();
batch["audio.buffer_size_ms"] = bufferSizeSpin_->value();
batch["audio.chunk_duration_ms"] = chunkDurationSpin_->value();
batch["audio.padding_ms"] = paddingSpin_->value();
@ -237,6 +248,43 @@ void SettingsPage::onBrowseModelPath() {
}
}
// Rebuilds the input-device combo box.
//
// The first entry is always "默认设备" with item data -1. When PortAudio is
// enabled, every entry returned by AudioCapture::getDeviceList() follows, with
// the PortAudio device index stored as item data.
//
// getDeviceList() only lists devices that have input channels, so an entry's
// position in the list is not necessarily its PortAudio device index. The
// index is therefore taken from the "[N]" prefix that getDeviceList() puts in
// front of each label. Entries without such a prefix keep the previous
// behaviour: an unprefixed first entry is treated as a placeholder and
// skipped, and any later one falls back to (position - 1).
void SettingsPage::populateAudioDevices() {
    audioDeviceCombo_->clear();
    audioDeviceCombo_->addItem("默认设备", -1);
#ifdef HAVE_PORTAUDIO
    // Extracts N from a label of the form "[N] ..."; returns false if absent.
    const auto parseDeviceIndex = [](const QString& label, int* index) {
        if (!label.startsWith(QLatin1Char('['))) return false;
        const int close = label.indexOf(QLatin1Char(']'));
        if (close < 2) return false;  // need at least one digit between the brackets
        bool ok = false;
        const int value = label.mid(1, close - 1).toInt(&ok);
        if (!ok || value < 0) return false;
        *index = value;
        return true;
    };

    const QStringList devices = AudioCapture::getDeviceList();
    for (int i = 0; i < devices.size(); ++i) {
        int paIndex = -1;
        if (!parseDeviceIndex(devices[i], &paIndex)) {
            if (i == 0) continue;  // unprefixed leading entry: placeholder, already added above
            paIndex = i - 1;       // legacy positional mapping for unprefixed labels
        }
        audioDeviceCombo_->addItem(devices[i], paIndex);  // display text, PortAudio index
    }
#else
    audioDeviceCombo_->addItem("PortAudio 未启用", -1);
#endif
}
// Selects the combo row whose item data equals deviceIndex.
// A negative deviceIndex, or one that no row carries, selects row 0.
void SettingsPage::selectAudioDevice(int deviceIndex) {
    int targetRow = 0;  // row 0 is the fallback selection

    if (deviceIndex >= 0) {
        const int rowCount = audioDeviceCombo_->count();
        for (int row = 0; row < rowCount; ++row) {
            if (audioDeviceCombo_->itemData(row).toInt() == deviceIndex) {
                targetRow = row;  // first matching row wins
                break;
            }
        }
    }

    audioDeviceCombo_->setCurrentIndex(targetRow);
}
// Returns the item data of the currently selected combo row, converted to int.
int SettingsPage::getSelectedAudioDeviceIndex() const {
    const auto selectedData = audioDeviceCombo_->currentData();
    return selectedData.toInt();
}
void SettingsPage::onBrowseTokensPath() {
QString path = QFileDialog::getOpenFileName(this, "选择词表文件", "",
"词表文件 (tokens.txt);;所有文件 (*.*)");

View File

@ -38,6 +38,9 @@ private:
void setupUI();
void loadFromConfig();
void saveToConfig();
void populateAudioDevices();
void selectAudioDevice(int deviceIndex);
int getSelectedAudioDeviceIndex() const;
ConfigManager* configManager_;
@ -58,6 +61,7 @@ private:
QDoubleSpinBox* temperatureSpin_;
// 音频设置
QComboBox* audioDeviceCombo_;
QSpinBox* bufferSizeSpin_;
QSpinBox* chunkDurationSpin_;
QSpinBox* paddingSpin_;