fix: STT测试页面异步推理，防止模型推理阻塞音频采集

SenseVoice 完整推理耗时数秒，原实现中 processAudioChunk 在音频回调中同步调用 infer()，导致 PortAudio 回调线程阻塞，表现为"程序无响应"。修复方案： - onAudioDataReady 仅缓存音频数据，不再同步调用推理 - QTimer 周期性触发，从缓冲区提取音频块 - 推理在 QtConcurrent 后台线程执行 - isInferencing_ 标志防止排队积压，推理期间跳过新音频块 - UI 实时显示缓冲区状态和推理进度 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-12 19:41:38 +08:00 · 2026-05-12 19:41:38 +08:00 · a7a5b141a9
commit a7a5b141a9
parent 9a6dfa3b88
2 changed files with 85 additions and 27 deletions
--- a/src/ui/stt_test_page.cpp
+++ b/src/ui/stt_test_page.cpp
@ -18,6 +18,8 @@
 #include <QMessageBox>
 #include <QDateTime>
 #include <QFileInfo>
+#include <QTimer>
+#include <QtConcurrent>

 static const char* const kTag = "STTTestPage";

@ -28,6 +30,7 @@ STTTestPage::STTTestPage(ConfigManager* configManager, QWidget* parent)
    , configManager_(configManager)
    , sttEngine_(new SenseVoiceEngine(this))
    , audioCapture_(new AudioCapture(this))
+    , inferenceTimer_(new QTimer(this))
 {
    setupUI();

@ -40,6 +43,10 @@ STTTestPage::STTTestPage(ConfigManager* configManager, QWidget* parent)
            this, &STTTestPage::onModelLoadError);
    connect(sttEngine_, &SenseVoiceEngine::modelUnloaded,
            this, &STTTestPage::onModelUnloaded);
+
+    // 推理定时器：周期性触发后台推理
+    connect(inferenceTimer_, &QTimer::timeout,
+            this, &STTTestPage::onInferenceTimer);
 }

 STTTestPage::~STTTestPage() = default;
@ -114,8 +121,11 @@ void STTTestPage::updateUIState() {
 void STTTestPage::onToggleRecording() {
    if (isRecording_) {
        audioCapture_->stop();
+        inferenceTimer_->stop();
        sttEngine_->unloadModel();
        isRecording_ = false;
+        isInferencing_ = false;
+        audioBuffer_.clear();
    } else {
        // 读取配置
        QString modelPath = configManager_->get("stt.model_path").toString();
@ -172,49 +182,92 @@ void STTTestPage::onModelLoadError(const QString& modelPath, const QString& erro

 void STTTestPage::onModelUnloaded() {
    isLoadingModel_ = false;
+    isInferencing_ = false;
    statusLabel_->setText("模型已卸载");
 }

 void STTTestPage::startAudioCapture() {
    int deviceIdx = deviceCombo_->currentIndex() - 1;
-    int sampleRate = configManager_->get("stt.sample_rate").toInt();
+    audioSampleRate_ = configManager_->get("stt.sample_rate").toInt();

-    if (!audioCapture_->start(deviceIdx, sampleRate)) {
+    if (!audioCapture_->start(deviceIdx, audioSampleRate_)) {
        QMessageBox::critical(this, "错误", "无法启动音频采集");
        return;
    }
+
    isRecording_ = true;
+    audioBuffer_.clear();
+    isInferencing_ = false;
+
+    // 启动周期性推理定时器
+    startInferenceTimer();
+
    statusLabel_->setText(QString("录音中 | 模型: %1").arg(
        QFileInfo(currentModelPath_).fileName()));
    updateUIState();
 }

-void STTTestPage::onAudioDataReady(const std::vector<float>& samples, int sampleRate) {
-    chunkBuffer_.insert(chunkBuffer_.end(), samples.begin(), samples.end());
-
-    int chunkSize = configManager_->get("stt.sample_rate").toInt()
-                  * chunkSizeSpin_->value() / 1000;
-
-    if (static_cast<int>(chunkBuffer_.size()) >= chunkSize) {
-        std::vector<float> chunk(chunkBuffer_.begin(), chunkBuffer_.begin() + chunkSize);
-        chunkBuffer_.erase(chunkBuffer_.begin(), chunkBuffer_.begin() + chunkSize);
-
-        waveform_->setSamples(samples);
-        processAudioChunk(chunk, sampleRate);
-    } else {
-        waveform_->setSamples(samples);
-    }
+void STTTestPage::startInferenceTimer() {
+    int interval = chunkSizeSpin_->value(); // 与推理间隔同步
+    inferenceTimer_->start(interval);
 }

-void STTTestPage::processAudioChunk(const std::vector<float>& samples, int sampleRate) {
-    // 模型已在 onToggleRecording 中异步加载，此处防御性检查
-    if (!sttEngine_->isLoaded()) {
+void STTTestPage::onAudioDataReady(const std::vector<float>& samples, int /* sampleRate */) {
+    // 仅缓存音频数据，不直接调用推理
+    // 避免推理阻塞音频采集线程
+    audioBuffer_.insert(audioBuffer_.end(), samples.begin(), samples.end());
+
+    // 更新波形显示（使用最新数据片段）
+    waveform_->setSamples(samples);
+}
+
+void STTTestPage::onInferenceTimer() {
+    if (!sttEngine_->isLoaded() || isInferencing_) {
        return;
    }

-    auto result = sttEngine_->infer(samples, sampleRate,
-        configManager_->get("stt.language").toString());
-    emit onRecognitionResult(result.text, result.confidence, result.latency_ms, result.isFinal);
+    int chunkSize = audioSampleRate_ * chunkSizeSpin_->value() / 1000;
+
+    if (static_cast<int>(audioBuffer_.size()) < chunkSize) {
+        return; // 缓冲区数据不足，等待下一次
+    }
+
+    // 提取一个推理块的音频
+    std::vector<float> chunk(audioBuffer_.begin(), audioBuffer_.begin() + chunkSize);
+    audioBuffer_.erase(audioBuffer_.begin(), audioBuffer_.begin() + chunkSize);
+
+    // 在后台线程执行推理
+    isInferencing_ = true;
+    statusLabel_->setText("推理中...");
+
+    int sampleRate = audioSampleRate_;
+    QString language = configManager_->get("stt.language").toString();
+
+    (void)QtConcurrent::run([this, chunk, sampleRate, language]() {
+        auto result = sttEngine_->infer(chunk, sampleRate, language);
+
+        // 回到主线程更新 UI
+        QMetaObject::invokeMethod(this, [this, result]() {
+            isInferencing_ = false;
+
+            if (result.text.isEmpty() && !result.text.isNull()) {
+                // 静音段
+                latencyLabel_->setText(QString("延迟: %1 ms").arg(result.latency_ms, 0, 'f', 1));
+            } else {
+                emit onRecognitionResult(result.text, result.confidence,
+                                          result.latency_ms, result.isFinal);
+            }
+
+            // 更新状态
+            if (isRecording_) {
+                int bufMs = (audioSampleRate_ > 0)
+                    ? static_cast<int>(audioBuffer_.size() * 1000 / audioSampleRate_)
+                    : 0;
+                statusLabel_->setText(
+                    QString("录音中 | 缓冲区: %1 ms").arg(bufMs));
+            }
+        }, Qt::QueuedConnection);
+    });
 }

 void STTTestPage::onRecognitionResult(const QString& text, float confidence,
--- a/src/ui/stt_test_page.h
+++ b/src/ui/stt_test_page.h
@ -9,6 +9,7 @@ class QPushButton;
 class QComboBox;
 class QTextEdit;
 class QSpinBox;
+class QTimer;

 namespace impress {

@ -19,9 +20,9 @@ class AudioCapture;
 /**
 * @brief STT 测试页面
 *
- * 实时麦克风采集 + 流式识别。
+ * 实时麦克风采集 + 周期性后台推理。
+ * 音频采集与推理分离，防止推理阻塞音频流。
 * 使用 SenseVoice 模型进行推理。
- * 模型异步加载，不阻塞 UI。
 */
 class STTTestPage : public QWidget {
    Q_OBJECT
@ -36,16 +37,18 @@ private slots:
    void onModelLoaded(const QString& modelPath);
    void onModelLoadError(const QString& modelPath, const QString& error);
    void onModelUnloaded();
+    void onInferenceTimer();

 private:
    void setupUI();
    void updateUIState();
    void startAudioCapture();
-    void processAudioChunk(const std::vector<float>& samples, int sampleRate);
+    void startInferenceTimer();

    ConfigManager* configManager_;
    SenseVoiceEngine* sttEngine_;
    AudioCapture* audioCapture_;
+    QTimer* inferenceTimer_;

    // UI 控件
    QComboBox* deviceCombo_;
@ -58,7 +61,9 @@ private:

    bool isRecording_ = false;
    bool isLoadingModel_ = false;
-    std::vector<float> chunkBuffer_;
+    bool isInferencing_ = false;
+    int audioSampleRate_ = 16000;
+    std::vector<float> audioBuffer_;
    QString currentModelPath_;
 };