fix: STT 测试页面异步推理,防止模型推理阻塞音频采集

SenseVoice 完整推理耗时数秒,原实现中 processAudioChunk 在
音频回调中同步调用 infer(),导致 PortAudio 回调线程阻塞,
表现为"程序无响应"。

修复方案:
- onAudioDataReady 仅缓存音频数据,不再同步调用推理
- QTimer 周期性触发,从缓冲区提取音频块
- 推理在 QtConcurrent 后台线程执行
- isInferencing_ 标志防止推理任务排队积压:推理期间定时器触发直接返回,新音频继续累积在缓冲区中(不会被丢弃)
- UI 实时显示缓冲区状态和推理进度

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Alvin Young 2026-05-12 19:41:38 +08:00
parent 9a6dfa3b88
commit a7a5b141a9
2 changed files with 85 additions and 27 deletions

View File

@ -18,6 +18,8 @@
#include <QMessageBox> #include <QMessageBox>
#include <QDateTime> #include <QDateTime>
#include <QFileInfo> #include <QFileInfo>
#include <QTimer>
#include <QtConcurrent>
static const char* const kTag = "STTTestPage"; static const char* const kTag = "STTTestPage";
@ -28,6 +30,7 @@ STTTestPage::STTTestPage(ConfigManager* configManager, QWidget* parent)
, configManager_(configManager) , configManager_(configManager)
, sttEngine_(new SenseVoiceEngine(this)) , sttEngine_(new SenseVoiceEngine(this))
, audioCapture_(new AudioCapture(this)) , audioCapture_(new AudioCapture(this))
, inferenceTimer_(new QTimer(this))
{ {
setupUI(); setupUI();
@ -40,6 +43,10 @@ STTTestPage::STTTestPage(ConfigManager* configManager, QWidget* parent)
this, &STTTestPage::onModelLoadError); this, &STTTestPage::onModelLoadError);
connect(sttEngine_, &SenseVoiceEngine::modelUnloaded, connect(sttEngine_, &SenseVoiceEngine::modelUnloaded,
this, &STTTestPage::onModelUnloaded); this, &STTTestPage::onModelUnloaded);
// 推理定时器:周期性触发后台推理
connect(inferenceTimer_, &QTimer::timeout,
this, &STTTestPage::onInferenceTimer);
} }
STTTestPage::~STTTestPage() = default; STTTestPage::~STTTestPage() = default;
@ -114,8 +121,11 @@ void STTTestPage::updateUIState() {
void STTTestPage::onToggleRecording() { void STTTestPage::onToggleRecording() {
if (isRecording_) { if (isRecording_) {
audioCapture_->stop(); audioCapture_->stop();
inferenceTimer_->stop();
sttEngine_->unloadModel(); sttEngine_->unloadModel();
isRecording_ = false; isRecording_ = false;
isInferencing_ = false;
audioBuffer_.clear();
} else { } else {
// 读取配置 // 读取配置
QString modelPath = configManager_->get("stt.model_path").toString(); QString modelPath = configManager_->get("stt.model_path").toString();
@ -172,49 +182,92 @@ void STTTestPage::onModelLoadError(const QString& modelPath, const QString& erro
void STTTestPage::onModelUnloaded() { void STTTestPage::onModelUnloaded() {
isLoadingModel_ = false; isLoadingModel_ = false;
isInferencing_ = false;
statusLabel_->setText("模型已卸载"); statusLabel_->setText("模型已卸载");
} }
void STTTestPage::startAudioCapture() { void STTTestPage::startAudioCapture() {
int deviceIdx = deviceCombo_->currentIndex() - 1; int deviceIdx = deviceCombo_->currentIndex() - 1;
int sampleRate = configManager_->get("stt.sample_rate").toInt(); audioSampleRate_ = configManager_->get("stt.sample_rate").toInt();
if (!audioCapture_->start(deviceIdx, sampleRate)) { if (!audioCapture_->start(deviceIdx, audioSampleRate_)) {
QMessageBox::critical(this, "错误", "无法启动音频采集"); QMessageBox::critical(this, "错误", "无法启动音频采集");
return; return;
} }
isRecording_ = true; isRecording_ = true;
audioBuffer_.clear();
isInferencing_ = false;
// 启动周期性推理定时器
startInferenceTimer();
statusLabel_->setText(QString("录音中 | 模型: %1").arg( statusLabel_->setText(QString("录音中 | 模型: %1").arg(
QFileInfo(currentModelPath_).fileName())); QFileInfo(currentModelPath_).fileName()));
updateUIState(); updateUIState();
} }
void STTTestPage::onAudioDataReady(const std::vector<float>& samples, int sampleRate) { void STTTestPage::startInferenceTimer() {
chunkBuffer_.insert(chunkBuffer_.end(), samples.begin(), samples.end()); int interval = chunkSizeSpin_->value(); // 与推理间隔同步
inferenceTimer_->start(interval);
int chunkSize = configManager_->get("stt.sample_rate").toInt()
* chunkSizeSpin_->value() / 1000;
if (static_cast<int>(chunkBuffer_.size()) >= chunkSize) {
std::vector<float> chunk(chunkBuffer_.begin(), chunkBuffer_.begin() + chunkSize);
chunkBuffer_.erase(chunkBuffer_.begin(), chunkBuffer_.begin() + chunkSize);
waveform_->setSamples(samples);
processAudioChunk(chunk, sampleRate);
} else {
waveform_->setSamples(samples);
}
} }
void STTTestPage::processAudioChunk(const std::vector<float>& samples, int sampleRate) { void STTTestPage::onAudioDataReady(const std::vector<float>& samples, int /* sampleRate */) {
// 模型已在 onToggleRecording 中异步加载,此处防御性检查 // 仅缓存音频数据,不直接调用推理
if (!sttEngine_->isLoaded()) { // 避免推理阻塞音频采集线程
audioBuffer_.insert(audioBuffer_.end(), samples.begin(), samples.end());
// 更新波形显示(使用最新数据片段)
waveform_->setSamples(samples);
}
void STTTestPage::onInferenceTimer() {
if (!sttEngine_->isLoaded() || isInferencing_) {
return; return;
} }
auto result = sttEngine_->infer(samples, sampleRate, int chunkSize = audioSampleRate_ * chunkSizeSpin_->value() / 1000;
configManager_->get("stt.language").toString());
emit onRecognitionResult(result.text, result.confidence, result.latency_ms, result.isFinal); if (static_cast<int>(audioBuffer_.size()) < chunkSize) {
return; // 缓冲区数据不足,等待下一次
}
// 提取一个推理块的音频
std::vector<float> chunk(audioBuffer_.begin(), audioBuffer_.begin() + chunkSize);
audioBuffer_.erase(audioBuffer_.begin(), audioBuffer_.begin() + chunkSize);
// 在后台线程执行推理
isInferencing_ = true;
statusLabel_->setText("推理中...");
int sampleRate = audioSampleRate_;
QString language = configManager_->get("stt.language").toString();
(void)QtConcurrent::run([this, chunk, sampleRate, language]() {
auto result = sttEngine_->infer(chunk, sampleRate, language);
// 回到主线程更新 UI
QMetaObject::invokeMethod(this, [this, result]() {
isInferencing_ = false;
if (result.text.isEmpty() && !result.text.isNull()) {
// 静音段
latencyLabel_->setText(QString("延迟: %1 ms").arg(result.latency_ms, 0, 'f', 1));
} else {
emit onRecognitionResult(result.text, result.confidence,
result.latency_ms, result.isFinal);
}
// 更新状态
if (isRecording_) {
int bufMs = (audioSampleRate_ > 0)
? static_cast<int>(audioBuffer_.size() * 1000 / audioSampleRate_)
: 0;
statusLabel_->setText(
QString("录音中 | 缓冲区: %1 ms").arg(bufMs));
}
}, Qt::QueuedConnection);
});
} }
void STTTestPage::onRecognitionResult(const QString& text, float confidence, void STTTestPage::onRecognitionResult(const QString& text, float confidence,

View File

@ -9,6 +9,7 @@ class QPushButton;
class QComboBox; class QComboBox;
class QTextEdit; class QTextEdit;
class QSpinBox; class QSpinBox;
class QTimer;
namespace impress { namespace impress {
@ -19,9 +20,9 @@ class AudioCapture;
/** /**
* @brief STT * @brief STT
* *
* + * +
*
* 使 SenseVoice * 使 SenseVoice
* UI
*/ */
class STTTestPage : public QWidget { class STTTestPage : public QWidget {
Q_OBJECT Q_OBJECT
@ -36,16 +37,18 @@ private slots:
void onModelLoaded(const QString& modelPath); void onModelLoaded(const QString& modelPath);
void onModelLoadError(const QString& modelPath, const QString& error); void onModelLoadError(const QString& modelPath, const QString& error);
void onModelUnloaded(); void onModelUnloaded();
void onInferenceTimer();
private: private:
void setupUI(); void setupUI();
void updateUIState(); void updateUIState();
void startAudioCapture(); void startAudioCapture();
void processAudioChunk(const std::vector<float>& samples, int sampleRate); void startInferenceTimer();
ConfigManager* configManager_; ConfigManager* configManager_;
SenseVoiceEngine* sttEngine_; SenseVoiceEngine* sttEngine_;
AudioCapture* audioCapture_; AudioCapture* audioCapture_;
QTimer* inferenceTimer_;
// UI 控件 // UI 控件
QComboBox* deviceCombo_; QComboBox* deviceCombo_;
@ -58,7 +61,9 @@ private:
bool isRecording_ = false; bool isRecording_ = false;
bool isLoadingModel_ = false; bool isLoadingModel_ = false;
std::vector<float> chunkBuffer_; bool isInferencing_ = false;
int audioSampleRate_ = 16000;
std::vector<float> audioBuffer_;
QString currentModelPath_; QString currentModelPath_;
}; };