fix: STT测试页面异步推理,防止模型推理阻塞音频采集

SenseVoice 完整推理耗时数秒,原实现中 processAudioChunk 在音频回调中同步调用 infer(),导致 PortAudio 回调线程阻塞,表现为"程序无响应"。

修复方案:
- onAudioDataReady 仅缓存音频数据,不再同步调用推理
- QTimer 周期性触发,从缓冲区提取音频块
- 推理在 QtConcurrent 后台线程执行
- isInferencing_ 标志防止排队积压,推理期间跳过新音频块
- UI 实时显示缓冲区状态和推理进度

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
9a6dfa3b88
commit
a7a5b141a9
@ -18,6 +18,8 @@
|
|||||||
#include <QMessageBox>
|
#include <QMessageBox>
|
||||||
#include <QDateTime>
|
#include <QDateTime>
|
||||||
#include <QFileInfo>
|
#include <QFileInfo>
|
||||||
|
#include <QTimer>
|
||||||
|
#include <QtConcurrent>
|
||||||
|
|
||||||
static const char* const kTag = "STTTestPage";
|
static const char* const kTag = "STTTestPage";
|
||||||
|
|
||||||
@ -28,6 +30,7 @@ STTTestPage::STTTestPage(ConfigManager* configManager, QWidget* parent)
|
|||||||
, configManager_(configManager)
|
, configManager_(configManager)
|
||||||
, sttEngine_(new SenseVoiceEngine(this))
|
, sttEngine_(new SenseVoiceEngine(this))
|
||||||
, audioCapture_(new AudioCapture(this))
|
, audioCapture_(new AudioCapture(this))
|
||||||
|
, inferenceTimer_(new QTimer(this))
|
||||||
{
|
{
|
||||||
setupUI();
|
setupUI();
|
||||||
|
|
||||||
@ -40,6 +43,10 @@ STTTestPage::STTTestPage(ConfigManager* configManager, QWidget* parent)
|
|||||||
this, &STTTestPage::onModelLoadError);
|
this, &STTTestPage::onModelLoadError);
|
||||||
connect(sttEngine_, &SenseVoiceEngine::modelUnloaded,
|
connect(sttEngine_, &SenseVoiceEngine::modelUnloaded,
|
||||||
this, &STTTestPage::onModelUnloaded);
|
this, &STTTestPage::onModelUnloaded);
|
||||||
|
|
||||||
|
// 推理定时器:周期性触发后台推理
|
||||||
|
connect(inferenceTimer_, &QTimer::timeout,
|
||||||
|
this, &STTTestPage::onInferenceTimer);
|
||||||
}
|
}
|
||||||
|
|
||||||
STTTestPage::~STTTestPage() = default;
|
STTTestPage::~STTTestPage() = default;
|
||||||
@ -114,8 +121,11 @@ void STTTestPage::updateUIState() {
|
|||||||
void STTTestPage::onToggleRecording() {
|
void STTTestPage::onToggleRecording() {
|
||||||
if (isRecording_) {
|
if (isRecording_) {
|
||||||
audioCapture_->stop();
|
audioCapture_->stop();
|
||||||
|
inferenceTimer_->stop();
|
||||||
sttEngine_->unloadModel();
|
sttEngine_->unloadModel();
|
||||||
isRecording_ = false;
|
isRecording_ = false;
|
||||||
|
isInferencing_ = false;
|
||||||
|
audioBuffer_.clear();
|
||||||
} else {
|
} else {
|
||||||
// 读取配置
|
// 读取配置
|
||||||
QString modelPath = configManager_->get("stt.model_path").toString();
|
QString modelPath = configManager_->get("stt.model_path").toString();
|
||||||
@ -172,49 +182,92 @@ void STTTestPage::onModelLoadError(const QString& modelPath, const QString& erro
|
|||||||
|
|
||||||
void STTTestPage::onModelUnloaded() {
|
void STTTestPage::onModelUnloaded() {
|
||||||
isLoadingModel_ = false;
|
isLoadingModel_ = false;
|
||||||
|
isInferencing_ = false;
|
||||||
statusLabel_->setText("模型已卸载");
|
statusLabel_->setText("模型已卸载");
|
||||||
}
|
}
|
||||||
|
|
||||||
void STTTestPage::startAudioCapture() {
|
void STTTestPage::startAudioCapture() {
|
||||||
int deviceIdx = deviceCombo_->currentIndex() - 1;
|
int deviceIdx = deviceCombo_->currentIndex() - 1;
|
||||||
int sampleRate = configManager_->get("stt.sample_rate").toInt();
|
audioSampleRate_ = configManager_->get("stt.sample_rate").toInt();
|
||||||
|
|
||||||
if (!audioCapture_->start(deviceIdx, sampleRate)) {
|
if (!audioCapture_->start(deviceIdx, audioSampleRate_)) {
|
||||||
QMessageBox::critical(this, "错误", "无法启动音频采集");
|
QMessageBox::critical(this, "错误", "无法启动音频采集");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
isRecording_ = true;
|
isRecording_ = true;
|
||||||
|
audioBuffer_.clear();
|
||||||
|
isInferencing_ = false;
|
||||||
|
|
||||||
|
// 启动周期性推理定时器
|
||||||
|
startInferenceTimer();
|
||||||
|
|
||||||
statusLabel_->setText(QString("录音中 | 模型: %1").arg(
|
statusLabel_->setText(QString("录音中 | 模型: %1").arg(
|
||||||
QFileInfo(currentModelPath_).fileName()));
|
QFileInfo(currentModelPath_).fileName()));
|
||||||
updateUIState();
|
updateUIState();
|
||||||
}
|
}
|
||||||
|
|
||||||
void STTTestPage::onAudioDataReady(const std::vector<float>& samples, int sampleRate) {
|
void STTTestPage::startInferenceTimer() {
|
||||||
chunkBuffer_.insert(chunkBuffer_.end(), samples.begin(), samples.end());
|
int interval = chunkSizeSpin_->value(); // 与推理间隔同步
|
||||||
|
inferenceTimer_->start(interval);
|
||||||
int chunkSize = configManager_->get("stt.sample_rate").toInt()
|
|
||||||
* chunkSizeSpin_->value() / 1000;
|
|
||||||
|
|
||||||
if (static_cast<int>(chunkBuffer_.size()) >= chunkSize) {
|
|
||||||
std::vector<float> chunk(chunkBuffer_.begin(), chunkBuffer_.begin() + chunkSize);
|
|
||||||
chunkBuffer_.erase(chunkBuffer_.begin(), chunkBuffer_.begin() + chunkSize);
|
|
||||||
|
|
||||||
waveform_->setSamples(samples);
|
|
||||||
processAudioChunk(chunk, sampleRate);
|
|
||||||
} else {
|
|
||||||
waveform_->setSamples(samples);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void STTTestPage::processAudioChunk(const std::vector<float>& samples, int sampleRate) {
|
void STTTestPage::onAudioDataReady(const std::vector<float>& samples, int /* sampleRate */) {
|
||||||
// 模型已在 onToggleRecording 中异步加载,此处防御性检查
|
// 仅缓存音频数据,不直接调用推理
|
||||||
if (!sttEngine_->isLoaded()) {
|
// 避免推理阻塞音频采集线程
|
||||||
|
audioBuffer_.insert(audioBuffer_.end(), samples.begin(), samples.end());
|
||||||
|
|
||||||
|
// 更新波形显示(使用最新数据片段)
|
||||||
|
waveform_->setSamples(samples);
|
||||||
|
}
|
||||||
|
|
||||||
|
void STTTestPage::onInferenceTimer() {
|
||||||
|
if (!sttEngine_->isLoaded() || isInferencing_) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto result = sttEngine_->infer(samples, sampleRate,
|
int chunkSize = audioSampleRate_ * chunkSizeSpin_->value() / 1000;
|
||||||
configManager_->get("stt.language").toString());
|
|
||||||
emit onRecognitionResult(result.text, result.confidence, result.latency_ms, result.isFinal);
|
if (static_cast<int>(audioBuffer_.size()) < chunkSize) {
|
||||||
|
return; // 缓冲区数据不足,等待下一次
|
||||||
|
}
|
||||||
|
|
||||||
|
// 提取一个推理块的音频
|
||||||
|
std::vector<float> chunk(audioBuffer_.begin(), audioBuffer_.begin() + chunkSize);
|
||||||
|
audioBuffer_.erase(audioBuffer_.begin(), audioBuffer_.begin() + chunkSize);
|
||||||
|
|
||||||
|
// 在后台线程执行推理
|
||||||
|
isInferencing_ = true;
|
||||||
|
statusLabel_->setText("推理中...");
|
||||||
|
|
||||||
|
int sampleRate = audioSampleRate_;
|
||||||
|
QString language = configManager_->get("stt.language").toString();
|
||||||
|
|
||||||
|
(void)QtConcurrent::run([this, chunk, sampleRate, language]() {
|
||||||
|
auto result = sttEngine_->infer(chunk, sampleRate, language);
|
||||||
|
|
||||||
|
// 回到主线程更新 UI
|
||||||
|
QMetaObject::invokeMethod(this, [this, result]() {
|
||||||
|
isInferencing_ = false;
|
||||||
|
|
||||||
|
if (result.text.isEmpty() && !result.text.isNull()) {
|
||||||
|
// 静音段
|
||||||
|
latencyLabel_->setText(QString("延迟: %1 ms").arg(result.latency_ms, 0, 'f', 1));
|
||||||
|
} else {
|
||||||
|
emit onRecognitionResult(result.text, result.confidence,
|
||||||
|
result.latency_ms, result.isFinal);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 更新状态
|
||||||
|
if (isRecording_) {
|
||||||
|
int bufMs = (audioSampleRate_ > 0)
|
||||||
|
? static_cast<int>(audioBuffer_.size() * 1000 / audioSampleRate_)
|
||||||
|
: 0;
|
||||||
|
statusLabel_->setText(
|
||||||
|
QString("录音中 | 缓冲区: %1 ms").arg(bufMs));
|
||||||
|
}
|
||||||
|
}, Qt::QueuedConnection);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void STTTestPage::onRecognitionResult(const QString& text, float confidence,
|
void STTTestPage::onRecognitionResult(const QString& text, float confidence,
|
||||||
|
|||||||
@ -9,6 +9,7 @@ class QPushButton;
|
|||||||
class QComboBox;
|
class QComboBox;
|
||||||
class QTextEdit;
|
class QTextEdit;
|
||||||
class QSpinBox;
|
class QSpinBox;
|
||||||
|
class QTimer;
|
||||||
|
|
||||||
namespace impress {
|
namespace impress {
|
||||||
|
|
||||||
@ -19,9 +20,9 @@ class AudioCapture;
|
|||||||
/**
|
/**
|
||||||
* @brief STT 测试页面
|
* @brief STT 测试页面
|
||||||
*
|
*
|
||||||
* 实时麦克风采集 + 流式识别。
|
* 实时麦克风采集 + 周期性后台推理。
|
||||||
|
* 音频采集与推理分离,防止推理阻塞音频流。
|
||||||
* 使用 SenseVoice 模型进行推理。
|
* 使用 SenseVoice 模型进行推理。
|
||||||
* 模型异步加载,不阻塞 UI。
|
|
||||||
*/
|
*/
|
||||||
class STTTestPage : public QWidget {
|
class STTTestPage : public QWidget {
|
||||||
Q_OBJECT
|
Q_OBJECT
|
||||||
@ -36,16 +37,18 @@ private slots:
|
|||||||
void onModelLoaded(const QString& modelPath);
|
void onModelLoaded(const QString& modelPath);
|
||||||
void onModelLoadError(const QString& modelPath, const QString& error);
|
void onModelLoadError(const QString& modelPath, const QString& error);
|
||||||
void onModelUnloaded();
|
void onModelUnloaded();
|
||||||
|
void onInferenceTimer();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void setupUI();
|
void setupUI();
|
||||||
void updateUIState();
|
void updateUIState();
|
||||||
void startAudioCapture();
|
void startAudioCapture();
|
||||||
void processAudioChunk(const std::vector<float>& samples, int sampleRate);
|
void startInferenceTimer();
|
||||||
|
|
||||||
ConfigManager* configManager_;
|
ConfigManager* configManager_;
|
||||||
SenseVoiceEngine* sttEngine_;
|
SenseVoiceEngine* sttEngine_;
|
||||||
AudioCapture* audioCapture_;
|
AudioCapture* audioCapture_;
|
||||||
|
QTimer* inferenceTimer_;
|
||||||
|
|
||||||
// UI 控件
|
// UI 控件
|
||||||
QComboBox* deviceCombo_;
|
QComboBox* deviceCombo_;
|
||||||
@ -58,7 +61,9 @@ private:
|
|||||||
|
|
||||||
bool isRecording_ = false;
|
bool isRecording_ = false;
|
||||||
bool isLoadingModel_ = false;
|
bool isLoadingModel_ = false;
|
||||||
std::vector<float> chunkBuffer_;
|
bool isInferencing_ = false;
|
||||||
|
int audioSampleRate_ = 16000;
|
||||||
|
std::vector<float> audioBuffer_;
|
||||||
QString currentModelPath_;
|
QString currentModelPath_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user