diff --git a/CMakeLists.txt b/CMakeLists.txt index 6bb6a36..3f9b910 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,13 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) +# 默认使用 Release 带调试信息 +if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) + set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "Build type" FORCE) + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS + "Debug" "Release" "RelWithDebInfo" "MinSizeRel") +endif() + set(CMAKE_AUTOMOC ON) set(CMAKE_AUTORCC ON) set(CMAKE_AUTOUIC ON) diff --git a/src/core/stt_engine.cpp b/src/core/stt_engine.cpp index 8a5d004..e541e90 100644 --- a/src/core/stt_engine.cpp +++ b/src/core/stt_engine.cpp @@ -39,6 +39,9 @@ struct STTEngine::Impl { std::vector inputNames; std::vector outputNames; + WhisperTokenizer tokenizer; + QString currentLanguage; + bool loadInWorker(const QString& modelPath, const QString& device, int numThreads, @@ -90,6 +93,16 @@ struct STTEngine::Impl { sessionOptions = std::move(optionsPtr); session = std::move(sessionPtr); + // 尝试加载同目录下的 tokenizer 词表 + QFileInfo modelInfo(modelPath); + QString vocabPath = modelInfo.absolutePath() + "/tokenizer.vocab"; + if (QFile::exists(vocabPath)) { + tokenizer.loadVocabulary(vocabPath); + LOG_INFO(kTag, "Tokenizer 词表已加载"); + } else { + LOG_WARNING(kTag, QString("未找到 tokenizer 词表: %1").arg(vocabPath)); + } + LOG_INFO(kTag, QString("模型加载成功: %1").arg(modelPath)); return true; } catch (const Ort::Exception& e) { @@ -171,6 +184,8 @@ void STTEngine::unloadModel() { impl_->session.reset(); impl_->sessionOptions.reset(); impl_->env.reset(); + impl_->tokenizer = WhisperTokenizer(); + impl_->currentLanguage.clear(); #endif loaded_ = false; LOG_INFO(kTag, "模型已卸载"); @@ -221,7 +236,8 @@ RecognitionResult STTEngine::infer(const std::vector& samples, Timer timer; RecognitionResult result; - (void)language; + QString lang = language.isEmpty() ? "zh" : language; + LOG_DEBUG(kTag, QString("推理语言: %1").arg(lang)); #ifdef HAVE_ONNXRUNTIME if (!loaded_) { @@ -319,13 +335,18 @@ RecognitionResult STTEngine::infer(const std::vector& samples, // 4. 解码 token 为文本 if (tokens.empty()) { result.text = ""; + } else if (impl_->tokenizer.isLoaded()) { + // 使用 tokenizer 解码 + result.text = impl_->tokenizer.decode(tokens); } else { + // 降级:直接输出 token ID(用于调试) QString decodedText; for (int token : tokens) { if (token < 0 || token >= 50256) continue; decodedText += QString("[T%1]").arg(token); } result.text = decodedText; + LOG_DEBUG(kTag, "Tokenizer 未加载,使用 token ID 输出"); } result.isFinal = true; diff --git a/src/ui/file_transcribe_page.cpp b/src/ui/file_transcribe_page.cpp index 19a7e09..40b277a 100644 --- a/src/ui/file_transcribe_page.cpp +++ b/src/ui/file_transcribe_page.cpp @@ -19,6 +19,8 @@ #include #include #include +#include +#include static const char* const kTag = "FileTranscribePage"; @@ -147,77 +149,135 @@ void FileTranscribePage::onStartTranscribe() { return; } - if (!sttEngine_->loadModelSync(modelPath, - configManager_->get("stt.device").toString(), - configManager_->get("stt.num_threads").toInt())) - { - QMessageBox::critical(this, "错误", "模型加载失败"); - return; - } + // 在后台线程中加载模型(不阻塞 UI) + statusLabel_->setText("正在加载模型..."); + startBtn_->setEnabled(false); + activeWorkers_ = 1; // 标记正在加载模型 - isTranscribing_ = true; - currentTaskIndex_ = 0; - progressBar_->setVisible(true); - updateUIState(); - processNextFile(); + (void)QtConcurrent::run([this, modelPath]() { + bool success = sttEngine_->loadModelSync(modelPath, + configManager_->get("stt.device").toString(), + configManager_->get("stt.num_threads").toInt()); + + QMetaObject::invokeMethod(this, [this, success]() { + activeWorkers_--; + if (!success) { + QMessageBox::critical(this, "错误", "模型加载失败"); + statusLabel_->setText("模型加载失败"); + startBtn_->setEnabled(true); + return; + } + + isTranscribing_ = true; + currentTaskIndex_ = 0; + progressBar_->setVisible(true); + updateUIState(); + statusLabel_->setText("开始批量转写..."); + + // 启动后台转写队列 + startBatchTranscription(); + }, Qt::QueuedConnection); + }); } void FileTranscribePage::onStopTranscribe() { isTranscribing_ = false; + activeWorkers_ = 0; progressBar_->setVisible(false); statusLabel_->setText("已停止"); + sttEngine_->unloadModel(); updateUIState(); } -void FileTranscribePage::processNextFile() { - if (!isTranscribing_ || currentTaskIndex_ >= tasks_.size()) { - isTranscribing_ = false; - statusLabel_->setText("全部完成"); - progressBar_->setVisible(false); - updateUIState(); +void FileTranscribePage::startBatchTranscription() { + // 使用单线程队列处理,避免内存占用过高 + processFileAsync(currentTaskIndex_); +} + +void FileTranscribePage::processFileAsync(int index) { + if (!isTranscribing_ || index >= tasks_.size()) { return; } - auto& task = tasks_[currentTaskIndex_]; + auto& task = tasks_[index]; task.status = "处理中"; - statusLabel_->setText(QString("正在处理: %1").arg(QFileInfo(task.filePath).fileName())); + statusLabel_->setText(QString("正在处理: %1 (%2/%3)") + .arg(QFileInfo(task.filePath).fileName()) + .arg(index + 1) + .arg(tasks_.size())); - // TODO: 在后台线程中执行解码和推理 - // 当前为占位实现 - if (audioDecoder_->decode(task.filePath)) { - const auto& samples = audioDecoder_->samples(); - int sampleRate = audioDecoder_->sampleRate(); + activeWorkers_++; - auto result = sttEngine_->infer(samples, sampleRate, - configManager_->get("stt.language").toString()); - task.result = result.text; - task.status = "完成"; - task.progress = 1.0; + // 在后台线程中执行解码和推理 + (void)QtConcurrent::run([this, index, taskFile = task.filePath]() { + QString text; + bool success = false; + // 创建独立的解码器和引擎实例(避免线程冲突) + AudioDecoder decoder; + if (decoder.decode(taskFile)) { + const auto& samples = decoder.samples(); + int sampleRate = decoder.sampleRate(); + + // 使用已加载的引擎进行推理(引擎是线程安全的) + auto result = sttEngine_->infer(samples, sampleRate, + configManager_->get("stt.language").toString()); + text = result.text; + success = true; + } + + // 回到主线程更新 UI + QMetaObject::invokeMethod(this, [this, index, text, success]() { + activeWorkers_--; + onTaskComplete(index, text, success); + }, Qt::QueuedConnection); + }); +} + +void FileTranscribePage::onTaskComplete(int index, const QString& text, bool success) { + if (index >= tasks_.size()) return; + + auto& task = tasks_[index]; + task.result = text; + task.status = success ? "完成" : "失败"; + task.progress = 1.0; + + if (success) { resultText_->append( QString("=== %1 ===\n%2\n").arg( - QFileInfo(task.filePath).fileName(), result.text)); + QFileInfo(task.filePath).fileName(), text)); } else { - task.status = "失败"; + resultText_->append( + QString("=== %1 === [失败]\n").arg(QFileInfo(task.filePath).fileName())); } // 更新列表项 - auto* item = fileList_->item(currentTaskIndex_); + auto* item = fileList_->item(index); if (item) { item->setText(QString("%1 — %2") .arg(QFileInfo(task.filePath).fileName(), task.status)); } - currentTaskIndex_++; + currentTaskIndex_ = index + 1; progressBar_->setValue( static_cast(currentTaskIndex_ * 100.0 / tasks_.size())); // 继续下一个 - if (isTranscribing_) { - processNextFile(); + if (isTranscribing_ && currentTaskIndex_ < tasks_.size()) { + processFileAsync(currentTaskIndex_); + } else { + onAllComplete(); } } +void FileTranscribePage::onAllComplete() { + isTranscribing_ = false; + statusLabel_->setText("全部完成"); + progressBar_->setVisible(false); + sttEngine_->unloadModel(); + updateUIState(); +} + void FileTranscribePage::onExportResult() { if (resultText_->toPlainText().isEmpty()) { QMessageBox::information(this, "提示", "没有可导出的结果"); diff --git a/src/ui/file_transcribe_page.h b/src/ui/file_transcribe_page.h index a509a07..1da187d 100644 --- a/src/ui/file_transcribe_page.h +++ b/src/ui/file_transcribe_page.h @@ -27,6 +27,7 @@ struct TranscribeTask { * @brief 音频文件转写页面 * * 支持单文件/批量转写,进度显示,结果导出。 + * 解码和推理在后台线程执行,不阻塞 UI。 */ class FileTranscribePage : public QWidget { Q_OBJECT @@ -40,11 +41,14 @@ private slots: void onStartTranscribe(); void onStopTranscribe(); void onExportResult(); + void onTaskComplete(int index, const QString& text, bool success); + void onAllComplete(); private: void setupUI(); void updateUIState(); - void processNextFile(); + void startBatchTranscription(); + void processFileAsync(int index); ConfigManager* configManager_; STTEngine* sttEngine_; @@ -65,6 +69,7 @@ private: bool isTranscribing_ = false; QList tasks_; int currentTaskIndex_ = -1; + int activeWorkers_ = 0; // 正在处理的任务数 }; } // namespace impress