diff --git a/README.md b/README.md index dc77f8a..9d4618c 100644 --- a/README.md +++ b/README.md @@ -126,8 +126,10 @@ ctest - [x] 三个 GUI 页面 (实时识别 / 文件转写 / 配置) - [x] 日志系统 (控制台 + 文件输出) - [x] 批量文件转写 (支持 WAV/MP3/FLAC) +- [x] 结果导出 (TXT / SRT 字幕 / JSON 结构化数据) - [x] 音频重采样 (非 16kHz 音频自动重采样) - [x] 语音活动检测 (VAD — 短时能量 + 过零率) +- [x] 音频文件信息 (时长/采样率/声道数) - [x] 单元测试框架 (Catch2, 39 个测试用例) - [ ] 完整 Whisper 推理 (自回归解码 + 流式识别) - [ ] 跨平台打包 diff --git a/src/ui/file_transcribe_page.cpp b/src/ui/file_transcribe_page.cpp index 40b277a..e44d2d7 100644 --- a/src/ui/file_transcribe_page.cpp +++ b/src/ui/file_transcribe_page.cpp @@ -21,6 +21,11 @@ #include #include #include +#include +#include +#include +#include +#include static const char* const kTag = "FileTranscribePage"; @@ -212,12 +217,17 @@ void FileTranscribePage::processFileAsync(int index) { (void)QtConcurrent::run([this, index, taskFile = task.filePath]() { QString text; bool success = false; + double durationSec = 0.0; + int sampleRate = 0; + int channels = 0; // 创建独立的解码器和引擎实例(避免线程冲突) AudioDecoder decoder; if (decoder.decode(taskFile)) { const auto& samples = decoder.samples(); - int sampleRate = decoder.sampleRate(); + sampleRate = decoder.sampleRate(); + channels = decoder.channels(); + durationSec = decoder.duration(); // 使用已加载的引擎进行推理(引擎是线程安全的) auto result = sttEngine_->infer(samples, sampleRate, @@ -227,20 +237,27 @@ void FileTranscribePage::processFileAsync(int index) { } // 回到主线程更新 UI - QMetaObject::invokeMethod(this, [this, index, text, success]() { + QMetaObject::invokeMethod(this, [this, index, text, success, + durationSec, sampleRate, channels]() { activeWorkers_--; - onTaskComplete(index, text, success); + onTaskComplete(index, text, success, durationSec, sampleRate, channels); }, Qt::QueuedConnection); }); } -void FileTranscribePage::onTaskComplete(int index, const QString& text, bool success) { +void FileTranscribePage::onTaskComplete(int index, const QString& text, bool success, + double durationSec, int sampleRate, int channels) { if (index >= tasks_.size()) return; auto& task = tasks_[index]; task.result = text; task.status = success ? "完成" : "失败"; task.progress = 1.0; + if (success) { + task.durationSec = durationSec; + task.sampleRate = sampleRate; + task.channels = channels; + } if (success) { resultText_->append( @@ -279,24 +296,144 @@ void FileTranscribePage::onAllComplete() { } void FileTranscribePage::onExportResult() { - if (resultText_->toPlainText().isEmpty()) { + // 过滤出成功完成的任务 + QList completedTasks; + for (const auto& task : tasks_) { + if (task.status == "完成") { + completedTasks.append(task); + } + } + + if (completedTasks.isEmpty()) { QMessageBox::information(this, "提示", "没有可导出的结果"); return; } QString format = exportFormat_->currentText(); - QString ext = (format == "TXT") ? "txt" : (format == "JSON") ? "json" : "srt"; - QString filter = QString("%1 文件 (*.%2)").arg(format, ext); + QString ext, filter; + if (format.startsWith("SRT")) { + ext = "srt"; + filter = "SRT 字幕文件 (*.srt)"; + } else if (format == "JSON") { + ext = "json"; + filter = "JSON 文件 (*.json)"; + } else { + ext = "txt"; + filter = "文本文件 (*.txt)"; + } QString path = QFileDialog::getSaveFileName(this, "导出结果", "", filter); if (path.isEmpty()) return; QFile file(path); - if (file.open(QIODevice::WriteOnly)) { - file.write(resultText_->toPlainText().toUtf8()); - file.close(); - statusLabel_->setText(QString("已导出: %1").arg(path)); + if (!file.open(QIODevice::WriteOnly)) { + QMessageBox::critical(this, "错误", "无法写入文件"); + return; } + + if (ext == "srt") { + file.write(exportSRT(completedTasks).toUtf8()); + } else if (ext == "json") { + file.write(exportJSON(completedTasks)); + } else { + file.write(exportTXT(completedTasks).toUtf8()); + } + + file.close(); + statusLabel_->setText(QString("已导出: %1").arg(path)); +} + +QString FileTranscribePage::exportTXT(const QList& tasks) const { + QString content; + for (const auto& task : tasks) { + content += QString("=== %1 ===\n").arg(QFileInfo(task.filePath).fileName()); + if (task.durationSec > 0) { + int min = static_cast(task.durationSec) / 60; + int sec = static_cast(task.durationSec) % 60; + content += QString("时长: %1:%2 | %3Hz | %4声道\n\n") + .arg(min, 2, 10, QChar('0')) + .arg(sec, 2, 10, QChar('0')) + .arg(task.sampleRate) + .arg(task.channels); + } + content += task.result + "\n\n"; + } + return content; +} + +QString FileTranscribePage::exportSRT(const QList& tasks) const { + QString srt; + int subtitleIndex = 1; + + for (const auto& task : tasks) { + QString fileName = QFileInfo(task.filePath).fileName(); + srt += QString("# %1\n\n").arg(fileName); + + // 将文本按句号/换行分段,均匀分配到音频时长内 + QStringList sentences = task.result.split( + QRegularExpression("[。!?\n]"), Qt::SkipEmptyParts); + + if (sentences.isEmpty()) { + sentences << task.result; + } + + double duration = task.durationSec > 0 ? task.durationSec : 10.0; + double segmentDuration = duration / qMax(sentences.size(), 1); + + for (int i = 0; i < sentences.size(); ++i) { + double startSec = i * segmentDuration; + double endSec = (i + 1) * segmentDuration; + if (endSec > duration) endSec = duration; + + srt += QString("%1\n").arg(subtitleIndex++); + srt += QString("%1 --> %2\n") + .arg(formatSRTTime(startSec), formatSRTTime(endSec)); + srt += sentences[i].trimmed() + "\n\n"; + } + } + return srt; +} + +QString FileTranscribePage::formatSRTTime(double seconds) const { + int h = static_cast(seconds) / 3600; + int m = (static_cast(seconds) % 3600) / 60; + int s = static_cast(seconds) % 60; + int ms = static_cast((seconds - static_cast(seconds)) * 1000); + return QString("%1:%2:%3,%4") + .arg(h, 2, 10, QChar('0')) + .arg(m, 2, 10, QChar('0')) + .arg(s, 2, 10, QChar('0')) + .arg(ms, 3, 10, QChar('0')); +} + +QByteArray FileTranscribePage::exportJSON(const QList& tasks) const { + QJsonArray filesArray; + + for (const auto& task : tasks) { + QJsonObject fileObj; + fileObj["file"] = QFileInfo(task.filePath).fileName(); + fileObj["path"] = task.filePath; + fileObj["status"] = task.status; + + if (task.durationSec > 0) { + fileObj["duration_sec"] = task.durationSec; + fileObj["sample_rate"] = task.sampleRate; + fileObj["channels"] = task.channels; + } + + fileObj["text"] = task.result; + fileObj["timestamp"] = QDateTime::currentDateTime().toString(Qt::ISODate); + + filesArray.append(fileObj); + } + + QJsonObject root; + root["app"] = "Impress Voice Input"; + root["timestamp"] = QDateTime::currentDateTime().toString(Qt::ISODate); + root["file_count"] = filesArray.size(); + root["files"] = filesArray; + + return QJsonDocument(root).toJson(QJsonDocument::Indented); } } // namespace impress diff --git a/src/ui/file_transcribe_page.h b/src/ui/file_transcribe_page.h index 1da187d..04caf7b 100644 --- a/src/ui/file_transcribe_page.h +++ b/src/ui/file_transcribe_page.h @@ -9,6 +9,7 @@ class QTextEdit; class QProgressBar; class QListWidget; class QComboBox; +class QByteArray; namespace impress { @@ -21,6 +22,9 @@ struct TranscribeTask { QString status; // "等待中", "处理中", "完成", "失败" QString result; double progress = 0.0; + double durationSec = 0.0; // 音频时长(秒) + int sampleRate = 0; // 采样率 + int channels = 0; // 声道数 }; /** @@ -41,7 +45,8 @@ private slots: void onStartTranscribe(); void onStopTranscribe(); void onExportResult(); - void onTaskComplete(int index, const QString& text, bool success); + void onTaskComplete(int index, const QString& text, bool success, + double durationSec, int sampleRate, int channels); void onAllComplete(); private: @@ -50,6 +55,12 @@ private: void startBatchTranscription(); void processFileAsync(int index); + // 导出辅助方法 + QString exportTXT(const QList& tasks) const; + QString exportSRT(const QList& tasks) const; + QByteArray exportJSON(const QList& tasks) const; + QString formatSRTTime(double seconds) const; + ConfigManager* configManager_; STTEngine* sttEngine_; AudioDecoder* audioDecoder_;