feat: 完善文件转写导出功能 (TXT/SRT/JSON)

- 修复导出功能:SRT/JSON 格式直接输出纯文本的 bug
- 新增 SRT 导出:按句子分段,均匀分配时间戳,支持字幕格式
- 新增 JSON 导出:结构化数据,包含文件信息、时长、采样率等元数据
- 新增 TXT 导出:包含文件信息和音频参数的格式化文本
- 记录音频文件信息 (时长/采样率/声道数) 用于导出

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Alvin Young 2026-05-12 19:00:55 +08:00
parent 84cc5bbc22
commit 49313f15f9
3 changed files with 162 additions and 12 deletions

View File

@ -126,8 +126,10 @@ ctest
- [x] 三个 GUI 页面 (实时识别 / 文件转写 / 配置) - [x] 三个 GUI 页面 (实时识别 / 文件转写 / 配置)
- [x] 日志系统 (控制台 + 文件输出) - [x] 日志系统 (控制台 + 文件输出)
- [x] 批量文件转写 (支持 WAV/MP3/FLAC) - [x] 批量文件转写 (支持 WAV/MP3/FLAC)
- [x] 结果导出 (TXT / SRT 字幕 / JSON 结构化数据)
- [x] 音频重采样 (非 16kHz 音频自动重采样) - [x] 音频重采样 (非 16kHz 音频自动重采样)
- [x] 语音活动检测 (VAD — 短时能量 + 过零率) - [x] 语音活动检测 (VAD — 短时能量 + 过零率)
- [x] 音频文件信息 (时长/采样率/声道数)
- [x] 单元测试框架 (Catch2, 39 个测试用例) - [x] 单元测试框架 (Catch2, 39 个测试用例)
- [ ] 完整 Whisper 推理 (自回归解码 + 流式识别) - [ ] 完整 Whisper 推理 (自回归解码 + 流式识别)
- [ ] 跨平台打包 - [ ] 跨平台打包

View File

@ -21,6 +21,11 @@
#include <QFileInfo> #include <QFileInfo>
#include <QFuture> #include <QFuture>
#include <QtConcurrent> #include <QtConcurrent>
#include <QJsonDocument>
#include <QJsonArray>
#include <QJsonObject>
#include <QFile>
#include <QRegularExpression>
static const char* const kTag = "FileTranscribePage"; static const char* const kTag = "FileTranscribePage";
@ -212,12 +217,17 @@ void FileTranscribePage::processFileAsync(int index) {
(void)QtConcurrent::run([this, index, taskFile = task.filePath]() { (void)QtConcurrent::run([this, index, taskFile = task.filePath]() {
QString text; QString text;
bool success = false; bool success = false;
double durationSec = 0.0;
int sampleRate = 0;
int channels = 0;
// 创建独立的解码器和引擎实例(避免线程冲突) // 创建独立的解码器和引擎实例(避免线程冲突)
AudioDecoder decoder; AudioDecoder decoder;
if (decoder.decode(taskFile)) { if (decoder.decode(taskFile)) {
const auto& samples = decoder.samples(); const auto& samples = decoder.samples();
int sampleRate = decoder.sampleRate(); sampleRate = decoder.sampleRate();
channels = decoder.channels();
durationSec = decoder.duration();
// 使用已加载的引擎进行推理(引擎是线程安全的) // 使用已加载的引擎进行推理(引擎是线程安全的)
auto result = sttEngine_->infer(samples, sampleRate, auto result = sttEngine_->infer(samples, sampleRate,
@ -227,20 +237,27 @@ void FileTranscribePage::processFileAsync(int index) {
} }
// 回到主线程更新 UI // 回到主线程更新 UI
QMetaObject::invokeMethod(this, [this, index, text, success]() { QMetaObject::invokeMethod(this, [this, index, text, success,
durationSec, sampleRate, channels]() {
activeWorkers_--; activeWorkers_--;
onTaskComplete(index, text, success); onTaskComplete(index, text, success, durationSec, sampleRate, channels);
}, Qt::QueuedConnection); }, Qt::QueuedConnection);
}); });
} }
void FileTranscribePage::onTaskComplete(int index, const QString& text, bool success) { void FileTranscribePage::onTaskComplete(int index, const QString& text, bool success,
double durationSec, int sampleRate, int channels) {
if (index >= tasks_.size()) return; if (index >= tasks_.size()) return;
auto& task = tasks_[index]; auto& task = tasks_[index];
task.result = text; task.result = text;
task.status = success ? "完成" : "失败"; task.status = success ? "完成" : "失败";
task.progress = 1.0; task.progress = 1.0;
if (success) {
task.durationSec = durationSec;
task.sampleRate = sampleRate;
task.channels = channels;
}
if (success) { if (success) {
resultText_->append( resultText_->append(
@ -279,24 +296,144 @@ void FileTranscribePage::onAllComplete() {
} }
void FileTranscribePage::onExportResult() { void FileTranscribePage::onExportResult() {
if (resultText_->toPlainText().isEmpty()) { // 过滤出成功完成的任务
QList<TranscribeTask> completedTasks;
for (const auto& task : tasks_) {
if (task.status == "完成") {
completedTasks.append(task);
}
}
if (completedTasks.isEmpty()) {
QMessageBox::information(this, "提示", "没有可导出的结果"); QMessageBox::information(this, "提示", "没有可导出的结果");
return; return;
} }
QString format = exportFormat_->currentText(); QString format = exportFormat_->currentText();
QString ext = (format == "TXT") ? "txt" : (format == "JSON") ? "json" : "srt"; QString ext, filter;
QString filter = QString("%1 文件 (*.%2)").arg(format, ext); if (format.startsWith("SRT")) {
ext = "srt";
filter = "SRT 字幕文件 (*.srt)";
} else if (format == "JSON") {
ext = "json";
filter = "JSON 文件 (*.json)";
} else {
ext = "txt";
filter = "文本文件 (*.txt)";
}
QString path = QFileDialog::getSaveFileName(this, "导出结果", "", filter); QString path = QFileDialog::getSaveFileName(this, "导出结果", "", filter);
if (path.isEmpty()) return; if (path.isEmpty()) return;
QFile file(path); QFile file(path);
if (file.open(QIODevice::WriteOnly)) { if (!file.open(QIODevice::WriteOnly)) {
file.write(resultText_->toPlainText().toUtf8()); QMessageBox::critical(this, "错误", "无法写入文件");
return;
}
if (ext == "srt") {
file.write(exportSRT(completedTasks).toUtf8());
} else if (ext == "json") {
file.write(exportJSON(completedTasks));
} else {
file.write(exportTXT(completedTasks).toUtf8());
}
file.close(); file.close();
statusLabel_->setText(QString("已导出: %1").arg(path)); statusLabel_->setText(QString("已导出: %1").arg(path));
} }
/**
 * @brief Renders the given tasks as a plain-text report.
 *
 * For every task the report contains a "=== <file name> ===" header line,
 * an optional audio summary line (written only when durationSec is positive),
 * and the transcript followed by an empty line.
 *
 * @param tasks Tasks to include in the report, written in list order.
 * @return The complete report text.
 */
QString FileTranscribePage::exportTXT(const QList<TranscribeTask>& tasks) const {
    QString report;
    for (const TranscribeTask& entry : tasks) {
        const QString baseName = QFileInfo(entry.filePath).fileName();
        report += QString("=== %1 ===\n").arg(baseName);

        // The summary line is skipped when no positive duration was recorded.
        if (entry.durationSec > 0) {
            const int wholeSeconds = static_cast<int>(entry.durationSec);
            const int minutesPart = wholeSeconds / 60;
            const int secondsPart = wholeSeconds % 60;
            report += QString("时长: %1:%2 | %3Hz | %4声道\n\n")
                          .arg(minutesPart, 2, 10, QChar('0'))
                          .arg(secondsPart, 2, 10, QChar('0'))
                          .arg(entry.sampleRate)
                          .arg(entry.channels);
        }

        report += entry.result;
        report += "\n\n";
    }
    return report;
}
/**
 * @brief Renders the given tasks as SubRip-style (SRT) subtitle text.
 *
 * Each task contributes a "# <file name>" marker line followed by numbered
 * cues. The transcript is split into sentences on the characters in the
 * sentence-break pattern below; the cues are then spread evenly over the
 * task's audio duration. The timestamps are therefore an even distribution,
 * not an alignment with the actual speech.
 *
 * Cue numbering runs continuously across all tasks, while cue times restart
 * at zero for every task.
 *
 * @param tasks Tasks to export, written in list order.
 * @return The complete subtitle text.
 */
QString FileTranscribePage::exportSRT(const QList<TranscribeTask>& tasks) const {
    // Built once instead of on every loop iteration.
    static const QRegularExpression kSentenceBreak("[。!?\n]");
    // Duration assumed when a task carries no positive duration.
    constexpr double kFallbackDurationSec = 10.0;

    QString srt;
    int subtitleIndex = 1;
    for (const auto& task : tasks) {
        const QString fileName = QFileInfo(task.filePath).fileName();
        srt += QString("# %1\n\n").arg(fileName);

        // Trim and filter BEFORE computing the timing. A whitespace-only piece
        // survives Qt::SkipEmptyParts and would otherwise take a time slot and
        // be written as a cue with no text.
        QStringList sentences;
        const QStringList pieces = task.result.split(kSentenceBreak, Qt::SkipEmptyParts);
        for (const QString& piece : pieces) {
            const QString cleaned = piece.trimmed();
            if (!cleaned.isEmpty()) {
                sentences << cleaned;
            }
        }
        if (sentences.isEmpty()) {
            // No usable sentence (e.g. punctuation only): fall back to the
            // whole transcript, or emit no cue at all when it has no text.
            const QString whole = task.result.trimmed();
            if (whole.isEmpty()) {
                continue;
            }
            sentences << whole;
        }

        const double duration = task.durationSec > 0 ? task.durationSec : kFallbackDurationSec;
        const int cueCount = static_cast<int>(sentences.size());  // >= 1 here
        const double segmentDuration = duration / cueCount;

        for (int i = 0; i < cueCount; ++i) {
            const double startSec = i * segmentDuration;
            // Guard against floating-point drift pushing the end past the audio.
            const double endSec = qMin((i + 1) * segmentDuration, duration);

            srt += QString("%1\n").arg(subtitleIndex++);
            srt += QString("%1 --> %2\n")
                       .arg(formatSRTTime(startSec), formatSRTTime(endSec));
            srt += sentences[i] + "\n\n";
        }
    }
    return srt;
}
/**
 * @brief Formats a time offset as an SRT timestamp "HH:MM:SS,mmm".
 *
 * The value is rounded once to whole milliseconds and every field is then
 * derived with integer arithmetic. Truncating the fractional part separately
 * loses a millisecond to floating-point representation error (2.3 s would
 * render as "00:00:02,299").
 *
 * @param seconds Offset in seconds; negative or NaN input is treated as 0.
 * @return The formatted timestamp. The hour field is at least two digits wide.
 */
QString FileTranscribePage::formatSRTTime(double seconds) const {
    // `seconds > 0.0` is false for NaN too, so both cases collapse to zero.
    const double clampedSeconds = seconds > 0.0 ? seconds : 0.0;
    const qint64 totalMs = qRound64(clampedSeconds * 1000.0);

    const qint64 ms = totalMs % 1000;
    const qint64 totalSeconds = totalMs / 1000;
    const qint64 s = totalSeconds % 60;
    const qint64 m = (totalSeconds / 60) % 60;
    const qint64 h = totalSeconds / 3600;

    return QString("%1:%2:%3,%4")
        .arg(h, 2, 10, QChar('0'))
        .arg(m, 2, 10, QChar('0'))
        .arg(s, 2, 10, QChar('0'))
        .arg(ms, 3, 10, QChar('0'));
}
/**
 * @brief Serializes the given tasks into an indented JSON document.
 *
 * The root object carries the application name, the export time, the number
 * of files and a "files" array. Each array element holds the file name, full
 * path, status, transcript text and a timestamp. The audio fields
 * (duration_sec, sample_rate, channels) are written only when durationSec is
 * positive.
 *
 * Every "timestamp" value is the current time at the moment of export.
 *
 * @param tasks Tasks to serialize, written in list order.
 * @return The JSON document as produced by QJsonDocument::toJson.
 */
QByteArray FileTranscribePage::exportJSON(const QList<TranscribeTask>& tasks) const {
    QJsonArray entries;
    for (const TranscribeTask& item : tasks) {
        const QFileInfo info(item.filePath);

        QJsonObject entry;
        entry["file"] = info.fileName();
        entry["path"] = item.filePath;
        entry["status"] = item.status;

        const bool hasAudioInfo = item.durationSec > 0;
        if (hasAudioInfo) {
            entry["duration_sec"] = item.durationSec;
            entry["sample_rate"] = item.sampleRate;
            entry["channels"] = item.channels;
        }

        entry["text"] = item.result;
        entry["timestamp"] = QDateTime::currentDateTime().toString(Qt::ISODate);

        entries.append(entry);
    }

    QJsonObject document;
    document["app"] = "Impress Voice Input";
    document["timestamp"] = QDateTime::currentDateTime().toString(Qt::ISODate);
    document["file_count"] = entries.size();
    document["files"] = entries;

    const QJsonDocument serialized(document);
    return serialized.toJson(QJsonDocument::Indented);
}
} // namespace impress } // namespace impress

View File

@ -9,6 +9,7 @@ class QTextEdit;
class QProgressBar; class QProgressBar;
class QListWidget; class QListWidget;
class QComboBox; class QComboBox;
class QByteArray;
namespace impress { namespace impress {
@ -21,6 +22,9 @@ struct TranscribeTask {
QString status; // "等待中", "处理中", "完成", "失败" QString status; // "等待中", "处理中", "完成", "失败"
QString result; QString result;
double progress = 0.0; double progress = 0.0;
double durationSec = 0.0; // 音频时长(秒)
int sampleRate = 0; // 采样率
int channels = 0; // 声道数
}; };
/** /**
@ -41,7 +45,8 @@ private slots:
void onStartTranscribe(); void onStartTranscribe();
void onStopTranscribe(); void onStopTranscribe();
void onExportResult(); void onExportResult();
void onTaskComplete(int index, const QString& text, bool success); void onTaskComplete(int index, const QString& text, bool success,
double durationSec, int sampleRate, int channels);
void onAllComplete(); void onAllComplete();
private: private:
@ -50,6 +55,12 @@ private:
void startBatchTranscription(); void startBatchTranscription();
void processFileAsync(int index); void processFileAsync(int index);
// 导出辅助方法
QString exportTXT(const QList<TranscribeTask>& tasks) const;
QString exportSRT(const QList<TranscribeTask>& tasks) const;
QByteArray exportJSON(const QList<TranscribeTask>& tasks) const;
QString formatSRTTime(double seconds) const;
ConfigManager* configManager_; ConfigManager* configManager_;
STTEngine* sttEngine_; STTEngine* sttEngine_;
AudioDecoder* audioDecoder_; AudioDecoder* audioDecoder_;