feat: 初始化 Impress Voice Input 项目

基于 ONNX 的实时语音转文本输入法,C++ 跨平台实现。

核心组件:
- Qt 6 跨平台 GUI(实时识别 / 文件转写 / 配置页面)
- ONNX Runtime 推理引擎(异步模型加载)
- PortAudio 音频采集
- dr_libs 音频文件解码
- JSON 配置管理(线程安全,自动持久化)
- 日志系统(控制台 + 文件输出)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Alvin Young 2026-05-12 15:53:05 +08:00
commit 02e100b318
52 changed files with 56869 additions and 0 deletions

66
.gitignore vendored Normal file
View File

@ -0,0 +1,66 @@
# =====================
# 构建产物
# =====================
build/
cmake-build-*/
out/
*.exe
*.dll
*.so
*.dylib
*.a
*.lib
*.o
*.obj
# =====================
# IDE / 编辑器
# =====================
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store
Thumbs.db
*.code-workspace
# =====================
# CMake
# =====================
CMakeCache.txt
CMakeFiles/
cmake_install.cmake
Makefile
compile_commands.json
# =====================
# 第三方依赖 (单独管理)
# =====================
# PortAudio 仓库体积大,不纳入版本控制
third_party/portaudio/
# ONNX Runtime 体积大,不纳入版本控制
third_party/onnxruntime/
# 保留 header-only 库(体积小)
!third_party/dr_libs/
!third_party/nlohmann_json/
# =====================
# 模型文件 (体积大,不包含)
# =====================
models/*.onnx
models/*.bin
models/*.pb
models/*.gguf
!models/.gitkeep
# =====================
# 用户数据
# =====================
# 默认配置保留,用户配置不纳入
!configs/default_config.json
configs/user_config.json
configs/config.json
# 日志
*.log

146
CMakeLists.txt Normal file
View File

@ -0,0 +1,146 @@
# Require CMake 3.20 or newer.
cmake_minimum_required(VERSION 3.20)
# Declare the project. ${PROJECT_NAME} is reused below as the executable target name.
# NOTE(review): the DESCRIPTION string looks truncated (only " ONNX " remains) - confirm the intended text.
project(impress_voice_input
VERSION 0.1.0
LANGUAGES CXX
DESCRIPTION " ONNX "
)
# Build as C++17, required, with compiler-specific extensions disabled.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
# Let CMake run Qt's moc, rcc and uic automatically for targets that need them.
set(CMAKE_AUTOMOC ON)
set(CMAKE_AUTORCC ON)
set(CMAKE_AUTOUIC ON)
# Emit compile_commands.json for external tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# ============================================================================
# Dependencies
# ============================================================================
# Make the modules in ./cmake visible to include() / find_package().
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
# Qt 6
find_package(Qt6 REQUIRED COMPONENTS Core Widgets Concurrent Network)
qt_standard_project_setup()
# Third-party libraries looked up by cmake/dependencies.cmake
# (ONNX Runtime, PortAudio, dr_libs, nlohmann/json).
include(dependencies)
# ============================================================================
# Source files
# ============================================================================
# Translation units compiled into the executable, grouped by source directory.
set(SOURCES
src/main.cpp
# App
src/app/application.cpp
src/app/config_manager.cpp
# Core
src/core/stt_engine.cpp
src/core/audio_processor.cpp
src/core/decoder.cpp
src/core/tokenizer.cpp
# Audio
src/audio/audio_capture.cpp
src/audio/audio_decoder.cpp
src/audio/audio_ring_buffer.cpp
# UI
src/ui/main_window.cpp
src/ui/stt_test_page.cpp
src/ui/file_transcribe_page.cpp
src/ui/settings_page.cpp
src/ui/widgets/audio_waveform.cpp
src/ui/widgets/text_output.cpp
src/ui/widgets/progress_panel.cpp
# Utils
src/utils/logger.cpp
src/utils/timer.cpp
src/utils/string_utils.cpp
)
# Headers are passed to add_executable() together with SOURCES below.
set(HEADERS
src/app/application.h
src/app/config_manager.h
src/core/stt_engine.h
src/core/audio_processor.h
src/core/decoder.h
src/core/tokenizer.h
src/audio/audio_capture.h
src/audio/audio_decoder.h
src/audio/audio_ring_buffer.h
src/ui/main_window.h
src/ui/stt_test_page.h
src/ui/file_transcribe_page.h
src/ui/settings_page.h
src/ui/widgets/audio_waveform.h
src/ui/widgets/text_output.h
src/ui/widgets/progress_panel.h
src/utils/logger.h
src/utils/timer.h
src/utils/string_utils.h
)
# ============================================================================
# Executable target
# ============================================================================
add_executable(${PROJECT_NAME} ${SOURCES} ${HEADERS})

# Include roots: project sources, the third_party tree, and whichever optional
# dependency include directories cmake/dependencies.cmake managed to locate
# (the variables expand to nothing when a dependency was not found).
target_include_directories(${PROJECT_NAME} PRIVATE
  ${CMAKE_CURRENT_SOURCE_DIR}/src
  ${CMAKE_CURRENT_SOURCE_DIR}/third_party
  ${ONNXRUNTIME_INCLUDE_DIRS}
  ${PORTAUDIO_INCLUDE_DIRS}
  ${DR_LIBS_INCLUDE_DIR}
  ${NLOHMANN_JSON_INCLUDE_DIR}
)

# Resolve the platform threading library portably. Linking the bare name
# "pthread" fails on toolchains that have no such library (e.g. MSVC).
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)

target_link_libraries(${PROJECT_NAME} PRIVATE
  Qt6::Core
  Qt6::Widgets
  Qt6::Concurrent
  Qt6::Network
  ${ONNXRUNTIME_LIBRARIES}
  ${PORTAUDIO_LIBRARIES}
  Threads::Threads
)

# Warning flags per compiler family. Each generator expression must be a
# single quoted argument with ';' between the flags; left unquoted, the spaces
# split it into several malformed arguments.
target_compile_options(${PROJECT_NAME} PRIVATE
  "$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:-Wall;-Wextra;-Wpedantic>"
  "$<$<CXX_COMPILER_ID:MSVC>:/W4>"
)
# ============================================================================
# Resources
# ============================================================================
# Embed the application style sheet into the executable under the "/" resource prefix.
qt_add_resources(${PROJECT_NAME} "styles"
PREFIX "/"
FILES
src/ui/resources/styles/main.qss
)
# ============================================================================
# Install
# ============================================================================
install(TARGETS ${PROJECT_NAME}
RUNTIME DESTINATION bin
BUNDLE DESTINATION .
)
# ============================================================================
# Tests (opt-in: configure with -DBUILD_TESTS=ON)
# ============================================================================
option(BUILD_TESTS "构建单元测试" OFF)
if(BUILD_TESTS)
enable_testing()
add_subdirectory(tests)
endif()

264
PRD.md Normal file
View File

@ -0,0 +1,264 @@
# Impress Voice Input — 产品需求文档 (PRD)
## 1. 项目概述
**项目名称**: Impress Voice Input
**项目类型**: 基于 ONNX 的实时语音转文本输入法
**技术栈**: C++ / ONNX Runtime / Qt (跨平台GUI)
### 1.1 项目目标
开发一款跨平台的实时语音输入法,利用 ONNX 推理引擎运行语音转文本 (STT) 模型,提供低延迟、高准确率的语音输入体验。应用集成前后端于一体,包含实时语音识别、音频文件转写和模型配置三大核心功能。
### 1.2 目标平台
- Windows 10/11
- macOS 12+
- Linux (Ubuntu 20.04+)
---
## 2. 技术架构
### 2.1 整体架构
```
┌─────────────────────────────────────────────────┐
│ Qt GUI 前端 │
│ ┌──────────┐ ┌──────────────┐ ┌───────────┐ │
│ │ STT测试页 │ │ 音频文件转写页 │ │ 配置页面 │ │
│ └────┬─────┘ └──────┬───────┘ └─────┬─────┘ │
│ │ │ │ │
│ ┌────▼───────────────▼────────────────▼─────┐ │
│ │ 统一事件总线 / 消息队列 │ │
│ └────────────────────┬──────────────────────┘ │
└───────────────────────┼─────────────────────────┘
┌───────────────────────▼─────────────────────────┐
│ C++ 后端服务层 │
│ ┌────────────┐ ┌──────────────┐ ┌─────────┐ │
│ │ 音频采集模块 │ │ 音频文件解码 │ │ 配置管理 │ │
│ │ (PortAudio) │ │ (dr_libs) │ │ (INI/JSON)│
│ └─────┬──────┘ └──────┬───────┘ └────┬────┘ │
│ │ │ │ │
│ ┌─────▼────────────────▼───────────────▼──────┐│
│ │ STT 推理引擎 (ONNX Runtime) ││
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ││
│ │ │ 音频预处理│ │ ONNX推理 │ │ 后处理/CTC│ ││
│ │ └──────────┘ └──────────┘ └──────────┘ ││
│ └─────────────────────────────────────────────┘│
└─────────────────────────────────────────────────┘
```
### 2.2 技术选型
| 组件 | 选型 | 说明 |
|------|------|------|
| **GUI 框架** | Qt 6 | 成熟跨平台,原生性能,丰富的控件生态 |
| **STT 推理引擎** | ONNX Runtime (C++ API) | 跨平台推理,支持 CPU/GPU 加速 |
| **音频采集** | PortAudio | 轻量级跨平台音频 I/O 库 |
| **音频解码** | dr_libs (dr_wav/dr_mp3/dr_flac) | 单头文件,支持 WAV/MP3/FLAC |
| **模型格式** | Whisper / Paraformer ONNX | 主流开源 STT 模型导出格式 |
| **构建系统** | CMake 3.20+ | 跨平台标准构建工具 |
| **配置存储** | nlohmann/json | JSON 格式配置文件 |
| **线程模型** | Qt Concurrent + 独立工作线程 | UI 线程与推理线程分离 |
---
## 3. 功能需求
### 3.1 STT 测试页面(实时语音识别)
**功能描述**: 实时采集麦克风音频流,流式输出识别文本。
| 功能项 | 详细说明 | 优先级 |
|--------|----------|--------|
| 麦克风选择 | 下拉列表选择输入设备,显示采样率等信息 | P0 |
| 开始/停止录音 | 按钮控制录音状态,带明显视觉反馈 | P0 |
| 实时文本显示 | 滚动文本区域实时显示识别结果 | P0 |
| 流式/非流式切换 | 支持流式(逐字输出)和整句输出两种模式 | P0 |
| 置信度显示 | 显示每句识别的置信度分数 | P1 |
| 延迟指示 | 显示端到端识别延迟(毫秒) | P1 |
| 波形可视化 | 实时音频波形图展示 | P2 |
| 文本复制/导出 | 一键复制识别文本或导出为 TXT 文件 | P1 |
| 多语言切换 | 运行时切换识别语言 | P1 |
### 3.2 音频文件转写页面
**功能描述**: 导入音频文件,批量转写为文本。
| 功能项 | 详细说明 | 优先级 |
|--------|----------|--------|
| 文件选择 | 支持拖拽或文件选择器导入音频文件 | P0 |
| 支持的格式 | WAV, MP3, FLAC, OGG, AAC | P0 |
| 批量处理 | 支持添加多个文件到队列,按序处理 | P1 |
| 进度显示 | 文件转写进度条、百分比、预计剩余时间 | P0 |
| 结果展示 | 分段显示转写文本,带时间戳 | P0 |
| 结果导出 | 导出为 TXT / SRT (字幕) / JSON 格式 | P0 |
| 断点续传 | 中断后可从断点继续 | P2 |
| 文件信息 | 显示音频文件的时长、采样率、声道数 | P1 |
### 3.3 配置页面
**功能描述**: 管理模型路径、推理参数和应用设置。
| 功能项 | 详细说明 | 优先级 |
|--------|----------|--------|
| 模型路径配置 | 设置 ONNX 模型文件路径(支持相对/绝对路径) | P0 |
| 模型类型选择 | 选择 Whisper / Paraformer / 其他 | P0 |
| 推理设备选择 | CPU / GPU (CUDA / CoreML / DirectML) | P0 |
| 线程数配置 | 设置 ONNX 推理线程数 | P0 |
| 采样率配置 | 设置输入音频采样率(默认 16000Hz) | P0 |
| 语言设置 | 默认识别语言 | P0 |
| 音频输入设备 | 默认麦克风选择 | P1 |
| 快捷键设置 | 全局语音激活快捷键配置 | P1 |
| 配置文件导入/导出 | 备份和恢复配置 | P1 |
| 参数调优 | 温度、top-p、beam size 等高级参数 | P2 |
| 配置持久化 | 自动保存到配置文件 | P0 |
---
## 4. 非功能需求
### 4.1 性能指标
| 指标 | 目标值 |
|------|--------|
| 实时识别延迟 | < 300ms (端到端从语音结束到文本输出) |
| 首字延迟 | < 150ms (流式模式) |
| CPU 占用 | < 30% (单核, Intel i5 级别 CPU) |
| 内存占用 | < 500MB (含模型加载) |
| 音频缓冲区延迟 | < 20ms |
### 4.2 兼容性
- ONNX Runtime ≥ 1.16
- Qt ≥ 6.5
- CMake ≥ 3.20
- 支持的模型: Whisper (small/medium), Paraformer, 其他兼容 CTC 的模型
### 4.3 安全与隐私
- 所有推理均在本地执行,不上传任何音频数据到云端
- 配置文件中敏感路径使用相对路径
---
## 5. 项目结构
```
impress_voice_input/
├── PRD.md # 产品需求文档
├── CMakeLists.txt # 顶层 CMake 配置
├── README.md # 项目说明
├── cmake/ # CMake 模块
│ ├── FindPortAudio.cmake
│ ├── FindOnnxRuntime.cmake
│ └── dependencies.cmake
├── src/
│ ├── main.cpp # 入口
│ ├── app/
│ │ ├── application.h/cpp # QApplication 封装
│ │ └── config_manager.h/cpp # 配置管理
│ ├── core/
│ │ ├── stt_engine.h/cpp # STT 推理引擎 (ONNX)
│ │ ├── audio_processor.h/cpp # 音频预处理 (重采样/归一化)
│ │ ├── decoder.h/cpp # CTC/自回归解码器
│ │ └── tokenizer.h/cpp # Tokenizer (BPE/Char)
│ ├── audio/
│ │ ├── audio_capture.h/cpp # 麦克风采集 (PortAudio)
│ │ ├── audio_decoder.h/cpp # 文件解码 (dr_libs)
│ │ └── audio_ring_buffer.h/cpp # 环形缓冲区
│ ├── ui/
│ │ ├── main_window.h/cpp # 主窗口 (Tab/侧边栏导航)
│ │ ├── stt_test_page.h/cpp # STT 测试页面
│ │ ├── file_transcribe_page.h/cpp # 文件转写页面
│ │ ├── settings_page.h/cpp # 配置页面
│ │ ├── widgets/
│ │ │ ├── audio_waveform.h/cpp # 波形控件
│ │ │ ├── text_output.h/cpp # 文本输出控件
│ │ │ └── progress_panel.h/cpp # 进度面板
│ │ ├── resources/
│ │ │ ├── icons/
│ │ │ └── styles/
│ │ │ └── main.qss
│ │ └── main_window.ui # Qt Designer UI
│ └── utils/
│ ├── logger.h/cpp
│ ├── timer.h/cpp
│ └── string_utils.h/cpp
├── models/ # 模型存放目录 (gitignore)
│ └── .gitkeep
├── configs/
│ └── default_config.json # 默认配置
├── tests/
│ ├── test_stt_engine.cpp
│ ├── test_audio_processor.cpp
│ └── test_config_manager.cpp
└── third_party/ # 第三方依赖
├── onnxruntime/
├── portaudio/
├── dr_libs/
└── nlohmann_json/
```
---
## 6. 开发里程碑
### Phase 1: 基础框架 (Week 1-2)
- [ ] CMake 项目搭建,依赖集成
- [ ] Qt 主窗口框架,三页面导航
- [ ] 配置管理模块
- [ ] 配置页面 UI 开发
### Phase 2: 音频与推理核心 (Week 3-4)
- [ ] ONNX Runtime 集成,模型加载与推理
- [ ] 音频预处理模块(重采样、归一化、分帧)
- [ ] PortAudio 麦克风采集
- [ ] 音频文件解码模块
### Phase 3: STT 测试页面 (Week 5-6)
- [ ] 实时音频流接入推理引擎
- [ ] 流式识别与文本输出
- [ ] UI 交互完善
### Phase 4: 文件转写页面 (Week 7)
- [ ] 文件选择与批量队列
- [ ] 转写进度管理
- [ ] 结果导出 (TXT/SRT)
### Phase 5: 优化与测试 (Week 8)
- [ ] 性能优化(延迟、内存)
- [ ] 跨平台兼容性测试
- [ ] Bug 修复与 UI 打磨
- [ ] 打包发布
---
## 7. 风险与缓解
| 风险 | 影响 | 缓解措施 |
|------|------|----------|
| ONNX 模型兼容性问题 | 无法加载模型 | 明确支持的模型列表,提供模型转换工具说明 |
| GPU 加速驱动依赖 | 部分用户无法使用 GPU | CPU 推理作为降级方案,确保 CPU 性能可接受 |
| Qt 跨平台差异 | 各平台表现不一致 | 早期进行三平台测试,使用 Qt 抽象层 |
| 大模型内存占用高 | 低端设备运行困难 | 提供 small 模型默认选项,支持模型卸载 |
---
## 8. 附录
### 8.1 参考项目
- whisper.cpp (GGML 实现,参考模型结构)
- faster-whisper (CTranslate2 实现,参考优化策略)
- onnxruntime examples (ONNX 推理示例)
### 8.2 术语表
| 术语 | 说明 |
|------|------|
| STT | Speech-to-Text,语音转文本 |
| ONNX | Open Neural Network Exchange,开放神经网络交换格式 |
| CTC | Connectionist Temporal Classification,连接时序分类 |
| 流式识别 | 边说话边输出文本,而非说完后一次性输出 |
| BPE | Byte Pair Encoding,字节对编码分词方法 |

132
README.md Normal file
View File

@ -0,0 +1,132 @@
# Impress Voice Input
基于 ONNX 的实时语音转文本输入法,C++ 跨平台实现。
## 功能特性
- **实时语音识别** — 麦克风采集,流式输出识别结果
- **音频文件转写** — 支持 WAV/MP3/FLAC,批量处理,导出 TXT/SRT
- **跨平台 GUI** — Qt 6 构建,支持 Windows / macOS / Linux
- **本地推理** — ONNX Runtime,支持 CPU/GPU 加速
- **可配置** — 模型路径、推理参数、快捷键均可自定义
## 项目结构
```
impress_voice_input/
├── CMakeLists.txt # 构建配置
├── PRD.md # 产品需求文档
├── cmake/ # CMake 模块
│ └── dependencies.cmake # 依赖查找
├── src/
│ ├── main.cpp # 入口
│ ├── app/ # 应用层 (Application, ConfigManager)
│ ├── core/ # 核心 (STTEngine, AudioProcessor, Decoder, Tokenizer)
│ ├── audio/ # 音频 (AudioCapture, AudioDecoder, RingBuffer)
│ ├── ui/ # GUI 页面与控件
│ │ ├── main_window.cpp # 主窗口
│ │ ├── stt_test_page.cpp # 实时识别页
│ │ ├── file_transcribe_page.cpp # 文件转写页
│ │ ├── settings_page.cpp # 配置页
│ │ └── widgets/ # 自定义控件
│ └── utils/ # 工具类
├── configs/ # 配置文件
├── models/ # ONNX 模型存放目录
└── third_party/ # 第三方依赖
```
## 技术栈
| 组件 | 选型 |
|------|------|
| GUI | Qt 6 |
| 推理引擎 | ONNX Runtime (C++ API) |
| 音频采集 | PortAudio |
| 音频解码 | dr_libs (dr_wav, dr_mp3, dr_flac) |
| 构建系统 | CMake 3.20+ |
| 配置存储 | nlohmann/json |
## 编译指南
### 前置依赖
1. **CMake** >= 3.20
2. **Qt 6** >= 6.5
3. **ONNX Runtime** C++ 库
4. **PortAudio**
5. C++17 兼容编译器 (GCC 9+ / Clang 10+ / MSVC 2019+)
### 第三方库准备
```bash
# 放入 third_party/ 目录
third_party/
├── onnxruntime/
│ ├── include/
│ └── lib/
├── portaudio/
│ ├── include/
│ └── lib/
├── dr_libs/
│ ├── dr_wav.h
│ ├── dr_mp3.h
│ └── dr_flac.h
└── nlohmann_json/
└── json.hpp
```
### 编译
```bash
mkdir build && cd build
cmake .. -DCMAKE_BUILD_TYPE=Release
cmake --build . -j$(nproc)
```
### 带测试编译
```bash
cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=ON
cmake --build . -j$(nproc)
ctest
```
## 运行
```bash
# 默认启动
./impress_voice_input
# 指定配置文件
./impress_voice_input --config /path/to/config.json
# 指定模型
./impress_voice_input --model /path/to/model.onnx
```
## 模型准备
支持以下 ONNX 格式模型:
- **Whisper** (small/medium) — 多语言,高质量
- **Paraformer** — 中文优化
- 其他兼容 CTC 的 ASR 模型
将 `.onnx` 模型文件放入 `models/` 目录,然后在配置页面设置路径。
## 当前状态
项目处于 **骨架搭建阶段**,核心功能已规划并创建框架代码:
- [x] 项目结构与 CMake 配置
- [x] 配置管理模块
- [x] STT 推理引擎框架 (ONNX 集成占位)
- [x] 音频采集/解码框架 (PortAudio/dr_libs 集成占位)
- [x] 三个 GUI 页面框架
- [ ] 完整 ONNX 推理实现
- [ ] 流式识别逻辑
- [ ] 批量文件转写
- [ ] 跨平台打包
## License
MIT

69
cmake/dependencies.cmake Normal file
View File

@ -0,0 +1,69 @@
include(FetchContent)
# Root of the vendored third-party libraries. Anchored to PROJECT_SOURCE_DIR
# rather than CMAKE_CURRENT_SOURCE_DIR so the path stays correct even when this
# module is include()d from a CMakeLists.txt in a subdirectory.
set(THIRD_PARTY_DIR "${PROJECT_SOURCE_DIR}/third_party")
# ----------------------------------------------------------------------------
# ONNX Runtime
# Looked up only under third_party/onnxruntime (NO_DEFAULT_PATH disables the
# system search). When found, sets ONNXRUNTIME_LIBRARIES and
# ONNXRUNTIME_INCLUDE_DIRS and defines HAVE_ONNXRUNTIME; otherwise only warns.
# ----------------------------------------------------------------------------
set(ONNXRUNTIME_ROOT "${THIRD_PARTY_DIR}/onnxruntime")
find_library(ONNXRUNTIME_LIB
NAMES onnxruntime
PATHS "${ONNXRUNTIME_ROOT}/lib"
NO_DEFAULT_PATH
)
find_path(ONNXRUNTIME_INCLUDE_DIR
NAMES onnxruntime_cxx_api.h
PATHS "${ONNXRUNTIME_ROOT}/include"
NO_DEFAULT_PATH
)
if(ONNXRUNTIME_LIB AND ONNXRUNTIME_INCLUDE_DIR)
set(ONNXRUNTIME_LIBRARIES ${ONNXRUNTIME_LIB})
set(ONNXRUNTIME_INCLUDE_DIRS ${ONNXRUNTIME_INCLUDE_DIR})
message(STATUS "找到 ONNX Runtime: ${ONNXRUNTIME_LIB}")
# Directory-scoped definition: applies to every target created after this point.
add_compile_definitions(HAVE_ONNXRUNTIME)
else()
message(WARNING "未找到 ONNX Runtime推理功能将使用占位实现")
endif()
# ----------------------------------------------------------------------------
# PortAudio
# Same scheme as ONNX Runtime: searched only under third_party/portaudio.
# When found, sets PORTAUDIO_LIBRARIES and PORTAUDIO_INCLUDE_DIRS and defines
# HAVE_PORTAUDIO; otherwise only warns.
# ----------------------------------------------------------------------------
set(PORTAUDIO_ROOT "${THIRD_PARTY_DIR}/portaudio")
find_library(PORTAUDIO_LIB
NAMES portaudio libportaudio
PATHS "${PORTAUDIO_ROOT}/lib"
NO_DEFAULT_PATH
)
find_path(PORTAUDIO_INCLUDE_DIR
NAMES portaudio.h
PATHS "${PORTAUDIO_ROOT}/include"
NO_DEFAULT_PATH
)
if(PORTAUDIO_LIB AND PORTAUDIO_INCLUDE_DIR)
set(PORTAUDIO_LIBRARIES ${PORTAUDIO_LIB})
set(PORTAUDIO_INCLUDE_DIRS ${PORTAUDIO_INCLUDE_DIR})
message(STATUS "找到 PortAudio: ${PORTAUDIO_LIB}")
add_compile_definitions(HAVE_PORTAUDIO)
else()
message(WARNING "未找到 PortAudio音频采集功能将使用占位实现")
endif()
# ----------------------------------------------------------------------------
# dr_libs (header-only)
# DR_LIBS_INCLUDE_DIR is always set; HAVE_DR_LIBS is defined only when
# dr_wav.h is actually present in that directory.
# ----------------------------------------------------------------------------
set(DR_LIBS_INCLUDE_DIR "${THIRD_PARTY_DIR}/dr_libs")
if(EXISTS "${DR_LIBS_INCLUDE_DIR}/dr_wav.h")
message(STATUS "找到 dr_libs: ${DR_LIBS_INCLUDE_DIR}")
add_compile_definitions(HAVE_DR_LIBS)
else()
message(WARNING "未找到 dr_libs 头文件")
endif()
# ----------------------------------------------------------------------------
# nlohmann/json (header-only)
# NLOHMANN_JSON_INCLUDE_DIR is always set for the consumer; the presence of
# json.hpp is reported at configure time, mirroring the dr_libs check, so a
# missing header is noticed before compilation.
# ----------------------------------------------------------------------------
set(NLOHMANN_JSON_INCLUDE_DIR "${THIRD_PARTY_DIR}/nlohmann_json")
if(EXISTS "${NLOHMANN_JSON_INCLUDE_DIR}/json.hpp")
  message(STATUS "找到 nlohmann/json: ${NLOHMANN_JSON_INCLUDE_DIR}")
else()
  message(WARNING "未找到 nlohmann/json 头文件 (json.hpp)")
endif()

View File

@ -0,0 +1,29 @@
{
"version": 1,
"stt": {
"model_path": "",
"model_type": "whisper",
"device": "cpu",
"num_threads": 4,
"sample_rate": 16000,
"language": "zh",
"streaming": true,
"beam_size": 5,
"temperature": 0.0
},
"audio": {
"input_device": -1,
"buffer_size_ms": 20,
"chunk_duration_ms": 3000,
"padding_ms": 500
},
"ui": {
"theme": "light",
"font_size": 14,
"show_waveform": true,
"show_confidence": true
},
"shortcuts": {
"toggle_recording": "Ctrl+Space"
}
}

0
models/.gitkeep Normal file
View File

11
run.sh Executable file
View File

@ -0,0 +1,11 @@
#!/bin/bash
# Impress Voice Input launcher.
#
# Prepends the bundled ONNX Runtime and PortAudio library directories to the
# dynamic loader search path, then replaces this shell with the built binary,
# forwarding all command-line arguments.

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" || exit 1
BUILD_DIR="${SCRIPT_DIR}/build"
APP_BIN="${BUILD_DIR}/impress_voice_input"
LIB_DIRS="${SCRIPT_DIR}/third_party/onnxruntime/lib:${SCRIPT_DIR}/third_party/portaudio/lib"

# Append the caller's existing value only when it is non-empty. A trailing ':'
# would add an empty entry, which the loader treats as the current directory.
export LD_LIBRARY_PATH="${LIB_DIRS}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
# macOS uses DYLD_LIBRARY_PATH instead of LD_LIBRARY_PATH.
export DYLD_LIBRARY_PATH="${LIB_DIRS}${DYLD_LIBRARY_PATH:+:${DYLD_LIBRARY_PATH}}"

if [ ! -x "${APP_BIN}" ]; then
    echo "error: ${APP_BIN} not found or not executable; build the project first." >&2
    exit 1
fi

# Run the application in place of this shell.
exec "${APP_BIN}" "$@"

28
src/app/application.cpp Normal file
View File

@ -0,0 +1,28 @@
#include "application.h"
#include "config_manager.h"
#include "utils/logger.h"
#include <QFile>
// Tag passed to the LOG_* macros for messages emitted from this file.
static const char* const kTag = "Application";
namespace impress {
/**
 * @brief Construct the application object and its configuration manager.
 *
 * Logs a start-up message, creates the ConfigManager and fills it with the
 * built-in defaults.
 *
 * NOTE(review): the ConfigManager is given `this` as QObject parent AND is
 * owned by a std::unique_ptr. This presumably relies on the member being
 * destroyed before the QObject base class deletes its children - confirm
 * before changing the ownership or member layout.
 */
Application::Application(int& argc, char** argv)
: QApplication(argc, argv)
{
LOG_INFO(kTag, "Impress Voice Input 启动");
configManager_ = std::make_unique<ConfigManager>(this);
configManager_->loadDefaults();
}
/** @brief Log the shutdown message, then shut the logger down. */
Application::~Application() {
LOG_INFO(kTag, "应用退出");
Logger::shutdown();
}
/** @brief Return a non-owning pointer to the configuration manager. */
ConfigManager* Application::configManager() const {
return configManager_.get();
}
} // namespace impress

26
src/app/application.h Normal file
View File

@ -0,0 +1,26 @@
#pragma once
#include <QApplication>
#include <memory>
namespace impress {
class ConfigManager;
/**
 * @brief QApplication subclass that owns the application-wide ConfigManager.
 */
class Application : public QApplication {
Q_OBJECT
public:
/** @brief Forwards argc/argv to QApplication and creates the ConfigManager. */
Application(int& argc, char** argv);
~Application() override;
/** @brief Get the global configuration manager (non-owning pointer). */
ConfigManager* configManager() const;
private:
// Owned configuration manager, created in the constructor.
std::unique_ptr<ConfigManager> configManager_;
};
} // namespace impress

145
src/app/config_manager.cpp Normal file
View File

@ -0,0 +1,145 @@
#include "config_manager.h"
#include "utils/logger.h"
#include <QFile>
#include <QJsonDocument>
#include <QJsonObject>
#include <QStandardPaths>
#include <QDir>
static const char* const kTag = "ConfigManager";
namespace impress {
/** @brief Create an empty configuration manager; no file is loaded and no defaults are applied. */
ConfigManager::ConfigManager(QObject* parent)
: QObject(parent)
{}
bool ConfigManager::load(const QString& path) {
QMutexLocker locker(&mutex_);
QFile file(path);
if (!file.open(QIODevice::ReadOnly)) {
LOG_ERROR(kTag, QString("无法打开配置文件: %1").arg(path));
return false;
}
QJsonParseError error;
QJsonDocument doc = QJsonDocument::fromJson(file.readAll(), &error);
if (error.error != QJsonParseError::NoError) {
LOG_ERROR(kTag, QString("JSON 解析错误: %1").arg(error.errorString()));
return false;
}
config_ = doc.object().toVariantMap();
configPath_ = path;
LOG_INFO(kTag, QString("配置已加载: %1").arg(path));
return true;
}
bool ConfigManager::save() {
if (configPath_.isEmpty()) {
// 自动生成默认路径
QString appDataDir = QStandardPaths::writableLocation(
QStandardPaths::AppDataLocation);
QDir().mkpath(appDataDir);
configPath_ = appDataDir + "/config.json";
}
return saveAs(configPath_);
}
bool ConfigManager::saveAs(const QString& path) {
QMutexLocker locker(&mutex_);
QFile file(path);
if (!file.open(QIODevice::WriteOnly)) {
LOG_ERROR(kTag, QString("无法写入配置文件: %1").arg(path));
return false;
}
QJsonDocument doc(QJsonObject::fromVariantMap(config_));
file.write(doc.toJson(QJsonDocument::Indented));
configPath_ = path;
LOG_INFO(kTag, QString("配置已保存: %1").arg(path));
return true;
}
void ConfigManager::loadDefaults() {
QMutexLocker locker(&mutex_);
config_ = QVariantMap{
{"stt", QVariantMap{
{"model_path", ""},
{"model_type", "whisper"},
{"device", "cpu"},
{"num_threads", 4},
{"sample_rate", 16000},
{"language", "zh"},
{"streaming", true},
{"beam_size", 5},
{"temperature", 0.0}
}},
{"audio", QVariantMap{
{"input_device", -1},
{"buffer_size_ms", 20},
{"chunk_duration_ms", 3000},
{"padding_ms", 500}
}},
{"ui", QVariantMap{
{"theme", "light"},
{"font_size", 14},
{"show_waveform", true},
{"show_confidence", true}
}},
{"shortcuts", QVariantMap{
{"toggle_recording", "Ctrl+Space"}
}}
};
emit configChanged();
}
/**
 * @brief Look up a value by dotted path (e.g. "stt.model_path").
 * @param key          Path whose segments are separated by '.'.
 * @param defaultValue Returned when any segment of the path is missing.
 */
QVariant ConfigManager::get(const QString& key, const QVariant& defaultValue) const {
    const QStringList segments = key.split('.');
    QMutexLocker guard(&mutex_);
    return getValue(config_, segments, 0, defaultValue);
}
/**
 * @brief Walk @p map along parts[index..] and return the value at the end of the path.
 * @return The stored value, or @p defaultValue when the path is empty, @p index
 *         is past the end, or a segment is not present.
 */
QVariant ConfigManager::getValue(const QVariantMap& map, const QStringList& parts,
                                 int index, const QVariant& defaultValue)
{
    QVariantMap level = map;
    for (int depth = index; depth < parts.size(); ++depth) {
        const QString& segment = parts[depth];
        if (!level.contains(segment)) {
            return defaultValue;
        }
        const QVariant entry = level.value(segment);
        if (depth == parts.size() - 1) {
            return entry;
        }
        // Descend; a non-map entry converts to an empty map, so the next
        // lookup falls back to defaultValue.
        level = entry.toMap();
    }
    return defaultValue;
}
/**
 * @brief Store @p value at the dotted path @p key and emit configChanged().
 *
 * Intermediate maps are created as needed. The signal is emitted after the
 * mutex has been released: mutex_ is non-recursive, so a directly connected
 * slot calling get()/set() would otherwise deadlock.
 */
void ConfigManager::set(const QString& key, const QVariant& value) {
    {
        QMutexLocker locker(&mutex_);
        setValue(config_, key.split('.'), 0, value);
    }
    emit configChanged();
}
/**
 * @brief Recursively store @p value in @p map at the path parts[index..].
 *
 * A missing intermediate entry, or one that cannot be converted to a map, is
 * replaced by a fresh map before descending.
 */
void ConfigManager::setValue(QVariantMap& map, const QStringList& parts,
                             int index, const QVariant& value)
{
    const auto lastIndex = parts.size() - 1;
    if (index > lastIndex) {
        return;
    }

    const QString& segment = parts[index];
    if (index == lastIndex) {
        map.insert(segment, value);
        return;
    }

    // Start from the existing child when it is map-like, otherwise from empty.
    QVariantMap child;
    if (map.contains(segment) && map[segment].canConvert<QVariantMap>()) {
        child = map[segment].toMap();
    }
    setValue(child, parts, index + 1, value);
    map[segment] = child;
}
/** @brief Reload the built-in defaults via loadDefaults() and log that the configuration was reset. */
void ConfigManager::resetToDefaults() {
loadDefaults();
LOG_INFO(kTag, "配置已重置为默认值");
}
} // namespace impress

60
src/app/config_manager.h Normal file
View File

@ -0,0 +1,60 @@
#pragma once
#include <QObject>
#include <QVariantMap>
#include <QMutex>
namespace impress {
/**
 * @brief JSON-backed configuration store.
 *
 * Values live in a nested QVariantMap and are addressed by dotted paths such
 * as "stt.model_path". Access to the map and to the remembered file path is
 * serialised with an internal mutex. configChanged() is emitted by set() and
 * loadDefaults(). Changes are written to disk only when save() or saveAs()
 * is called.
 */
class ConfigManager : public QObject {
    Q_OBJECT

public:
    explicit ConfigManager(QObject* parent = nullptr);

    /** @brief Load the configuration from a file and remember the path for save(). */
    bool load(const QString& path);

    /** @brief Save the configuration to the last loaded/saved path. */
    bool save();

    /** @brief Save the configuration to the given file. */
    bool saveAs(const QString& path);

    /** @brief Initialise with the built-in default configuration. */
    void loadDefaults();

    /** @brief Get a value; supports dotted paths such as "stt.model_path". */
    QVariant get(const QString& key, const QVariant& defaultValue = {}) const;

    /** @brief Set a value at a dotted path. */
    void set(const QString& key, const QVariant& value);

    /** @brief Reset to the default configuration. */
    void resetToDefaults();

    /**
     * @brief Path of the current configuration file (empty if none yet).
     *
     * Reads configPath_ under the mutex, like every other accessor, because
     * load()/save()/saveAs() may modify it concurrently.
     */
    QString configPath() const {
        QMutexLocker locker(&mutex_);
        return configPath_;
    }

signals:
    void configChanged();

private:
    /** @brief Recursively set a nested value. */
    static void setValue(QVariantMap& map, const QStringList& parts, int index, const QVariant& value);

    /** @brief Recursively get a nested value. */
    static QVariant getValue(const QVariantMap& map, const QStringList& parts, int index, const QVariant& defaultValue);

    QString configPath_;    // file used by save(); set by load()/saveAs()
    mutable QMutex mutex_;  // guards config_ and configPath_
    QVariantMap config_;    // nested configuration tree
};
} // namespace impress

137
src/audio/audio_capture.cpp Normal file
View File

@ -0,0 +1,137 @@
#include "audio_capture.h"
#include "utils/logger.h"
#ifdef HAVE_PORTAUDIO
#include <portaudio.h>
#endif
static const char* const kTag = "AudioCapture";
namespace impress {
// Private state of AudioCapture, hidden behind a unique_ptr so that the
// PortAudio types do not appear in the public header.
struct AudioCapture::Impl {
#ifdef HAVE_PORTAUDIO
// Open PortAudio stream; nullptr while no capture is active.
PaStream* stream = nullptr;
#endif
// Back-pointer to the owning AudioCapture (set in the constructor).
AudioCapture* owner = nullptr;
// Sample rate of the last successfully started capture (set in start()).
int sampleRate = 16000;
};
#ifdef HAVE_PORTAUDIO
/**
 * @brief PortAudio stream callback: forwards one captured buffer as a signal.
 *
 * The whole function is compiled only when PortAudio is available, because
 * its signature uses PortAudio types; it is referenced only from the
 * PortAudio branch of AudioCapture::start().
 *
 * start() opens the stream with paFloat32 | paNonInterleaved and one channel.
 * With paNonInterleaved, @p input is an array of per-channel buffer pointers,
 * not a flat sample buffer, so channel 0 must be fetched through that array.
 *
 * @param input      Array of channel buffer pointers (see above).
 * @param frameCount Number of frames in each channel buffer.
 * @param userData   The AudioCapture instance passed to Pa_OpenStream().
 * @return paContinue, so that PortAudio keeps the stream running.
 *
 * NOTE(review): the sample rate reported to listeners is the constant 16000,
 * not the rate the stream was opened with - confirm and plumb the real rate.
 * NOTE(review): this allocates and emits a Qt signal on the audio thread;
 * presumably acceptable for now, but verify latency requirements.
 */
static int paCallback(const void* input, void* /*output*/,
                      unsigned long frameCount,
                      const PaStreamCallbackTimeInfo* /*timeInfo*/,
                      PaStreamCallbackFlags /*statusFlags*/,
                      void* userData)
{
    auto* capture = static_cast<AudioCapture*>(userData);
    if (capture == nullptr || input == nullptr || frameCount == 0) {
        return paContinue;
    }

    const auto* const* channelBuffers = static_cast<const float* const*>(input);
    const float* samples = channelBuffers[0];
    if (samples == nullptr) {
        return paContinue;
    }

    std::vector<float> data(samples, samples + frameCount);
    emit capture->audioDataReady(data, 16000);
    return paContinue;
}
#endif
/** @brief Create an idle capture object; no audio device is opened until start(). */
AudioCapture::AudioCapture(QObject* parent)
: QObject(parent)
, impl_(std::make_unique<Impl>())
{
impl_->owner = this;
}
/** @brief Stop any running capture before the object is destroyed. */
AudioCapture::~AudioCapture() {
stop();
}
/**
 * @brief List the available input devices as display strings.
 *
 * The first entry is always the "default device" placeholder. With PortAudio
 * enabled, one entry follows per device that has at least one input channel,
 * formatted as "name (CH:channels, SR:default sample rate)".
 *
 * PortAudio initialisation is reference counted, so this function performs
 * exactly one Pa_Initialize()/Pa_Terminate() pair. An extra, unmatched
 * Pa_Terminate() would drop the count held by a running capture and tear
 * down its stream.
 *
 * NOTE(review): positions in the returned list are not PortAudio device
 * indices (placeholder first, output-only devices skipped) - verify how
 * callers map a selection to start()'s deviceIndex.
 */
QStringList AudioCapture::getDeviceList() {
    QStringList devices;
    devices << "默认设备";
#ifdef HAVE_PORTAUDIO
    if (Pa_Initialize() == paNoError) {
        const int count = Pa_GetDeviceCount();
        for (int i = 0; i < count; ++i) {
            const PaDeviceInfo* info = Pa_GetDeviceInfo(i);
            if (info && info->maxInputChannels > 0) {
                devices << QString("%1 (CH:%2, SR:%3)").arg(
                    info->name).arg(info->maxInputChannels).arg(info->defaultSampleRate);
            }
        }
        // Matches the successful Pa_Initialize() above.
        Pa_Terminate();
    }
#endif
    return devices;
}
/**
 * @brief Open and start a mono float32 input stream.
 *
 * @param deviceIndex  PortAudio device index; a negative value selects the
 *                     default input device.
 * @param sampleRate   Stream sample rate in Hz (must be positive).
 * @param bufferSizeMs Callback buffer length in milliseconds (must not be
 *                     negative).
 * @return true when the stream is running; false if a capture is already
 *         running, an argument is invalid, no usable input device exists, or
 *         PortAudio reports an error. Failures other than "already running"
 *         are also reported through error().
 *
 * Every failure after a successful Pa_Initialize() calls Pa_Terminate() so
 * that PortAudio's initialisation count stays balanced.
 */
bool AudioCapture::start(int deviceIndex, int sampleRate, int bufferSizeMs) {
    if (running_) {
        LOG_WARNING(kTag, "已在运行中");
        return false;
    }
#ifdef HAVE_PORTAUDIO
    if (sampleRate <= 0 || bufferSizeMs < 0) {
        LOG_ERROR(kTag, QString("无效的采集参数 (采样率: %1, 缓冲: %2ms)")
                            .arg(sampleRate).arg(bufferSizeMs));
        emit error("无效的采集参数");
        return false;
    }

    if (Pa_Initialize() != paNoError) {
        LOG_ERROR(kTag, "PortAudio 初始化失败");
        emit error("PortAudio 初始化失败");
        return false;
    }

    // Pa_GetDefaultInputDevice() returns paNoDevice when the system has no
    // input device, and Pa_GetDeviceInfo() returns nullptr for an invalid
    // index; neither may be dereferenced.
    const PaDeviceIndex device =
        deviceIndex < 0 ? Pa_GetDefaultInputDevice() : deviceIndex;
    const PaDeviceInfo* deviceInfo =
        device == paNoDevice ? nullptr : Pa_GetDeviceInfo(device);
    if (deviceInfo == nullptr || deviceInfo->maxInputChannels < 1) {
        LOG_ERROR(kTag, QString("无可用的音频输入设备 (设备: %1)").arg(deviceIndex));
        emit error("无可用的音频输入设备");
        Pa_Terminate();
        return false;
    }

    PaStreamParameters inputParams{};
    inputParams.device = device;
    inputParams.channelCount = 1;
    inputParams.sampleFormat = paFloat32 | paNonInterleaved;
    inputParams.suggestedLatency = deviceInfo->defaultLowInputLatency;

    PaError err = Pa_OpenStream(
        &impl_->stream, &inputParams, nullptr, sampleRate,
        static_cast<unsigned long>(sampleRate * bufferSizeMs / 1000),
        paClipOff, paCallback, this);
    if (err != paNoError || !impl_->stream) {
        LOG_ERROR(kTag, QString("打开音频流失败: %1").arg(Pa_GetErrorText(err)));
        emit error(QString("打开音频流失败: %1").arg(Pa_GetErrorText(err)));
        impl_->stream = nullptr;
        Pa_Terminate();
        return false;
    }

    err = Pa_StartStream(impl_->stream);
    if (err != paNoError) {
        LOG_ERROR(kTag, QString("启动音频流失败: %1").arg(Pa_GetErrorText(err)));
        emit error(QString("启动音频流失败: %1").arg(Pa_GetErrorText(err)));
        Pa_CloseStream(impl_->stream);
        impl_->stream = nullptr;
        Pa_Terminate();
        return false;
    }

    impl_->sampleRate = sampleRate;
    running_ = true;
    emit runningChanged(true);
    LOG_INFO(kTag, QString("音频采集已启动 (设备: %1, 采样率: %2)").arg(deviceIndex).arg(sampleRate));
    return true;
#else
    LOG_ERROR(kTag, "PortAudio 未编译启用");
    emit error("PortAudio 未编译启用");
    return false;
#endif
}
/**
 * @brief Stop the running capture; does nothing when no capture is running.
 *
 * Stops and closes the stream, then calls Pa_Terminate() to balance the
 * Pa_Initialize() made in start(), and finally emits runningChanged(false).
 */
void AudioCapture::stop() {
if (!running_) return;
#ifdef HAVE_PORTAUDIO
if (impl_->stream) {
Pa_StopStream(impl_->stream);
Pa_CloseStream(impl_->stream);
impl_->stream = nullptr;
}
Pa_Terminate();
#endif
running_ = false;
emit runningChanged(false);
LOG_INFO(kTag, "音频采集已停止");
}
} // namespace impress

53
src/audio/audio_capture.h Normal file
View File

@ -0,0 +1,53 @@
#pragma once
#include <QObject>
#include <QString>
#include <QVector>
#include <vector>
#include <memory>
namespace impress {
/**
 * @brief Microphone capture built on PortAudio.
 *
 * Captured buffers are delivered through audioDataReady(), which is emitted
 * from the PortAudio stream callback.
 * NOTE(review): that callback presumably runs on a PortAudio-owned thread, so
 * cross-thread connections would need std::vector<float> registered as a Qt
 * metatype - confirm.
 */
class AudioCapture : public QObject {
Q_OBJECT
public:
explicit AudioCapture(QObject* parent = nullptr);
~AudioCapture() override;
/** @brief Get the list of available input devices (display strings). */
static QStringList getDeviceList();
/**
 * @brief Start capturing.
 * @param deviceIndex  PortAudio device index; negative selects the default input device.
 * @param sampleRate   Stream sample rate in Hz.
 * @param bufferSizeMs Callback buffer length in milliseconds.
 * @return true if the stream was started.
 */
bool start(int deviceIndex = -1,
int sampleRate = 16000,
int bufferSizeMs = 20);
/** @brief Stop capturing. */
void stop();
/** @brief Whether a capture is currently running. */
bool isRunning() const { return running_; }
signals:
/** @brief Emitted with captured audio data (float PCM samples). */
void audioDataReady(const std::vector<float>& samples, int sampleRate);
/** @brief Emitted when capture fails. */
void error(const QString& message);
/** @brief Emitted when the capture state changes. */
void runningChanged(bool running);
private:
struct Impl;
std::unique_ptr<Impl> impl_;
// True between a successful start() and the matching stop().
bool running_ = false;
};
} // namespace impress

View File

@ -0,0 +1,91 @@
#include "audio_decoder.h"
#include "utils/logger.h"
#ifdef HAVE_DR_LIBS
#define DR_WAV_IMPLEMENTATION
#define DR_MP3_IMPLEMENTATION
#define DR_FLAC_IMPLEMENTATION
#include <dr_wav.h>
#include <dr_mp3.h>
#include <dr_flac.h>
#endif
#include <QFileInfo>
static const char* const kTag = "AudioDecoder";
namespace impress {
/** @brief Create a decoder that holds no audio yet. */
AudioDecoder::AudioDecoder(QObject* parent)
: QObject(parent)
{}
/** @brief Default destructor; the decoded samples are released with the object. */
AudioDecoder::~AudioDecoder() = default;
/**
 * @brief File extensions (lower case, without dot) that decode() can handle.
 *
 * Only "wav" is listed because decode() rejects every other extension.
 * Advertising formats that always fail would let the UI offer files that
 * cannot be transcribed. Extend this list together with decode() when more
 * decoders are wired in.
 */
QStringList AudioDecoder::supportedFormats() {
    return {"wav"};
}
/**
 * @brief Decode a WAV file into mono float samples in [-1, 1).
 *
 * Any previous result is discarded first. Multi-channel audio is downmixed by
 * averaging the channels of each frame. On success progress(1.0) and
 * decoded() are emitted; on failure error() is emitted and the decoder is
 * left empty.
 *
 * @param filePath File to decode; only the ".wav" extension is accepted.
 * @return true on success, false for an unsupported extension, a file that
 *         cannot be opened, or when dr_libs is not compiled in.
 *
 * NOTE(review): the path is handed to dr_wav as UTF-8 bytes; presumably this
 * fails for non-ASCII paths on Windows - verify (dr_wav has a wide-char API).
 */
bool AudioDecoder::decode(const QString& filePath) {
    const QString ext = QFileInfo(filePath).suffix().toLower();

    // Reset first so a failed decode never exposes stale audio.
    samples_.clear();
    sampleRate_ = 0;
    channels_ = 0;

    if (ext != "wav") {
        LOG_ERROR(kTag, QString("暂不支持格式: %1").arg(ext));
        emit error(QString("暂不支持格式: %1").arg(ext));
        return false;
    }

#ifdef HAVE_DR_LIBS
    drwav wav;
    if (!drwav_init_file(&wav, filePath.toUtf8().constData(), nullptr)) {
        LOG_ERROR(kTag, QString("无法打开 WAV 文件: %1").arg(filePath));
        emit error("无法打开音频文件");
        return false;
    }

    // Copy everything needed out of the handle before it is released.
    const size_t channelCount = wav.channels;
    const size_t totalFrames = static_cast<size_t>(wav.totalPCMFrameCount);
    const int rate = static_cast<int>(wav.sampleRate);

    if (channelCount == 0) {
        drwav_uninit(&wav);
        LOG_ERROR(kTag, QString("无法打开 WAV 文件: %1").arg(filePath));
        emit error("无法打开音频文件");
        return false;
    }

    std::vector<short> pcm16(totalFrames * channelCount);
    const size_t framesRead = static_cast<size_t>(
        drwav_read_pcm_frames_s16(&wav, totalFrames, pcm16.data()));
    drwav_uninit(&wav);

    // A truncated file yields fewer frames than its header announces; keep
    // only what was actually decoded instead of padding with silence.
    if (framesRead < totalFrames) {
        LOG_WARNING(kTag, QString("WAV 文件数据不完整: %1 (%2/%3 帧)")
                              .arg(filePath).arg(framesRead).arg(totalFrames));
    }

    channels_ = static_cast<int>(channelCount);
    sampleRate_ = rate;

    // Average the channels of each frame and scale 16-bit PCM to [-1, 1).
    // For mono input this reduces to sample / 32768.
    const float divisor = static_cast<float>(channelCount) * 32768.0f;
    samples_.resize(framesRead);
    for (size_t frame = 0; frame < framesRead; ++frame) {
        float sum = 0.0f;
        for (size_t ch = 0; ch < channelCount; ++ch) {
            sum += pcm16[frame * channelCount + ch];
        }
        samples_[frame] = sum / divisor;
    }

    emit progress(1.0);
    emit decoded(filePath);
    LOG_INFO(kTag, QString("文件解码完成: %1 (%2 样本, %3Hz)")
                       .arg(filePath).arg(samples_.size()).arg(sampleRate_));
    return true;
#else
    LOG_ERROR(kTag, "dr_libs 未编译启用");
    emit error("音频解码库未启用");
    return false;
#endif
}
/** @brief Length of the decoded audio in seconds; 0.0 when nothing has been decoded. */
double AudioDecoder::duration() const {
    return sampleRate_ != 0
        ? static_cast<double>(samples_.size()) / sampleRate_
        : 0.0;
}
} // namespace impress

57
src/audio/audio_decoder.h Normal file
View File

@ -0,0 +1,57 @@
#pragma once
#include <QObject>
#include <QString>
#include <QStringList>
#include <vector>
#include <memory>
namespace impress {
/**
 * @brief Audio file decoder built on dr_libs (dr_wav, dr_mp3, dr_flac).
 *
 * Decodes a file into mono float PCM samples held by this object.
 * NOTE(review): supportedFormats() lists WAV, MP3, FLAC, OGG and AAC, but the
 * decode() implementation in audio_decoder.cpp accepts only ".wav" - confirm
 * which formats are really intended.
 */
class AudioDecoder : public QObject {
Q_OBJECT
public:
explicit AudioDecoder(QObject* parent = nullptr);
~AudioDecoder() override;
/** @brief Supported formats (file extensions). */
static QStringList supportedFormats();
/** @brief Decode an audio file; returns true on success. */
bool decode(const QString& filePath);
/** @brief Decoded PCM data of the last successful decode(). */
const std::vector<float>& samples() const { return samples_; }
/** @brief Sample rate in Hz (0 if nothing is decoded). */
int sampleRate() const { return sampleRate_; }
/** @brief Channel count of the source file (0 if nothing is decoded). */
int channels() const { return channels_; }
/** @brief Duration in seconds. */
double duration() const;
signals:
/** @brief Decoding progress (0.0 - 1.0). */
void progress(double progress);
/** @brief Decoding finished. */
void decoded(const QString& filePath);
/** @brief Decoding error. */
void error(const QString& message);
private:
std::vector<float> samples_;
int sampleRate_ = 0;
int channels_ = 0;
};
} // namespace impress

View File

@ -0,0 +1,52 @@
#include "audio_ring_buffer.h"
#include <algorithm>
#include <cstring>
namespace impress {
/**
 * @brief Allocate a zero-initialised buffer able to hold @p capacity samples.
 *
 * One extra slot is allocated and always kept free. With only a read and a
 * write position, "empty" and "completely full" would otherwise both look
 * like readPos_ == writePos_, and a full buffer would be reported as empty.
 * capacity_ therefore stores the number of physical slots (capacity + 1).
 */
AudioRingBuffer::AudioRingBuffer(size_t capacity)
    : buffer_(new float[capacity + 1]())
    , capacity_(capacity + 1)
{}

/** @brief Release the sample storage. */
AudioRingBuffer::~AudioRingBuffer() {
    delete[] buffer_;
}

/**
 * @brief Append up to @p count samples (producer side).
 *
 * Never overwrites unread data: when less space than @p count is free, only
 * the leading part of @p data is stored.
 *
 * @param data  Samples to copy; a null pointer writes nothing.
 * @param count Number of samples available in @p data.
 * @return Number of samples actually stored.
 */
size_t AudioRingBuffer::write(const float* data, size_t count) {
    if (data == nullptr || count == 0) {
        return 0;
    }

    // One slot stays unused so that writePos_ can never catch up with readPos_.
    const size_t freeSlots = capacity_ - 1 - this->available();
    const size_t toWrite = std::min(count, freeSlots);
    for (size_t i = 0; i < toWrite; ++i) {
        buffer_[writePos_] = data[i];
        writePos_ = (writePos_ + 1) % capacity_;
    }
    return toWrite;
}
/**
 * @brief Remove up to @p count samples from the buffer (consumer side).
 * @param data  Destination for the samples.
 * @param count Maximum number of samples to copy.
 * @return Number of samples copied, limited by what is currently available.
 */
size_t AudioRingBuffer::read(float* data, size_t count) {
    const size_t toRead = std::min(count, this->available());
    size_t copied = 0;
    while (copied < toRead) {
        data[copied] = buffer_[readPos_];
        readPos_ = (readPos_ + 1) % capacity_;
        ++copied;
    }
    return toRead;
}
/** @brief Number of samples between the read position and the write position. */
size_t AudioRingBuffer::available() const {
    const bool wrapped = writePos_ < readPos_;
    return wrapped ? capacity_ - (readPos_ - writePos_)
                   : writePos_ - readPos_;
}
/** @brief Discard all buffered samples by resetting both positions; the storage itself is not touched. */
void AudioRingBuffer::clear() {
readPos_ = 0;
writePos_ = 0;
}
} // namespace impress

View File

@ -0,0 +1,39 @@
#pragma once
#include <QObject>
#include <QVector>
#include <memory>
#include <cstddef>
namespace impress {
/**
 * @brief Fixed-size FIFO ring buffer of float audio samples.
 *
 * The buffer owns a raw heap array, so copying is disabled: an implicit copy
 * would make two objects delete the same array.
 *
 * NOTE(review): readPos_ and writePos_ are plain size_t, not atomics, and no
 * lock is used. Presumably the buffer is meant to be shared between an audio
 * thread and a consumer - confirm and add synchronisation if so.
 */
class AudioRingBuffer {
public:
    /** @param capacity Buffer size in samples. */
    explicit AudioRingBuffer(size_t capacity);
    ~AudioRingBuffer();

    AudioRingBuffer(const AudioRingBuffer&) = delete;
    AudioRingBuffer& operator=(const AudioRingBuffer&) = delete;

    /** @brief Write data (producer side); returns the number of samples stored. */
    size_t write(const float* data, size_t count);

    /** @brief Read data (consumer side); returns the number of samples copied. */
    size_t read(float* data, size_t count);

    /** @brief Number of samples currently readable. */
    size_t available() const;

    /** @brief Empty the buffer. */
    void clear();

private:
    float* buffer_;        // owned sample storage
    size_t capacity_;      // size used for index wrap-around
    size_t readPos_ = 0;   // index of the next sample to read
    size_t writePos_ = 0;  // index of the next slot to write
};
} // namespace impress

View File

@ -0,0 +1,74 @@
#include "audio_processor.h"
#include <cmath>
#include <algorithm>
namespace impress {
/** @brief Create a processor that resamples to @p targetSampleRate (Hz). */
AudioProcessor::AudioProcessor(int targetSampleRate)
: targetSampleRate_(targetSampleRate)
{}
/**
 * @brief Resample @p input from @p sourceSampleRate to the target sample rate
 *        using linear interpolation.
 *
 * @param input            Mono samples at @p sourceSampleRate.
 * @param sourceSampleRate Sample rate of @p input in Hz.
 * @return A copy of @p input when the rates already match; an empty vector
 *         when @p input is empty or either rate is not positive (the ratio
 *         would be undefined); otherwise floor(input.size() * target / source)
 *         interpolated samples.
 *
 * No low-pass filter is applied, so downsampling can alias.
 */
std::vector<float> AudioProcessor::resample(const std::vector<float>& input,
                                            int sourceSampleRate)
{
    if (sourceSampleRate == targetSampleRate_) {
        return input;
    }
    if (input.empty() || sourceSampleRate <= 0 || targetSampleRate_ <= 0) {
        return {};
    }

    // TODO: use a high-quality resampling algorithm (e.g. libsamplerate).
    const double ratio = static_cast<double>(targetSampleRate_) / sourceSampleRate;
    const size_t outputSize = static_cast<size_t>(input.size() * ratio);
    const size_t lastIndex = input.size() - 1;

    std::vector<float> output(outputSize);
    for (size_t i = 0; i < outputSize; ++i) {
        const double srcPos = i / ratio;
        // Clamp so that floating-point rounding can never index past the end.
        const size_t idx = std::min(static_cast<size_t>(srcPos), lastIndex);
        if (idx < lastIndex) {
            const double frac = srcPos - idx;
            output[i] = static_cast<float>(input[idx] * (1.0 - frac) + input[idx + 1] * frac);
        } else {
            output[i] = input[lastIndex];
        }
    }
    return output;
}
/**
 * @brief Convert 16-bit PCM samples to float by scaling with 1/32768.
 * @return One float per input sample, in [-1, 1).
 */
std::vector<float> AudioProcessor::normalize(const std::vector<short>& pcm16) {
    constexpr float kInvFullScale = 1.0f / 32768.0f;

    std::vector<float> result;
    result.reserve(pcm16.size());
    for (const short sample : pcm16) {
        result.push_back(sample * kInvFullScale);
    }
    return result;
}
/**
 * @brief Peak-normalizes a float signal so its largest magnitude becomes 1.
 *
 * @param input Samples to scale.
 * @return @p input scaled by 1 / max|sample|. When the peak is below 1e-6
 *         (including an empty input) the input is returned unchanged.
 */
std::vector<float> AudioProcessor::normalizeFloats(const std::vector<float>& input) {
    // Pass 1: find the peak magnitude.
    float peak = 0.0f;
    for (const float sample : input) {
        const float magnitude = std::abs(sample);
        if (peak < magnitude) {
            peak = magnitude;
        }
    }
    if (peak < 1e-6f) {
        return input;
    }

    // Pass 2: apply the gain.
    const float gain = 1.0f / peak;
    std::vector<float> scaled;
    scaled.reserve(input.size());
    for (const float sample : input) {
        scaled.push_back(sample * gain);
    }
    return scaled;
}
/**
 * @brief Splits a signal into fixed-length frames taken every @p hopSize samples.
 *
 * @param input     Source samples.
 * @param frameSize Samples per frame; must be positive.
 * @param hopSize   Distance between consecutive frame starts; must be positive.
 * @return Every complete frame that fits in @p input (a trailing partial frame
 *         is dropped). Empty when @p frameSize or @p hopSize is not positive,
 *         or when the input is shorter than one frame.
 */
std::vector<std::vector<float>> AudioProcessor::frame(const std::vector<float>& input,
                                                      int frameSize,
                                                      int hopSize)
{
    std::vector<std::vector<float>> frames;
    // A non-positive hop would never advance the loop, and a non-positive
    // frame size would wrap when converted to size_t.
    if (frameSize <= 0 || hopSize <= 0) {
        return frames;
    }

    const size_t frameLen = static_cast<size_t>(frameSize);
    const size_t hop = static_cast<size_t>(hopSize);
    if (input.size() < frameLen) {
        return frames;
    }

    frames.reserve((input.size() - frameLen) / hop + 1);
    for (size_t start = 0; start + frameLen <= input.size(); start += hop) {
        const auto first = input.begin() + static_cast<std::vector<float>::difference_type>(start);
        frames.emplace_back(first, first + static_cast<std::vector<float>::difference_type>(frameLen));
    }
    return frames;
}
} // namespace impress

View File

@ -0,0 +1,38 @@
#pragma once
#include <vector>
namespace impress {
/**
 * @brief Audio preprocessing helpers: resampling, normalization and framing.
 */
class AudioProcessor {
public:
    // explicit: a bare int must not convert silently into an AudioProcessor
    // (matches the other single-argument constructors in this project).
    explicit AudioProcessor(int targetSampleRate = 16000);

    /** @brief Resamples @p input from @p sourceSampleRate to the target sample rate. */
    std::vector<float> resample(const std::vector<float>& input,
                                int sourceSampleRate);

    /** @brief Converts 16-bit PCM samples to floats in [-1, 1]. */
    static std::vector<float> normalize(const std::vector<short>& pcm16);

    /** @brief Peak-normalizes float PCM data into [-1, 1]. */
    static std::vector<float> normalizeFloats(const std::vector<float>& input);

    /** @brief Splits audio into (possibly overlapping) frames of @p frameSize samples every @p hopSize samples. */
    std::vector<std::vector<float>> frame(const std::vector<float>& input,
                                          int frameSize,
                                          int hopSize);

    /** @brief Returns the target sample rate given at construction. */
    int targetSampleRate() const { return targetSampleRate_; }

private:
    int targetSampleRate_;
};
} // namespace impress

34
src/core/decoder.cpp Normal file
View File

@ -0,0 +1,34 @@
#include "decoder.h"
#include <algorithm>
namespace impress {
/**
 * @brief Greedy CTC decoding of a flattened logits buffer.
 *
 * @p logits is read as consecutive frames of @p vocabSize scores each. For
 * every frame the highest-scoring token is chosen (the lowest index wins
 * ties); a token is emitted unless it equals the previous frame's choice or
 * is token 0, which is treated as the CTC blank.
 *
 * @param logits    Frame-major scores, vocabSize values per frame.
 * @param vocabSize Number of scores per frame; must be positive.
 * @return The collapsed token sequence. Empty when @p vocabSize is not
 *         positive. A trailing partial frame (when logits.size() is not a
 *         multiple of vocabSize) is ignored instead of being read past the
 *         end of the buffer.
 */
std::vector<int> CTCGreedyDecoder::decode(const std::vector<float>& logits,
                                          int vocabSize,
                                          int /*beamSize*/)
{
    std::vector<int> tokens;
    // A zero stride would never advance; a negative one would wrap as size_t.
    if (vocabSize <= 0) {
        return tokens;
    }

    const size_t stride = static_cast<size_t>(vocabSize);
    const size_t frameCount = logits.size() / stride;  // complete frames only

    int prevToken = -1;
    for (size_t frameIdx = 0; frameIdx < frameCount; ++frameIdx) {
        // Greedy choice: arg-max over this frame's scores.
        const auto first = logits.begin()
            + static_cast<std::vector<float>::difference_type>(frameIdx * stride);
        const auto last = first + static_cast<std::vector<float>::difference_type>(stride);
        const int bestToken = static_cast<int>(std::max_element(first, last) - first);

        // CTC collapse: skip repeats of the previous frame's token and blanks.
        if (bestToken != prevToken && bestToken != 0) {
            tokens.push_back(bestToken);
        }
        prevToken = bestToken;
    }
    return tokens;
}
} // namespace impress

33
src/core/decoder.h Normal file
View File

@ -0,0 +1,33 @@
#pragma once
#include <QString>
#include <vector>
namespace impress {
/**
 * @brief Abstract interface for turning model logits into token IDs.
 *
 * CTCGreedyDecoder below is the implementation provided in this header.
 */
class Decoder {
public:
virtual ~Decoder() = default;
/**
 * @brief Decodes logits into token IDs.
 * @param logits    Flattened scores.
 * @param vocabSize Number of scores per step.
 * @param beamSize  Beam width hint; implementations may ignore it.
 */
virtual std::vector<int> decode(const std::vector<float>& logits,
int vocabSize,
int beamSize = 5) = 0;
};
/**
 * @brief CTC greedy decoder.
 */
class CTCGreedyDecoder : public Decoder {
public:
/** @brief Decodes logits into token IDs; see Decoder::decode for the parameters. */
std::vector<int> decode(const std::vector<float>& logits,
int vocabSize,
int beamSize = 5) override;
};
} // namespace impress

200
src/core/stt_engine.cpp Normal file
View File

@ -0,0 +1,200 @@
#include "stt_engine.h"
#include "utils/logger.h"
#include "utils/timer.h"
#include <QThread>
#include <QFuture>
#include <QtConcurrent>
#include <QMutex>
#include <QMutexLocker>
// ONNX Runtime headers
#ifdef HAVE_ONNXRUNTIME
#include <onnxruntime_cxx_api.h>
#endif
static const char* const kTag = "STTEngine";
namespace impress {
/**
 * @brief Private state of STTEngine: the ONNX Runtime objects and the mutex
 *        that guards them.
 */
struct STTEngine::Impl {
#ifdef HAVE_ONNXRUNTIME
    std::unique_ptr<Ort::Env> env;
    std::unique_ptr<Ort::SessionOptions> sessionOptions;
    std::unique_ptr<Ort::Session> session;
#endif
    QMutex mutex;  // serializes loading and unloading of the objects above

    /**
     * @brief Creates the ONNX Runtime environment and session for a model.
     *
     * Safe to call from a worker thread: the whole operation runs under
     * @c mutex, and the members are replaced only after every step succeeded,
     * so a failed load leaves the previous state untouched.
     *
     * @param modelPath  Path of the ONNX model file.
     * @param device     Requested device; "gpu" only logs a warning and falls
     *                   back to CPU.
     * @param numThreads Value passed to SetIntraOpNumThreads.
     * @param errorMsg   Receives a description of the failure.
     * @return true on success, false on failure (or when ONNX Runtime support
     *         is not compiled in).
     */
    bool loadInWorker(const QString& modelPath,
                      const QString& device,
                      int numThreads,
                      QString& errorMsg)
    {
#ifdef HAVE_ONNXRUNTIME
        QMutexLocker locker(&mutex);
        try {
            auto envPtr = std::make_unique<Ort::Env>(
                ORT_LOGGING_LEVEL_WARNING, "impress_voice");
            auto optionsPtr = std::make_unique<Ort::SessionOptions>();
            optionsPtr->SetIntraOpNumThreads(numThreads);
            optionsPtr->SetGraphOptimizationLevel(
                GraphOptimizationLevel::ORT_ENABLE_ALL);
            if (device == "gpu") {
                LOG_WARNING(kTag, "GPU 加速尚未实现,回退到 CPU");
            }
            LOG_INFO(kTag, QString("正在加载模型: %1 (线程: %2)").arg(modelPath).arg(numThreads));

            // Ort::Session takes an ORTCHAR_T path: wchar_t on Windows and
            // char elsewhere, so the QString is converted per platform.
#ifdef _WIN32
            const std::wstring nativePath = modelPath.toStdWString();
            const ORTCHAR_T* nativePathPtr = nativePath.c_str();
#else
            const QByteArray nativePath = modelPath.toUtf8();
            const ORTCHAR_T* nativePathPtr = nativePath.constData();
#endif
            auto sessionPtr = std::make_unique<Ort::Session>(
                *envPtr,
                nativePathPtr,
                *optionsPtr);

            // Replace the members only once everything has succeeded.
            env = std::move(envPtr);
            sessionOptions = std::move(optionsPtr);
            session = std::move(sessionPtr);
            LOG_INFO(kTag, QString("模型加载成功: %1").arg(modelPath));
            return true;
        } catch (const Ort::Exception& e) {
            errorMsg = QString("ONNX 异常: %1").arg(e.what());
            LOG_ERROR(kTag, errorMsg);
            return false;
        } catch (const std::exception& e) {
            errorMsg = QString("加载异常: %1").arg(e.what());
            LOG_ERROR(kTag, errorMsg);
            return false;
        }
#else
        // Nothing can be loaded without ONNX Runtime; keep the build
        // warning-free for the parameters that go unused in this branch.
        Q_UNUSED(modelPath);
        Q_UNUSED(device);
        Q_UNUSED(numThreads);
        errorMsg = "ONNX Runtime 未编译启用";
        LOG_ERROR(kTag, errorMsg);
        return false;
#endif
    }
};
/** @brief Creates the engine with no model loaded and allocates its private Impl. */
STTEngine::STTEngine(QObject* parent)
: QObject(parent)
, impl_(std::make_unique<Impl>())
{}
/**
 * @brief Releases the model via unloadModel(), which also emits modelUnloaded().
 *
 * NOTE(review): loadModelAsync() starts a background task that captures
 * `this` and nothing here waits for it — confirm the engine cannot be
 * destroyed while an asynchronous load is still running.
 */
STTEngine::~STTEngine() {
unloadModel();
}
bool STTEngine::loadModelSync(const QString& modelPath,
const QString& device,
int numThreads)
{
if (loaded_) {
LOG_WARNING(kTag, "模型已加载,先卸载再加载新模型");
unloadModel();
}
QString errorMsg;
bool success = impl_->loadInWorker(modelPath, device, numThreads, errorMsg);
loaded_ = success;
if (success) {
emit modelLoaded(modelPath);
} else {
emit modelLoadError(modelPath, errorMsg);
emit error(errorMsg);
}
return success;
}
void STTEngine::loadModelAsync(const QString& modelPath,
const QString& device,
int numThreads)
{
if (loaded_) {
LOG_WARNING(kTag, "模型已加载,先卸载再加载新模型");
unloadModel();
}
LOG_INFO(kTag, QString("异步加载模型: %1").arg(modelPath));
// 在后台线程中执行加载
QFuture<void> future = QtConcurrent::run([this, modelPath, device, numThreads]() {
QString errorMsg;
bool success = impl_->loadInWorker(modelPath, device, numThreads, errorMsg);
// 回到主线程发送信号
QMetaObject::invokeMethod(this, [this, modelPath, errorMsg, success]() {
loaded_ = success;
if (success) {
emit modelLoaded(modelPath);
} else {
emit modelLoadError(modelPath, errorMsg);
emit error(errorMsg);
}
}, Qt::QueuedConnection);
});
}
/**
 * @brief Releases the ONNX Runtime objects and emits modelUnloaded().
 *
 * The session, its options and the environment are reset in that order while
 * the Impl mutex is held. The mutex is released before the signal is
 * emitted: QMutex is not recursive, so a directly connected slot that calls
 * back into the engine (for example to load another model) would otherwise
 * deadlock.
 */
void STTEngine::unloadModel() {
    {
        QMutexLocker locker(&impl_->mutex);
#ifdef HAVE_ONNXRUNTIME
        impl_->session.reset();
        impl_->sessionOptions.reset();
        impl_->env.reset();
#endif
        loaded_ = false;
    }
    LOG_INFO(kTag, "模型已卸载");
    emit modelUnloaded();
}
/** @brief Returns the loaded_ flag maintained by the load/unload functions. */
bool STTEngine::isLoaded() const {
return loaded_;
}
/**
 * @brief Runs recognition on a block of audio (currently a placeholder).
 *
 * The real ONNX inference pipeline is not implemented yet: with ONNX Runtime
 * enabled and a model loaded, a fixed placeholder text is returned with
 * confidence 0.95 and isFinal set. Without a loaded model, or without ONNX
 * Runtime support, the text explains why nothing was recognized.
 *
 * @param samples     Audio samples (not consumed yet).
 * @param sampleRate  Sample rate of @p samples (not consumed yet).
 * @param isStreaming Streaming-mode flag (not consumed yet).
 * @return The result, with latency_ms set to the elapsed time of this call.
 */
RecognitionResult STTEngine::infer(const std::vector<float>& samples,
                                   int sampleRate,
                                   bool isStreaming)
{
    Timer timer;
    RecognitionResult result;

    // The inputs are unused until the pipeline exists; mark them here so the
    // build stays warning-free both with and without HAVE_ONNXRUNTIME.
    (void)samples;
    (void)sampleRate;
    (void)isStreaming;

#ifdef HAVE_ONNXRUNTIME
    if (!loaded_) {
        result.text = "[错误] 模型未加载";
        result.latency_ms = timer.elapsedMs();
        return result;
    }
    try {
        // TODO: implement the full ONNX inference flow:
        // 1. build the input tensor
        // 2. run the session
        // 3. decode the output (CTC / autoregressive)
        // 4. turn token IDs into text with the tokenizer
        result.text = "[占位] 推理逻辑待实现";
        result.confidence = 0.95f;
        result.isFinal = true;
    } catch (const std::exception& e) {
        result.text = QString("[错误] 推理失败: %1").arg(e.what());
    }
#else
    result.text = "[占位] ONNX Runtime 未启用,推理逻辑未实现";
#endif
    result.latency_ms = timer.elapsedMs();
    LOG_DEBUG(kTag, QString("推理耗时: %1 ms").arg(result.latency_ms, 0, 'f', 1));
    return result;
}
} // namespace impress

67
src/core/stt_engine.h Normal file
View File

@ -0,0 +1,67 @@
#pragma once
#include <QObject>
#include <QString>
#include <vector>
#include <memory>
namespace impress {
/** @brief Outcome of one STTEngine::infer() call. */
struct RecognitionResult {
QString text;              // recognized text, or a bracketed status/error message
float confidence = 0.0f;   // confidence score reported for the text
double latency_ms = 0.0;   // time spent inside infer(), in milliseconds
bool isFinal = false;      // set by infer() when the result is final
};
/**
 * @brief Speech-to-text engine built on ONNX Runtime, exposed as a QObject.
 *
 * A model can be loaded either synchronously or on a background thread;
 * the outcome is reported through the signals declared below. The ONNX
 * Runtime objects are hidden behind the private Impl struct.
 */
class STTEngine : public QObject {
Q_OBJECT
public:
explicit STTEngine(QObject* parent = nullptr);
~STTEngine() override;
/** @brief Loads the model synchronously (blocking; not recommended on the UI thread). */
bool loadModelSync(const QString& modelPath,
const QString& device = "cpu",
int numThreads = 4);
/** @brief Loads the model asynchronously (background thread; does not block the UI). */
void loadModelAsync(const QString& modelPath,
const QString& device = "cpu",
int numThreads = 4);
/** @brief Releases the model. */
void unloadModel();
/** @brief Whether a model is currently loaded. */
bool isLoaded() const;
/**
 * @brief Runs recognition on a block of audio.
 * @param samples     PCM samples; the original note gives the range [-1, 1].
 * @param sampleRate  Sample rate of @p samples — presumably in Hz; TODO confirm.
 * @param isStreaming Streaming-mode flag.
 */
RecognitionResult infer(const std::vector<float>& samples,
int sampleRate,
bool isStreaming = true);
signals:
/** @brief Emitted after a model was loaded successfully. */
void modelLoaded(const QString& modelPath);
/** @brief Emitted when loading a model failed; @p error describes why. */
void modelLoadError(const QString& modelPath, const QString& error);
/** @brief Emitted after the model was released. */
void modelUnloaded();
/** @brief Emitted with a human-readable message when an operation fails. */
void error(const QString& message);
private:
struct Impl;
std::unique_ptr<Impl> impl_;
bool loaded_ = false;  // true after a successful load, false after unload
};
} // namespace impress

46
src/core/tokenizer.cpp Normal file
View File

@ -0,0 +1,46 @@
#include "tokenizer.h"
#include "utils/logger.h"
#include <QFile>
#include <QTextStream>
static const char* const kTag = "Tokenizer";
namespace impress {
/** @brief Creates a tokenizer with an empty vocabulary. */
Tokenizer::Tokenizer() = default;
bool Tokenizer::loadVocabulary(const QString& vocabPath) {
QFile file(vocabPath);
if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) {
LOG_ERROR(kTag, QString("无法打开词表文件: %1").arg(vocabPath));
return false;
}
QTextStream stream(&file);
stream.setEncoding(QStringConverter::Utf8);
vocabulary_.clear();
while (!stream.atEnd()) {
QString line = stream.readLine().trimmed();
if (!line.isEmpty()) {
vocabulary_.push_back(line);
}
}
LOG_INFO(kTag, QString("词表已加载: %1 个词条").arg(vocabulary_.size()));
return true;
}
/**
 * @brief Concatenates the vocabulary strings of the given token IDs.
 *
 * @param tokens Token IDs to translate.
 * @return The joined text; an ID outside the vocabulary is rendered as
 *         "<unk:ID>".
 */
QString Tokenizer::decode(const std::vector<int>& tokens) const {
    const int vocabCount = static_cast<int>(vocabulary_.size());

    QString text;
    for (const int id : tokens) {
        const bool known = id >= 0 && id < vocabCount;
        if (!known) {
            text += QString("<unk:%1>").arg(id);
            continue;
        }
        text += vocabulary_[id];
    }
    return text;
}
} // namespace impress

31
src/core/tokenizer.h Normal file
View File

@ -0,0 +1,31 @@
#pragma once
#include <QString>
#include <vector>
namespace impress {
/**
 * @brief Maps token IDs back to text using a vocabulary file.
 *
 * NOTE(review): the original comment mentions BPE (Byte Pair Encoding);
 * decode() as declared here performs a plain ID-to-string lookup — confirm
 * whether BPE merging is planned.
 */
class Tokenizer {
public:
Tokenizer();
/** @brief Loads the vocabulary file. */
bool loadVocabulary(const QString& vocabPath);
/** @brief Decodes token IDs into text. */
QString decode(const std::vector<int>& tokens) const;
/** @brief Whether a non-empty vocabulary has been loaded. */
bool isLoaded() const { return !vocabulary_.empty(); }
private:
std::vector<QString> vocabulary_; // token_id -> token string
};
} // namespace impress

64
src/main.cpp Normal file
View File

@ -0,0 +1,64 @@
#include "app/application.h"
#include "ui/main_window.h"
#include "app/config_manager.h"
#include "utils/logger.h"
#include <QFile>
#include <QDir>
#include <QStandardPaths>
#include <QCommandLineParser>
int main(int argc, char* argv[])
{
impress::Application app(argc, argv);
app.setApplicationName("Impress Voice Input");
app.setApplicationVersion("0.1.0");
app.setOrganizationName("Impress");
// 初始化日志文件
QString logDir = QStandardPaths::writableLocation(
QStandardPaths::AppDataLocation);
QDir().mkpath(logDir);
QString logFilePath = logDir + "/app.log";
impress::Logger::init(logFilePath);
LOG_INFO("Main", QString("应用启动,日志文件: %1").arg(logFilePath));
// 命令行参数
QCommandLineParser parser;
parser.setApplicationDescription("基于 ONNX 的实时语音转文本输入法");
parser.addHelpOption();
parser.addVersionOption();
parser.addOptions({
{{"c", "config"}, "指定配置文件路径", "path"},
{{"m", "model"}, "指定模型路径", "path"},
});
parser.process(app);
// 加载用户配置
auto* configManager = app.configManager();
QString configPath = parser.value("config");
if (configPath.isEmpty()) {
// 使用默认配置目录
configPath = logDir + "/config.json";
}
if (QFile::exists(configPath)) {
configManager->load(configPath);
LOG_INFO("Main", QString("已加载配置: %1").arg(configPath));
} else {
LOG_INFO("Main", "使用默认配置");
}
// 命令行覆盖模型路径
QString modelPath = parser.value("model");
if (!modelPath.isEmpty()) {
configManager->set("stt.model_path", modelPath);
}
// 创建并显示主窗口
impress::MainWindow mainWindow(configManager);
mainWindow.show();
return app.exec();
}

View File

@ -0,0 +1,241 @@
#include "file_transcribe_page.h"
#include "core/stt_engine.h"
#include "audio/audio_decoder.h"
#include "app/config_manager.h"
#include "utils/logger.h"
#include "utils/string_utils.h"
#include <QVBoxLayout>
#include <QHBoxLayout>
#include <QGroupBox>
#include <QLabel>
#include <QPushButton>
#include <QListWidget>
#include <QListWidgetItem>
#include <QTextEdit>
#include <QProgressBar>
#include <QComboBox>
#include <QFileDialog>
#include <QMessageBox>
#include <QDateTime>
#include <QFileInfo>
static const char* const kTag = "FileTranscribePage";
namespace impress {
/**
 * @brief Builds the page with its own STT engine and audio decoder.
 *
 * Both helpers are created with this widget as their QObject parent.
 * @param configManager Configuration source read when transcription starts.
 */
FileTranscribePage::FileTranscribePage(ConfigManager* configManager, QWidget* parent)
: QWidget(parent)
, configManager_(configManager)
, sttEngine_(new STTEngine(this))
, audioDecoder_(new AudioDecoder(this))
{
setupUI();
}
/** @brief Defaulted: the helpers and widgets are created with this page as their Qt parent. */
FileTranscribePage::~FileTranscribePage() = default;
/**
 * @brief Creates the page's widgets and wires their signals.
 *
 * Layout from top to bottom: the file queue with add/clear buttons, the
 * control row (start, stop, export format, export), the progress bar and
 * status label, and the read-only result view.
 */
void FileTranscribePage::setupUI() {
auto* mainLayout = new QVBoxLayout(this);
// File queue
auto* queueGroup = new QGroupBox("文件队列", this);
auto* queueLayout = new QVBoxLayout(queueGroup);
fileList_ = new QListWidget(this);
fileList_->setMinimumHeight(120);
queueLayout->addWidget(fileList_);
auto* btnLayout = new QHBoxLayout();
addBtn_ = new QPushButton("添加文件", this);
connect(addBtn_, &QPushButton::clicked, this, &FileTranscribePage::onAddFiles);
btnLayout->addWidget(addBtn_);
clearBtn_ = new QPushButton("清空队列", this);
connect(clearBtn_, &QPushButton::clicked, this, &FileTranscribePage::onClearQueue);
btnLayout->addWidget(clearBtn_);
btnLayout->addStretch();
queueLayout->addLayout(btnLayout);
mainLayout->addWidget(queueGroup);
// Control row
auto* controlLayout = new QHBoxLayout();
startBtn_ = new QPushButton("开始转写", this);
startBtn_->setStyleSheet("QPushButton { font-weight: bold; padding: 8px 16px; }");
connect(startBtn_, &QPushButton::clicked, this, &FileTranscribePage::onStartTranscribe);
controlLayout->addWidget(startBtn_);
stopBtn_ = new QPushButton("停止", this);
// Stop only makes sense while a run is active.
stopBtn_->setEnabled(false);
connect(stopBtn_, &QPushButton::clicked, this, &FileTranscribePage::onStopTranscribe);
controlLayout->addWidget(stopBtn_);
controlLayout->addWidget(new QLabel("导出格式:", this));
exportFormat_ = new QComboBox(this);
exportFormat_->addItems({"TXT", "SRT (字幕)", "JSON"});
controlLayout->addWidget(exportFormat_);
exportBtn_ = new QPushButton("导出结果", this);
connect(exportBtn_, &QPushButton::clicked, this, &FileTranscribePage::onExportResult);
controlLayout->addWidget(exportBtn_);
mainLayout->addLayout(controlLayout);
// Progress (hidden until a run starts)
progressBar_ = new QProgressBar(this);
progressBar_->setVisible(false);
mainLayout->addWidget(progressBar_);
statusLabel_ = new QLabel("就绪", this);
statusLabel_->setStyleSheet("color: gray;");
mainLayout->addWidget(statusLabel_);
// Results
auto* resultGroup = new QGroupBox("转写结果", this);
auto* resultLayout = new QVBoxLayout(resultGroup);
resultText_ = new QTextEdit(this);
resultText_->setReadOnly(true);
resultText_->setFont(QFont("Monospace", 11));
resultLayout->addWidget(resultText_);
mainLayout->addWidget(resultGroup);
}
void FileTranscribePage::updateUIState() {
startBtn_->setEnabled(!isTranscribing_ && !tasks_.isEmpty());
stopBtn_->setEnabled(isTranscribing_);
addBtn_->setEnabled(!isTranscribing_);
clearBtn_->setEnabled(!isTranscribing_);
exportBtn_->setEnabled(!isTranscribing_);
}
void FileTranscribePage::onAddFiles() {
QStringList formats;
for (const auto& fmt : AudioDecoder::supportedFormats()) {
formats << QString("*.%1").arg(fmt);
}
QString filter = QString("音频文件 (%1)").arg(formats.join(" "));
QStringList files = QFileDialog::getOpenFileNames(this, "选择音频文件", "", filter);
for (const auto& file : files) {
TranscribeTask task;
task.filePath = file;
task.status = "等待中";
tasks_.append(task);
auto* item = new QListWidgetItem(
QString("%1 — 等待中").arg(QFileInfo(file).fileName()));
fileList_->addItem(item);
}
}
/** @brief Removes every queued task and its list row, then reports it in the status label. */
void FileTranscribePage::onClearQueue() {
    fileList_->clear();
    tasks_.clear();
    statusLabel_->setText("队列已清空");
}
/**
 * @brief Starts transcribing the queued files from the first one.
 *
 * Requires a non-empty queue and a configured model path; the model is
 * loaded synchronously with the device and thread count from the
 * configuration. The user is told through a message box when any of these
 * preconditions fails.
 */
void FileTranscribePage::onStartTranscribe() {
    if (tasks_.isEmpty()) {
        QMessageBox::information(this, "提示", "请先添加音频文件");
        return;
    }
    QString modelPath = configManager_->get("stt.model_path").toString();
    if (modelPath.isEmpty()) {
        QMessageBox::warning(this, "提示", "请先在配置页面设置模型路径");
        return;
    }
    if (!sttEngine_->loadModelSync(modelPath,
                                   configManager_->get("stt.device").toString(),
                                   configManager_->get("stt.num_threads").toInt()))
    {
        QMessageBox::critical(this, "错误", "模型加载失败");
        return;
    }

    isTranscribing_ = true;
    currentTaskIndex_ = 0;
    // Start from zero so the bar does not show the previous run's progress.
    progressBar_->setValue(0);
    progressBar_->setVisible(true);
    updateUIState();
    processNextFile();
}
/** @brief Marks the run as stopped, hides the progress bar and refreshes the buttons. */
void FileTranscribePage::onStopTranscribe() {
    isTranscribing_ = false;

    statusLabel_->setText("已停止");
    progressBar_->setVisible(false);
    updateUIState();
}
void FileTranscribePage::processNextFile() {
if (!isTranscribing_ || currentTaskIndex_ >= tasks_.size()) {
isTranscribing_ = false;
statusLabel_->setText("全部完成");
progressBar_->setVisible(false);
updateUIState();
return;
}
auto& task = tasks_[currentTaskIndex_];
task.status = "处理中";
statusLabel_->setText(QString("正在处理: %1").arg(QFileInfo(task.filePath).fileName()));
// TODO: 在后台线程中执行解码和推理
// 当前为占位实现
if (audioDecoder_->decode(task.filePath)) {
const auto& samples = audioDecoder_->samples();
int sampleRate = audioDecoder_->sampleRate();
auto result = sttEngine_->infer(samples, sampleRate, false);
task.result = result.text;
task.status = "完成";
task.progress = 1.0;
resultText_->append(
QString("=== %1 ===\n%2\n").arg(
QFileInfo(task.filePath).fileName(), result.text));
} else {
task.status = "失败";
}
// 更新列表项
auto* item = fileList_->item(currentTaskIndex_);
if (item) {
item->setText(QString("%1 — %2")
.arg(QFileInfo(task.filePath).fileName(), task.status));
}
currentTaskIndex_++;
progressBar_->setValue(
static_cast<int>(currentTaskIndex_ * 100.0 / tasks_.size()));
// 继续下一个
if (isTranscribing_) {
processNextFile();
}
}
void FileTranscribePage::onExportResult() {
if (resultText_->toPlainText().isEmpty()) {
QMessageBox::information(this, "提示", "没有可导出的结果");
return;
}
QString format = exportFormat_->currentText();
QString ext = (format == "TXT") ? "txt" : (format == "JSON") ? "json" : "srt";
QString filter = QString("%1 文件 (*.%2)").arg(format, ext);
QString path = QFileDialog::getSaveFileName(this, "导出结果", "", filter);
if (path.isEmpty()) return;
QFile file(path);
if (file.open(QIODevice::WriteOnly)) {
file.write(resultText_->toPlainText().toUtf8());
file.close();
statusLabel_->setText(QString("已导出: %1").arg(path));
}
}
} // namespace impress

View File

@ -0,0 +1,70 @@
#pragma once
#include <QWidget>
#include <memory>
class QLabel;
class QPushButton;
class QTextEdit;
class QProgressBar;
class QListWidget;
class QComboBox;
namespace impress {
class ConfigManager;
class STTEngine;
class AudioDecoder;
/** @brief One queued audio file together with its transcription state. */
struct TranscribeTask {
QString filePath;       // path of the audio file
QString status; // one of "等待中" (waiting), "处理中" (processing), "完成" (done), "失败" (failed)
QString result;         // recognized text once the task is done
double progress = 0.0;  // set to 1.0 when the task completes
};
/**
 * @brief Page for transcribing queued audio files.
 *
 * The user adds files to a queue, starts or stops the run, and exports the
 * collected results.
 */
class FileTranscribePage : public QWidget {
Q_OBJECT
public:
explicit FileTranscribePage(ConfigManager* configManager, QWidget* parent = nullptr);
~FileTranscribePage() override;
private slots:
void onAddFiles();         // "add files" button
void onClearQueue();       // "clear queue" button
void onStartTranscribe();  // "start" button
void onStopTranscribe();   // "stop" button
void onExportResult();     // "export" button
private:
void setupUI();            // creates the widgets and connects their signals
void updateUIState();      // enables/disables buttons for the current run state
void processNextFile();    // handles the task at currentTaskIndex_
ConfigManager* configManager_;
STTEngine* sttEngine_;
AudioDecoder* audioDecoder_;
// UI widgets (assigned in setupUI())
QListWidget* fileList_;
QPushButton* addBtn_;
QPushButton* clearBtn_;
QPushButton* startBtn_;
QPushButton* stopBtn_;
QPushButton* exportBtn_;
QTextEdit* resultText_;
QProgressBar* progressBar_;
QLabel* statusLabel_;
QComboBox* exportFormat_;
bool isTranscribing_ = false;   // true while a run is active
QList<TranscribeTask> tasks_;   // queue, in the same order as fileList_
int currentTaskIndex_ = -1;     // index into tasks_ of the file being processed
};
} // namespace impress

86
src/ui/main_window.cpp Normal file
View File

@ -0,0 +1,86 @@
#include "main_window.h"
#include "stt_test_page.h"
#include "file_transcribe_page.h"
#include "settings_page.h"
#include "app/config_manager.h"
#include "utils/logger.h"
#include <QMenuBar>
#include <QMenu>
#include <QAction>
#include <QFile>
#include <QMessageBox>
static const char* const kTag = "MainWindow";
namespace impress {
/**
 * @brief Creates the main window: title, initial size, tabs, menu bar and style sheet.
 * @param configManager Configuration object handed on to every page.
 */
MainWindow::MainWindow(ConfigManager* configManager, QWidget* parent)
: QMainWindow(parent)
, configManager_(configManager)
{
setWindowTitle("Impress Voice Input");
resize(1000, 700);
setupUI();
setupMenuBar();
loadStyleSheet();
LOG_INFO(kTag, "主窗口已创建");
}
/** @brief Defaulted: the tab widget and pages are created with Qt parents in setupUI(). */
MainWindow::~MainWindow() = default;
/**
 * @brief Builds the central tab widget with the three application pages.
 *
 * Tab order: real-time recognition, audio file transcription, settings.
 */
void MainWindow::setupUI() {
    auto* tabs = new QTabWidget(this);
    tabWidget_ = tabs;

    // Create the pages first, in the same order as the tabs.
    auto* realtimePage = new STTTestPage(configManager_, tabs);
    sttPage_ = realtimePage;
    auto* filePage = new FileTranscribePage(configManager_, tabs);
    transcribePage_ = filePage;
    auto* configPage = new SettingsPage(configManager_, tabs);
    settingsPage_ = configPage;

    tabs->addTab(realtimePage, "实时语音识别");
    tabs->addTab(filePage, "音频文件转写");
    tabs->addTab(configPage, "配置");

    setCentralWidget(tabs);
}
void MainWindow::setupMenuBar() {
// 文件菜单
auto* fileMenu = menuBar()->addMenu("文件");
auto* exportAction = fileMenu->addAction("导出结果");
exportAction->setShortcut(QKeySequence("Ctrl+E"));
fileMenu->addSeparator();
auto* exitAction = fileMenu->addAction("退出");
exitAction->setShortcut(QKeySequence("Ctrl+Q"));
connect(exitAction, &QAction::triggered, this, &MainWindow::close);
// 帮助菜单
auto* helpMenu = menuBar()->addMenu("帮助");
auto* aboutAction = helpMenu->addAction("关于");
connect(aboutAction, &QAction::triggered, this, [this] {
QMessageBox::about(this, "关于",
"<h2>Impress Voice Input</h2>"
"<p>基于 ONNX 的实时语音转文本输入法</p>"
"<p>版本: 0.1.0</p>");
});
}
void MainWindow::loadStyleSheet() {
QFile styleFile(":/styles/main.qss");
if (styleFile.open(QIODevice::ReadOnly)) {
setStyleSheet(styleFile.readAll());
styleFile.close();
}
}
/** @brief Logs that the window is closing, then defers to QMainWindow::closeEvent. */
void MainWindow::closeEvent(QCloseEvent* event) {
LOG_INFO(kTag, "主窗口关闭");
QMainWindow::closeEvent(event);
}
} // namespace impress

40
src/ui/main_window.h Normal file
View File

@ -0,0 +1,40 @@
#pragma once
#include <QMainWindow>
#include <QTabWidget>
#include <memory>
namespace impress {
class ConfigManager;
class STTTestPage;
class FileTranscribePage;
class SettingsPage;
/**
 * @brief Application main window.
 *
 * Hosts the pages (real-time recognition, file transcription, settings) in
 * a tab widget.
 */
class MainWindow : public QMainWindow {
Q_OBJECT
public:
explicit MainWindow(ConfigManager* configManager, QWidget* parent = nullptr);
~MainWindow() override;
protected:
void closeEvent(QCloseEvent* event) override;
private:
void setupUI();         // creates the tab widget and the pages
void setupMenuBar();    // creates the File and Help menus
void loadStyleSheet();  // applies the bundled style sheet
ConfigManager* configManager_;
// Assigned in setupUI().
STTTestPage* sttPage_;
FileTranscribePage* transcribePage_;
SettingsPage* settingsPage_;
QTabWidget* tabWidget_;
};
} // namespace impress

View File

@ -0,0 +1,287 @@
/* Impress Voice Input - 全局样式表 */
/* ========== 全局 ========== */
* {
font-family: "PingFang SC", "Microsoft YaHei", "Noto Sans CJK SC", sans-serif;
}
QMainWindow, QWidget {
background-color: #ffffff;
color: #2c3e50;
}
/* ========== QTabWidget ========== */
QTabWidget::pane {
border: 1px solid #dcdfe6;
border-radius: 4px;
background: #fafafa;
}
QTabBar::tab {
background: #f0f2f5;
border: 1px solid #dcdfe6;
border-bottom: none;
border-top-left-radius: 4px;
border-top-right-radius: 4px;
padding: 10px 24px;
margin-right: 2px;
font-size: 14px;
color: #606266;
}
QTabBar::tab:selected {
background: #ffffff;
border-bottom: 2px solid #409eff;
color: #409eff;
font-weight: bold;
}
QTabBar::tab:hover {
color: #409eff;
}
/* ========== QPushButton ========== */
QPushButton {
background-color: #ffffff;
border: 1px solid #dcdfe6;
border-radius: 4px;
padding: 6px 16px;
color: #606266;
font-size: 13px;
}
QPushButton:hover {
background-color: #ecf5ff;
border-color: #b3d8ff;
color: #409eff;
}
QPushButton:pressed {
background-color: #e6f0ff;
}
/* 主要操作按钮 */
QPushButton[objectName="saveBtn"],
QPushButton[text="保存配置"] {
background-color: #409eff;
color: #ffffff;
border: 1px solid #409eff;
}
QPushButton[objectName="saveBtn"]:hover,
QPushButton[text="保存配置"]:hover {
background-color: #66b1ff;
}
/* 危险操作按钮 */
QPushButton[text="停止"],
QPushButton[text="停止录音"] {
background-color: #f56c6c;
color: #ffffff;
border: 1px solid #f56c6c;
}
QPushButton[text="停止"]:hover,
QPushButton[text="停止录音"]:hover {
background-color: #f78989;
}
/* ========== QGroupBox ========== */
QGroupBox {
font-weight: bold;
border: 1px solid #dcdfe6;
border-radius: 6px;
margin-top: 12px;
padding-top: 16px;
}
QGroupBox::title {
subcontrol-origin: margin;
subcontrol-position: top left;
padding: 0 8px;
color: #303133;
}
/* ========== QLabel ========== */
QLabel {
color: #606266;
}
/* ========== QLineEdit ========== */
QLineEdit {
border: 1px solid #dcdfe6;
border-radius: 4px;
padding: 5px 10px;
background: #ffffff;
}
QLineEdit:focus {
border-color: #409eff;
}
QLineEdit[readOnly="true"] {
background-color: #f5f7fa;
}
/* ========== QComboBox ========== */
QComboBox {
border: 1px solid #dcdfe6;
border-radius: 4px;
padding: 5px 10px;
background: #ffffff;
min-width: 120px;
}
QComboBox:hover {
border-color: #c0c4cc;
}
QComboBox:focus {
border-color: #409eff;
}
QComboBox::drop-down {
border: none;
width: 24px;
}
/* ========== QSpinBox / QDoubleSpinBox ========== */
QSpinBox, QDoubleSpinBox {
border: 1px solid #dcdfe6;
border-radius: 4px;
padding: 4px 8px;
background: #ffffff;
}
QSpinBox:focus, QDoubleSpinBox:focus {
border-color: #409eff;
}
/* ========== QProgressBar ========== */
QProgressBar {
border: 1px solid #dcdfe6;
border-radius: 4px;
text-align: center;
height: 20px;
background: #f5f7fa;
}
QProgressBar::chunk {
background: qlineargradient(x1:0, y1:0, x2:1, y2:0,
stop:0 #409eff, stop:1 #66b1ff);
border-radius: 3px;
}
/* ========== QTextEdit ========== */
QTextEdit {
border: 1px solid #dcdfe6;
border-radius: 4px;
padding: 8px;
background: #ffffff;
selection-background-color: #ecf5ff;
}
QTextEdit:focus {
border-color: #409eff;
}
/* ========== QListWidget ========== */
QListWidget {
border: 1px solid #dcdfe6;
border-radius: 4px;
background: #ffffff;
padding: 4px;
}
QListWidget::item {
padding: 8px;
border-radius: 4px;
}
QListWidget::item:selected {
background-color: #ecf5ff;
color: #409eff;
}
QListWidget::item:hover {
background-color: #f5f7fa;
}
/* ========== QCheckBox ========== */
QCheckBox {
spacing: 8px;
}
QCheckBox::indicator {
width: 16px;
height: 16px;
border: 1px solid #dcdfe6;
border-radius: 3px;
}
QCheckBox::indicator:checked {
background-color: #409eff;
border-color: #409eff;
}
/* ========== QMenu / QMenuBar ========== */
QMenuBar {
background-color: #ffffff;
border-bottom: 1px solid #dcdfe6;
padding: 2px;
}
QMenuBar::item:selected {
background-color: #ecf5ff;
color: #409eff;
}
QMenu {
background-color: #ffffff;
border: 1px solid #dcdfe6;
border-radius: 4px;
padding: 4px;
}
QMenu::item:selected {
background-color: #ecf5ff;
color: #409eff;
}
QMenu::separator {
height: 1px;
background-color: #ebeef5;
margin: 4px 0;
}
/* ========== QMessageBox ========== */
QMessageBox {
background-color: #ffffff;
}
QMessageBox QLabel {
color: #303133;
}
/* ========== 滚动条 ========== */
QScrollBar:vertical {
border: none;
background: #f5f7fa;
width: 8px;
border-radius: 4px;
}
QScrollBar::handle:vertical {
background: #c0c4cc;
border-radius: 4px;
min-height: 30px;
}
QScrollBar::handle:vertical:hover {
background: #909399;
}
QScrollBar::add-line:vertical,
QScrollBar::sub-line:vertical {
height: 0;
}

230
src/ui/settings_page.cpp Normal file
View File

@ -0,0 +1,230 @@
#include "settings_page.h"
#include "app/config_manager.h"
#include "utils/logger.h"
#include <QVBoxLayout>
#include <QHBoxLayout>
#include <QFormLayout>
#include <QGroupBox>
#include <QLineEdit>
#include <QPushButton>
#include <QComboBox>
#include <QSpinBox>
#include <QDoubleSpinBox>
#include <QCheckBox>
#include <QLabel>
#include <QFileDialog>
#include <QMessageBox>
static const char* const kTag = "SettingsPage";
namespace impress {
/**
 * @brief Builds the settings form and fills it from the current configuration.
 * @param configManager Configuration object the page reads from and writes to.
 */
SettingsPage::SettingsPage(ConfigManager* configManager, QWidget* parent)
: QWidget(parent)
, configManager_(configManager)
{
setupUI();
loadFromConfig();
}
/** @brief Defaulted: the widgets are created with this page as their Qt parent. */
SettingsPage::~SettingsPage() = default;
/**
 * @brief Creates the settings form.
 *
 * Three group boxes (STT inference, audio, interface) are followed by a row
 * with the save and reset buttons and a status label. The values set here
 * are only initial widget defaults; loadFromConfig() overwrites them.
 */
void SettingsPage::setupUI() {
auto* mainLayout = new QVBoxLayout(this);
// STT settings
auto* sttGroup = new QGroupBox("STT 推理设置", this);
auto* sttLayout = new QFormLayout(sttGroup);
// Model path: line edit plus a browse button on one row.
auto* modelRow = new QHBoxLayout();
modelPathEdit_ = new QLineEdit(this);
modelPathEdit_->setPlaceholderText("选择 ONNX 模型文件路径...");
browseBtn_ = new QPushButton("浏览...", this);
connect(browseBtn_, &QPushButton::clicked, this, &SettingsPage::onBrowseModelPath);
modelRow->addWidget(modelPathEdit_);
modelRow->addWidget(browseBtn_);
sttLayout->addRow("模型路径:", modelRow);
modelTypeCombo_ = new QComboBox(this);
modelTypeCombo_->addItems({"whisper", "paraformer", "conformer"});
sttLayout->addRow("模型类型:", modelTypeCombo_);
deviceCombo_ = new QComboBox(this);
deviceCombo_->addItems({"cpu", "gpu"});
sttLayout->addRow("推理设备:", deviceCombo_);
threadSpin_ = new QSpinBox(this);
threadSpin_->setRange(1, 32);
threadSpin_->setValue(4);
sttLayout->addRow("推理线程数:", threadSpin_);
sampleRateSpin_ = new QSpinBox(this);
sampleRateSpin_->setRange(8000, 192000);
sampleRateSpin_->setSingleStep(1000);
sampleRateSpin_->setValue(16000);
sampleRateSpin_->setSuffix(" Hz");
sttLayout->addRow("采样率:", sampleRateSpin_);
languageCombo_ = new QComboBox(this);
languageCombo_->addItems({"zh", "en", "ja", "ko", "fr", "de", "auto"});
sttLayout->addRow("识别语言:", languageCombo_);
streamingCheck_ = new QCheckBox("启用流式识别", this);
streamingCheck_->setChecked(true);
sttLayout->addRow("流式识别:", streamingCheck_);
beamSizeSpin_ = new QSpinBox(this);
beamSizeSpin_->setRange(1, 20);
beamSizeSpin_->setValue(5);
sttLayout->addRow("Beam Size:", beamSizeSpin_);
temperatureSpin_ = new QDoubleSpinBox(this);
temperatureSpin_->setRange(0.0, 2.0);
temperatureSpin_->setSingleStep(0.1);
temperatureSpin_->setValue(0.0);
sttLayout->addRow("温度 (Temperature):", temperatureSpin_);
mainLayout->addWidget(sttGroup);
// Audio settings
auto* audioGroup = new QGroupBox("音频设置", this);
auto* audioLayout = new QFormLayout(audioGroup);
bufferSizeSpin_ = new QSpinBox(this);
bufferSizeSpin_->setRange(10, 100);
bufferSizeSpin_->setValue(20);
bufferSizeSpin_->setSuffix(" ms");
audioLayout->addRow("缓冲区大小:", bufferSizeSpin_);
chunkDurationSpin_ = new QSpinBox(this);
chunkDurationSpin_->setRange(500, 10000);
chunkDurationSpin_->setSingleStep(500);
chunkDurationSpin_->setValue(3000);
chunkDurationSpin_->setSuffix(" ms");
audioLayout->addRow("推理块时长:", chunkDurationSpin_);
paddingSpin_ = new QSpinBox(this);
paddingSpin_->setRange(0, 2000);
paddingSpin_->setSingleStep(100);
paddingSpin_->setValue(500);
paddingSpin_->setSuffix(" ms");
audioLayout->addRow("块间重叠:", paddingSpin_);
mainLayout->addWidget(audioGroup);
// Interface settings
auto* uiGroup = new QGroupBox("界面设置", this);
auto* uiLayout = new QFormLayout(uiGroup);
themeCombo_ = new QComboBox(this);
themeCombo_->addItems({"light", "dark"});
uiLayout->addRow("主题:", themeCombo_);
fontSizeSpin_ = new QSpinBox(this);
fontSizeSpin_->setRange(10, 24);
fontSizeSpin_->setValue(14);
uiLayout->addRow("字体大小:", fontSizeSpin_);
showWaveformCheck_ = new QCheckBox("显示波形", this);
showWaveformCheck_->setChecked(true);
uiLayout->addRow("波形显示:", showWaveformCheck_);
showConfidenceCheck_ = new QCheckBox("显示置信度", this);
showConfidenceCheck_->setChecked(true);
uiLayout->addRow("置信度显示:", showConfidenceCheck_);
mainLayout->addWidget(uiGroup);
// Action buttons
auto* btnLayout = new QHBoxLayout();
auto* saveBtn = new QPushButton("保存配置", this);
saveBtn->setStyleSheet("QPushButton { font-weight: bold; padding: 8px 16px; }");
connect(saveBtn, &QPushButton::clicked, this, &SettingsPage::onSaveConfig);
btnLayout->addWidget(saveBtn);
auto* resetBtn = new QPushButton("恢复默认", this);
connect(resetBtn, &QPushButton::clicked, this, &SettingsPage::onResetConfig);
btnLayout->addWidget(resetBtn);
btnLayout->addStretch();
statusLabel_ = new QLabel("配置未修改", this);
statusLabel_->setStyleSheet("color: gray;");
btnLayout->addWidget(statusLabel_);
mainLayout->addLayout(btnLayout);
// Push everything to the top of the page.
mainLayout->addStretch();
}
/**
 * @brief Copies the current configuration values into the form widgets.
 *
 * Reads the "stt.*", "audio.*" and "ui.*" keys and converts each value to
 * the type its widget expects.
 */
void SettingsPage::loadFromConfig() {
    auto& cfg = *configManager_;

    // STT inference
    modelPathEdit_->setText(cfg.get("stt.model_path").toString());
    modelTypeCombo_->setCurrentText(cfg.get("stt.model_type").toString());
    deviceCombo_->setCurrentText(cfg.get("stt.device").toString());
    threadSpin_->setValue(cfg.get("stt.num_threads").toInt());
    sampleRateSpin_->setValue(cfg.get("stt.sample_rate").toInt());
    languageCombo_->setCurrentText(cfg.get("stt.language").toString());
    streamingCheck_->setChecked(cfg.get("stt.streaming").toBool());
    beamSizeSpin_->setValue(cfg.get("stt.beam_size").toInt());
    temperatureSpin_->setValue(cfg.get("stt.temperature").toDouble());

    // Audio
    bufferSizeSpin_->setValue(cfg.get("audio.buffer_size_ms").toInt());
    chunkDurationSpin_->setValue(cfg.get("audio.chunk_duration_ms").toInt());
    paddingSpin_->setValue(cfg.get("audio.padding_ms").toInt());

    // Interface
    themeCombo_->setCurrentText(cfg.get("ui.theme").toString());
    fontSizeSpin_->setValue(cfg.get("ui.font_size").toInt());
    showWaveformCheck_->setChecked(cfg.get("ui.show_waveform").toBool());
    showConfidenceCheck_->setChecked(cfg.get("ui.show_confidence").toBool());
}
void SettingsPage::saveToConfig() {
configManager_->set("stt.model_path", modelPathEdit_->text());
configManager_->set("stt.model_type", modelTypeCombo_->currentText());
configManager_->set("stt.device", deviceCombo_->currentText());
configManager_->set("stt.num_threads", threadSpin_->value());
configManager_->set("stt.sample_rate", sampleRateSpin_->value());
configManager_->set("stt.language", languageCombo_->currentText());
configManager_->set("stt.streaming", streamingCheck_->isChecked());
configManager_->set("stt.beam_size", beamSizeSpin_->value());
configManager_->set("stt.temperature", temperatureSpin_->value());
configManager_->set("audio.buffer_size_ms", bufferSizeSpin_->value());
configManager_->set("audio.chunk_duration_ms", chunkDurationSpin_->value());
configManager_->set("audio.padding_ms", paddingSpin_->value());
configManager_->set("ui.theme", themeCombo_->currentText());
configManager_->set("ui.font_size", fontSizeSpin_->value());
configManager_->set("ui.show_waveform", showWaveformCheck_->isChecked());
configManager_->set("ui.show_confidence", showConfidenceCheck_->isChecked());
}
void SettingsPage::onBrowseModelPath() {
QString path = QFileDialog::getOpenFileName(this, "选择 ONNX 模型", "",
"ONNX 模型 (*.onnx);;所有文件 (*.*)");
if (!path.isEmpty()) {
modelPathEdit_->setText(path);
}
}
/**
 * @brief Push the widget values into the config manager and persist them.
 *
 * The status label and the log both report whether ConfigManager::save()
 * succeeded.
 */
void SettingsPage::onSaveConfig() {
    saveToConfig();

    const bool persisted = configManager_->save();
    if (!persisted) {
        statusLabel_->setText("配置保存失败,请检查路径权限");
        LOG_ERROR(kTag, "配置持久化失败");
        return;
    }

    statusLabel_->setText(QString("配置已保存到: %1").arg(configManager_->configPath()));
    LOG_INFO(kTag, QString("配置已持久化: %1").arg(configManager_->configPath()));
}
/**
 * @brief Ask for confirmation, then reset the configuration to its defaults
 *        and refresh the widgets from it.
 */
void SettingsPage::onResetConfig() {
    const auto answer = QMessageBox::question(this, "确认", "确定要恢复默认配置吗?",
                                              QMessageBox::Yes | QMessageBox::No);
    if (answer != QMessageBox::Yes) {
        return;  // user declined; keep the current values
    }

    configManager_->resetToDefaults();
    loadFromConfig();
    statusLabel_->setText("已恢复默认配置");
}
} // namespace impress

69
src/ui/settings_page.h Normal file
View File

@ -0,0 +1,69 @@
#pragma once
#include <QWidget>
class QFormLayout;
class QLineEdit;
class QComboBox;
class QSpinBox;
class QDoubleSpinBox;
class QCheckBox;
class QPushButton;
class QLabel;
class QGroupBox;
namespace impress {
class ConfigManager;
/**
 * @brief Settings page widget.
 *
 * Presents editors for the speech-to-text ("stt.*"), audio ("audio.*") and
 * UI ("ui.*") configuration keys. Values are copied from the ConfigManager
 * into the widgets by loadFromConfig() and written back by saveToConfig().
 */
class SettingsPage : public QWidget {
Q_OBJECT
public:
/// @param configManager Configuration store that is read and written by this page (stored as a raw pointer).
/// @param parent        Optional parent widget.
explicit SettingsPage(ConfigManager* configManager, QWidget* parent = nullptr);
~SettingsPage() override;
private slots:
/// Opens a file dialog and copies the chosen model path into modelPathEdit_.
void onBrowseModelPath();
/// Writes the widget values into the config manager and persists them.
void onSaveConfig();
/// After user confirmation, resets the configuration to defaults and reloads the widgets.
void onResetConfig();
private:
/// Builds the widget hierarchy of the page.
void setupUI();
/// Copies stored configuration values into the widgets.
void loadFromConfig();
/// Copies widget values into the configuration (does not persist by itself).
void saveToConfig();
ConfigManager* configManager_;
// STT settings
QLineEdit* modelPathEdit_;
// NOTE(review): presumably the button that triggers onBrowseModelPath(); its setup is not visible here — confirm.
QPushButton* browseBtn_;
QComboBox* modelTypeCombo_;
QComboBox* deviceCombo_;
QSpinBox* threadSpin_;
QSpinBox* sampleRateSpin_;
QComboBox* languageCombo_;
QCheckBox* streamingCheck_;
QSpinBox* beamSizeSpin_;
QDoubleSpinBox* temperatureSpin_;
// Audio settings
QSpinBox* bufferSizeSpin_;
QSpinBox* chunkDurationSpin_;
QSpinBox* paddingSpin_;
// UI settings
QComboBox* themeCombo_;
QSpinBox* fontSizeSpin_;
QCheckBox* showWaveformCheck_;
QCheckBox* showConfidenceCheck_;
// Status line shown next to the action buttons
QLabel* statusLabel_;
};
} // namespace impress

235
src/ui/stt_test_page.cpp Normal file
View File

@ -0,0 +1,235 @@
#include "stt_test_page.h"
#include "core/stt_engine.h"
#include "audio/audio_capture.h"
#include "audio/audio_ring_buffer.h"
#include "widgets/audio_waveform.h"
#include "app/config_manager.h"
#include "utils/logger.h"
#include <QVBoxLayout>
#include <QHBoxLayout>
#include <QFormLayout>
#include <QGroupBox>
#include <QLabel>
#include <QPushButton>
#include <QComboBox>
#include <QTextEdit>
#include <QSpinBox>
#include <QMessageBox>
#include <QDateTime>
#include <QFileInfo>
// Tag passed to the LOG_* macros for every message emitted from this file.
static const char* const kTag = "STTTestPage";
namespace impress {
/**
 * @brief Build the page, create the engine and capture objects, and wire their signals.
 *
 * The STT engine and the audio capture object are created with this page as
 * their QObject parent.
 *
 * @param configManager Configuration store queried for model path, device, threads and sample rate.
 * @param parent        Optional parent widget.
 */
STTTestPage::STTTestPage(ConfigManager* configManager, QWidget* parent)
    : QWidget(parent),
      configManager_(configManager),
      sttEngine_(new STTEngine(this)),
      audioCapture_(new AudioCapture(this))
{
    setupUI();

    // Captured audio is delivered to the chunking slot.
    connect(audioCapture_, &AudioCapture::audioDataReady, this, &STTTestPage::onAudioDataReady);

    // Model lifecycle notifications from the engine.
    connect(sttEngine_, &STTEngine::modelLoaded, this, &STTTestPage::onModelLoaded);
    connect(sttEngine_, &STTEngine::modelLoadError, this, &STTTestPage::onModelLoadError);
    connect(sttEngine_, &STTEngine::modelUnloaded, this, &STTTestPage::onModelUnloaded);
}
// Defaulted destructor; this class performs no explicit cleanup of its own.
STTTestPage::~STTTestPage() = default;
/**
 * @brief Create and lay out all widgets of the page.
 *
 * Top to bottom: control panel (input device, inference interval, record
 * button with status label), latency read-out, waveform view, and the
 * read-only recognition output. Ends by syncing widget state via
 * updateUIState().
 */
void STTTestPage::setupUI() {
    auto* pageLayout = new QVBoxLayout(this);

    // ---- Control panel ----
    auto* controlBox = new QGroupBox("控制面板", this);
    auto* controlForm = new QFormLayout(controlBox);

    deviceCombo_ = new QComboBox(this);
    deviceCombo_->addItems(AudioCapture::getDeviceList());
    controlForm->addRow("输入设备:", deviceCombo_);

    // Length of audio (in ms) accumulated before each inference call.
    chunkSizeSpin_ = new QSpinBox(this);
    chunkSizeSpin_->setRange(500, 10000);
    chunkSizeSpin_->setSingleStep(500);
    chunkSizeSpin_->setValue(3000);
    chunkSizeSpin_->setSuffix(" ms");
    controlForm->addRow("推理间隔:", chunkSizeSpin_);

    auto* buttonRow = new QHBoxLayout();
    recordBtn_ = new QPushButton("开始录音", this);
    recordBtn_->setMinimumWidth(120);
    recordBtn_->setStyleSheet("QPushButton { font-weight: bold; padding: 8px 16px; }");
    connect(recordBtn_, &QPushButton::clicked, this, &STTTestPage::onToggleRecording);
    buttonRow->addWidget(recordBtn_);

    statusLabel_ = new QLabel("就绪", this);
    statusLabel_->setStyleSheet("color: gray;");
    buttonRow->addWidget(statusLabel_);
    buttonRow->addStretch();
    controlForm->addRow(buttonRow);

    pageLayout->addWidget(controlBox);

    // ---- Status information ----
    auto* infoRow = new QHBoxLayout();
    latencyLabel_ = new QLabel("延迟: -- ms", this);
    latencyLabel_->setStyleSheet("font-family: monospace; font-size: 13px;");
    infoRow->addWidget(latencyLabel_);
    infoRow->addStretch();
    pageLayout->addLayout(infoRow);

    // ---- Waveform ----
    waveform_ = new AudioWaveform(this);
    waveform_->setMinimumHeight(80);
    pageLayout->addWidget(waveform_);

    // ---- Recognition output ----
    auto* resultBox = new QGroupBox("识别结果", this);
    auto* resultLayout = new QVBoxLayout(resultBox);
    textOutput_ = new QTextEdit(this);
    textOutput_->setReadOnly(true);
    textOutput_->setFont(QFont("Monospace", 12));
    resultLayout->addWidget(textOutput_);
    pageLayout->addWidget(resultBox);

    updateUIState();
}
/**
 * @brief Synchronize widget text and enabled state with isRecording_ / isLoadingModel_.
 *
 * - The record button shows "stop" styling while recording.
 * - The record button is disabled while a model load is in flight so that a
 *   second click cannot start another load.
 * - Device and chunk-size editors are only editable when the page is idle.
 */
void STTTestPage::updateUIState() {
    recordBtn_->setText(isRecording_ ? "停止录音" : "开始录音");
    recordBtn_->setStyleSheet(isRecording_
        ? "QPushButton { font-weight: bold; padding: 8px 16px; background-color: #e74c3c; color: white; }"
        : "QPushButton { font-weight: bold; padding: 8px 16px; }");
    // Clicking during an asynchronous load would re-enter onToggleRecording().
    recordBtn_->setEnabled(!isLoadingModel_);

    const bool idle = !isRecording_ && !isLoadingModel_;
    deviceCombo_->setEnabled(idle);
    chunkSizeSpin_->setEnabled(idle);
}
void STTTestPage::onToggleRecording() {
if (isRecording_) {
audioCapture_->stop();
sttEngine_->unloadModel();
isRecording_ = false;
} else {
// 读取配置
QString modelPath = configManager_->get("stt.model_path").toString();
if (modelPath.isEmpty()) {
QMessageBox::warning(this, "提示",
"请先在「配置」页面设置模型路径并保存");
return;
}
// 异步加载模型
if (!sttEngine_->isLoaded() ||
currentModelPath_ != modelPath) {
isLoadingModel_ = true;
statusLabel_->setText("正在加载模型,请稍候...");
updateUIState();
sttEngine_->loadModelAsync(modelPath,
configManager_->get("stt.device").toString(),
configManager_->get("stt.num_threads").toInt());
currentModelPath_ = modelPath;
// 注意startAudioCapture() 将在 onModelLoaded() 回调中调用
} else {
startAudioCapture();
}
}
updateUIState();
}
void STTTestPage::onModelLoaded(const QString& modelPath) {
LOG_INFO(kTag, QString("模型加载成功: %1").arg(modelPath));
isLoadingModel_ = false;
statusLabel_->setText(QString("模型就绪: %1").arg(
QFileInfo(modelPath).fileName()));
updateUIState();
// 如果用户仍在录音状态(已切换 UI启动采集
if (!isRecording_) {
startAudioCapture();
}
}
void STTTestPage::onModelLoadError(const QString& modelPath, const QString& error) {
LOG_ERROR(kTag, QString("模型加载失败: %1 - %2").arg(modelPath, error));
isLoadingModel_ = false;
statusLabel_->setText("模型加载失败");
updateUIState();
QMessageBox::critical(this, "模型加载错误",
QString("无法加载模型文件:\n%1\n\n错误信息:\n%2")
.arg(modelPath, error));
}
void STTTestPage::onModelUnloaded() {
isLoadingModel_ = false;
statusLabel_->setText("模型已卸载");
}
void STTTestPage::startAudioCapture() {
int deviceIdx = deviceCombo_->currentIndex() - 1;
int sampleRate = configManager_->get("stt.sample_rate").toInt();
if (!audioCapture_->start(deviceIdx, sampleRate)) {
QMessageBox::critical(this, "错误", "无法启动音频采集");
return;
}
isRecording_ = true;
statusLabel_->setText(QString("录音中 | 模型: %1").arg(
QFileInfo(currentModelPath_).fileName()));
updateUIState();
}
/**
 * @brief Receive a block of captured samples, update the waveform and run
 *        inference once enough audio has accumulated.
 *
 * Samples are appended to chunkBuffer_. When the buffer holds at least
 * sample_rate * interval_ms / 1000 samples, that many samples are removed
 * from the front and handed to processAudioChunk().
 *
 * @param samples    Newly captured samples.
 * @param sampleRate Sample rate reported by the capture object; forwarded to inference.
 */
void STTTestPage::onAudioDataReady(const std::vector<float>& samples, int sampleRate) {
    // The waveform always shows the latest block, whether or not a chunk is due.
    waveform_->setSamples(samples);

    chunkBuffer_.insert(chunkBuffer_.end(), samples.begin(), samples.end());

    const int chunkSize = configManager_->get("stt.sample_rate").toInt()
                          * chunkSizeSpin_->value() / 1000;
    if (chunkSize <= 0) {
        // A zero or negative configured sample rate would otherwise feed empty
        // chunks to the engine on every callback while the buffer grows
        // without bound.
        chunkBuffer_.clear();
        return;
    }
    if (chunkBuffer_.size() < static_cast<std::size_t>(chunkSize)) {
        return;  // not enough audio yet
    }

    const auto chunkEnd = chunkBuffer_.begin() + chunkSize;
    const std::vector<float> chunk(chunkBuffer_.begin(), chunkEnd);
    chunkBuffer_.erase(chunkBuffer_.begin(), chunkEnd);
    processAudioChunk(chunk, sampleRate);
}
void STTTestPage::processAudioChunk(const std::vector<float>& samples, int sampleRate) {
// 模型已在 onToggleRecording 中异步加载,此处防御性检查
if (!sttEngine_->isLoaded()) {
return;
}
auto result = sttEngine_->infer(samples, sampleRate, true);
emit onRecognitionResult(result.text, result.confidence, result.latency_ms, result.isFinal);
}
void STTTestPage::onRecognitionResult(const QString& text, float confidence,
double latency, bool isFinal)
{
QString timestamp = QDateTime::currentDateTime().toString("hh:mm:ss");
QString line = QString("[%1] %2 (置信度: %3%, 延迟: %4 ms)\n")
.arg(timestamp, text)
.arg(confidence * 100, 0, 'f', 1)
.arg(latency, 0, 'f', 1);
textOutput_->append(line);
latencyLabel_->setText(QString("延迟: %1 ms").arg(latency, 0, 'f', 1));
if (isFinal) {
textOutput_->append("---\n");
}
}
} // namespace impress

64
src/ui/stt_test_page.h Normal file
View File

@ -0,0 +1,64 @@
#pragma once
#include <QWidget>
#include <memory>
#include "widgets/audio_waveform.h"
class QLabel;
class QPushButton;
class QComboBox;
class QTextEdit;
class QSpinBox;
namespace impress {
class ConfigManager;
class STTEngine;
class AudioCapture;
/**
 * @brief STT test page.
 *
 * Captures audio from an input device, feeds it to the STT engine in
 * fixed-length chunks, and shows the recognized text, the inference latency
 * and a waveform of the most recent audio block.
 */
class STTTestPage : public QWidget {
Q_OBJECT
public:
/// @param configManager Configuration store queried for model and audio settings (stored as a raw pointer).
/// @param parent        Optional parent widget.
explicit STTTestPage(ConfigManager* configManager, QWidget* parent = nullptr);
~STTTestPage() override;
private slots:
/// Starts or stops a recording session (record button).
void onToggleRecording();
/// Receives captured samples, updates the waveform and triggers chunked inference.
void onAudioDataReady(const std::vector<float>& samples, int sampleRate);
/// Appends one recognition result to the output and updates the latency label.
void onRecognitionResult(const QString& text, float confidence, double latency, bool isFinal);
/// Model finished loading; starts capture if it is not running yet.
void onModelLoaded(const QString& modelPath);
/// Model failed to load; reports the error.
void onModelLoadError(const QString& modelPath, const QString& error);
/// Model was unloaded.
void onModelUnloaded();
private:
/// Builds the widget hierarchy.
void setupUI();
/// Syncs widget text/enabled state with isRecording_ and isLoadingModel_.
void updateUIState();
/// Starts audio capture on the selected device and enters the recording state.
void startAudioCapture();
/// Runs inference on one chunk and displays the result.
void processAudioChunk(const std::vector<float>& samples, int sampleRate);
ConfigManager* configManager_;
STTEngine* sttEngine_;
AudioCapture* audioCapture_;
// UI widgets
QComboBox* deviceCombo_;
QPushButton* recordBtn_;
QTextEdit* textOutput_;
QLabel* latencyLabel_;
QLabel* statusLabel_;
AudioWaveform* waveform_;
QSpinBox* chunkSizeSpin_;
// True between a successful startAudioCapture() and the next stop.
bool isRecording_ = false;
// True while an asynchronous model load is pending.
bool isLoadingModel_ = false;
// Samples accumulated until a full inference chunk is available.
std::vector<float> chunkBuffer_;
// Path passed to the most recent loadModelAsync() call.
QString currentModelPath_;
};
} // namespace impress

View File

@ -0,0 +1,68 @@
#include "audio_waveform.h"
#include <QPainter>
#include <QPainterPath>
#include <algorithm>
namespace impress {
// Creates an empty waveform view: at least 60 px tall, expanding horizontally
// and fixed vertically.
AudioWaveform::AudioWaveform(QWidget* parent)
: QWidget(parent)
{
setMinimumHeight(60);
setSizePolicy(QSizePolicy::Expanding, QSizePolicy::Fixed);
}
/**
 * @brief Replace the displayed samples with a copy of @p samples and schedule a repaint.
 *
 * @param samples Samples to display; copied into the widget's own storage.
 */
void AudioWaveform::setSamples(const std::vector<float>& samples) {
    samples_.resize(static_cast<int>(samples.size()));
    std::copy(samples.begin(), samples.end(), samples_.begin());
    update();
}
/**
 * @brief Paint the waveform: light background, translucent fill and an outline.
 *
 * One sample is picked per horizontal pixel (nearest-lower index) and drawn
 * relative to the vertical center, scaled by (height / 2 - 5) pixels per
 * unit of sample value. A placeholder text is shown when there are no
 * samples.
 */
void AudioWaveform::paintEvent(QPaintEvent* /*event*/) {
    QPainter painter(this);
    painter.setRenderHint(QPainter::Antialiasing);

    // Background
    painter.fillRect(rect(), QColor(245, 245, 245));

    if (samples_.isEmpty()) {
        painter.setPen(QColor(180, 180, 180));
        painter.drawText(rect(), Qt::AlignCenter, "无音频数据");
        return;
    }

    const int w = width();
    const int h = height();
    const int centerY = h / 2;
    const int maxAmplitude = centerY - 5;

    // Filled area between the center line and the waveform
    QPainterPath fillPath;
    fillPath.moveTo(0, centerY);

    // Waveform outline
    QPainterPath linePath;

    for (int x = 0; x < w; ++x) {
        const size_t idx = static_cast<size_t>(x) * samples_.size() / w;
        const float sample = samples_[idx];
        const int y = centerY - static_cast<int>(sample * maxAmplitude);

        fillPath.lineTo(x, y);
        // Only the first point starts the sub-path; every later point must be
        // connected with lineTo(). Calling moveTo() for each point yields a
        // path without segments, so no outline would be drawn.
        if (x == 0) {
            linePath.moveTo(x, y);
        } else {
            linePath.lineTo(x, y);
        }
    }

    fillPath.lineTo(w, centerY);
    fillPath.closeSubpath();

    painter.setPen(Qt::NoPen);
    painter.setBrush(fillColor_);
    painter.drawPath(fillPath);

    // Stroke the outline without a brush so the open path is not filled.
    painter.setBrush(Qt::NoBrush);
    painter.setPen(QPen(lineColor_, 1.5));
    painter.drawPath(linePath);
}
} // namespace impress

View File

@ -0,0 +1,29 @@
#pragma once
#include <QWidget>
#include <QVector>
#include <vector>
namespace impress {
/**
 * @brief Widget that draws a block of audio samples as a waveform.
 */
class AudioWaveform : public QWidget {
Q_OBJECT
public:
explicit AudioWaveform(QWidget* parent = nullptr);
/** @brief Set the audio samples and trigger a repaint */
void setSamples(const std::vector<float>& samples);
protected:
/// Paints the background, the filled area and the waveform outline.
void paintEvent(QPaintEvent* event) override;
private:
// Copy of the samples most recently passed to setSamples().
QVector<float> samples_;
// Outline color.
QColor lineColor_ = QColor(52, 152, 219);
// Fill color (same hue as the outline, alpha 30).
QColor fillColor_ = QColor(52, 152, 219, 30);
};
} // namespace impress

View File

@ -0,0 +1,47 @@
#include "progress_panel.h"
#include <QVBoxLayout>
#include <QProgressBar>
#include <QLabel>
namespace impress {
/**
 * @brief Build the panel (status label above a 0–100 progress bar) and start hidden.
 *
 * @param parent Optional parent widget.
 */
ProgressPanel::ProgressPanel(QWidget* parent)
    : QWidget(parent)
{
    auto* column = new QVBoxLayout(this);
    column->setContentsMargins(0, 4, 0, 4);

    // Status text sits above the bar.
    statusLabel_ = new QLabel("就绪", this);
    column->addWidget(statusLabel_);

    // Bar works in whole percent.
    progressBar_ = new QProgressBar(this);
    progressBar_->setRange(0, 100);
    progressBar_->setValue(0);
    column->addWidget(progressBar_);

    // The panel is invisible until show() is called.
    hide();
}
void ProgressPanel::setProgress(double value) {
progressBar_->setValue(static_cast<int>(value * 100));
}
// Replaces the text of the status label shown above the progress bar.
void ProgressPanel::setStatusText(const QString& text) {
statusLabel_->setText(text);
}
// Restores the initial state: progress 0 and the default status text.
// Visibility is not changed.
void ProgressPanel::reset() {
progressBar_->setValue(0);
statusLabel_->setText("就绪");
}
// Shows the panel by forwarding to QWidget::show().
// NOTE(review): QWidget::show() is not virtual, so this overload is only used
// when called through a ProgressPanel (not a QWidget*) — confirm that is intended.
void ProgressPanel::show() {
QWidget::show();
}
// Hides the panel by forwarding to QWidget::hide().
// NOTE(review): QWidget::hide() is not virtual, so this overload is only used
// when called through a ProgressPanel (not a QWidget*) — confirm that is intended.
void ProgressPanel::hide() {
QWidget::hide();
}
} // namespace impress

View File

@ -0,0 +1,41 @@
#pragma once
#include <QWidget>
class QProgressBar;
class QLabel;
namespace impress {
/**
 * @brief Progress panel.
 *
 * A status label stacked above a 0–100 progress bar. The panel is hidden
 * after construction.
 */
class ProgressPanel : public QWidget {
Q_OBJECT
public:
explicit ProgressPanel(QWidget* parent = nullptr);
/** @brief Set the progress (0.0 - 1.0) */
void setProgress(double value);
/** @brief Set the status text */
void setStatusText(const QString& text);
/** @brief Reset the panel (progress 0, default status text) */
void reset();
/** @brief Whether the panel is currently visible */
bool isActive() const { return isVisible(); }
/** @brief Show / hide the panel */
void show();
void hide();
private:
QProgressBar* progressBar_;
QLabel* statusLabel_;
};
} // namespace impress

View File

@ -0,0 +1,29 @@
#include "text_output.h"
#include <QScrollBar>
namespace impress {
// Creates a read-only text view using a 12 pt "Monospace" font.
TextOutput::TextOutput(QWidget* parent)
: QTextEdit(parent)
{
setReadOnly(true);
setFont(QFont("Monospace", 12));
}
void TextOutput::appendText(const QString& text) {
QTextCursor cursor = textCursor();
cursor.movePosition(QTextCursor::End);
cursor.insertText(text);
setTextCursor(cursor);
verticalScrollBar()->setValue(verticalScrollBar()->maximum());
}
// Removes all content by forwarding to QTextEdit::clear().
void TextOutput::clearText() {
clear();
}
// Returns the whole document as plain text.
QString TextOutput::getFullText() const {
return toPlainText();
}
} // namespace impress

View File

@ -0,0 +1,27 @@
#pragma once
#include <QTextEdit>
namespace impress {
/**
 * @brief Read-only text view for recognition output.
 *
 * Thin QTextEdit subclass that appends text at the end and keeps the view
 * scrolled to the bottom.
 */
class TextOutput : public QTextEdit {
Q_OBJECT
public:
explicit TextOutput(QWidget* parent = nullptr);
/** @brief Append text and scroll to the bottom automatically */
void appendText(const QString& text);
/** @brief Clear the content */
void clearText();
/** @brief Get the full text */
QString getFullText() const;
};
} // namespace impress

127
src/utils/logger.cpp Normal file
View File

@ -0,0 +1,127 @@
#include "logger.h"
#include <QDateTime>
#include <QDebug>
#include <QTextStream>
#include <QStandardPaths>
#include <QDir>
#include <iostream>
namespace impress {
// Guards logFile_ and serializes log output.
QMutex Logger::mutex_;
// Currently open log file, or nullptr when file logging is disabled.
QFile* Logger::logFile_ = nullptr;
/**
 * @brief Initialize logging and open the log file in append mode.
 *
 * Installs the Qt message pattern and opens @p logFilePath. When the path is
 * empty, "<AppDataLocation>/app.log" is used and that directory is created
 * if needed. If the file cannot be opened, file logging stays disabled and a
 * message is written to stderr.
 *
 * Calling init() again closes and releases the previously opened file first,
 * so repeated initialization does not leak the old QFile.
 *
 * @param logFilePath Target file, or an empty string for the default location.
 */
void Logger::init(const QString& logFilePath) {
    QMutexLocker locker(&mutex_);

    qSetMessagePattern("[%{time yyyy-MM-dd hh:mm:ss.zzz}] %{message}");

    // Release a file left over from an earlier init()/setLogFile() call.
    if (logFile_) {
        logFile_->flush();
        logFile_->close();
        delete logFile_;
        logFile_ = nullptr;
    }

    // Determine the log file path.
    QString path = logFilePath;
    if (path.isEmpty()) {
        const QString logDir = QStandardPaths::writableLocation(
            QStandardPaths::AppDataLocation);
        QDir().mkpath(logDir);
        path = logDir + "/app.log";
    }

    logFile_ = new QFile(path);
    if (!logFile_->open(QIODevice::WriteOnly | QIODevice::Append | QIODevice::Text)) {
        delete logFile_;
        logFile_ = nullptr;
        std::cerr << "[Logger] 无法打开日志文件: " << path.toStdString() << std::endl;
    }
    // On success nothing else is needed: writeToFile() flushes after each line.
}
/**
 * @brief Flush, close and release the log file, if one is open.
 *
 * After this call, log messages are no longer written to a file.
 */
void Logger::shutdown() {
    QMutexLocker guard(&mutex_);
    if (logFile_ == nullptr) {
        return;
    }

    logFile_->flush();
    logFile_->close();
    delete logFile_;
    logFile_ = nullptr;
}
void Logger::setLogFile(const QString& path) {
QMutexLocker locker(&mutex_);
if (logFile_) {
logFile_->flush();
logFile_->close();
delete logFile_;
}
logFile_ = new QFile(path);
if (logFile_->open(QIODevice::WriteOnly | QIODevice::Append | QIODevice::Text)) {
// Qt 6 没有 setAutoFlush每次 write 后手动 flush
} else {
delete logFile_;
logFile_ = nullptr;
}
}
/**
 * @brief Format one log entry and write it to the console and the log file.
 *
 * The entry has the form "[timestamp] [LEVEL] [tag] message". Debug, Info and
 * Warning go through the corresponding Qt message stream; Error is written
 * to std::cerr. The whole call runs under the logger mutex.
 *
 * @param level   Severity of the entry.
 * @param tag     Short origin label.
 * @param message Message text.
 */
void Logger::log(LogLevel level, const QString& tag, const QString& message) {
    QMutexLocker guard(&mutex_);

    const QString entry = QString("[%1] [%2] [%3] %4")
                              .arg(getTimestamp(), levelToString(level), tag, message);

    // Console output
    if (level == LogLevel::Debug) {
        qDebug().noquote() << entry;
    } else if (level == LogLevel::Info) {
        qInfo().noquote() << entry;
    } else if (level == LogLevel::Warning) {
        qWarning().noquote() << entry;
    } else if (level == LogLevel::Error) {
        std::cerr << entry.toStdString() << std::endl;
    }

    // File output
    writeToFile(entry);
}
// Convenience wrapper: logs `message` at Debug level.
void Logger::debug(const QString& tag, const QString& message) {
log(LogLevel::Debug, tag, message);
}
// Convenience wrapper: logs `message` at Info level.
void Logger::info(const QString& tag, const QString& message) {
log(LogLevel::Info, tag, message);
}
// Convenience wrapper: logs `message` at Warning level.
void Logger::warning(const QString& tag, const QString& message) {
log(LogLevel::Warning, tag, message);
}
// Convenience wrapper: logs `message` at Error level.
void Logger::error(const QString& tag, const QString& message) {
log(LogLevel::Error, tag, message);
}
/**
 * @brief Map a log level to the label used in log entries.
 *
 * @param level Level to convert.
 * @return "DEBUG", "INFO", "WARN" or "ERROR"; "UNKNOWN" for any other value.
 */
QString Logger::levelToString(LogLevel level) {
    if (level == LogLevel::Debug) {
        return "DEBUG";
    }
    if (level == LogLevel::Info) {
        return "INFO";
    }
    if (level == LogLevel::Warning) {
        return "WARN";
    }
    if (level == LogLevel::Error) {
        return "ERROR";
    }
    return "UNKNOWN";
}
// Returns the current local date/time formatted as "yyyy-MM-dd hh:mm:ss.zzz".
QString Logger::getTimestamp() {
return QDateTime::currentDateTime().toString("yyyy-MM-dd hh:mm:ss.zzz");
}
void Logger::writeToFile(const QString& line) {
if (logFile_ && logFile_->isOpen()) {
QTextStream stream(logFile_);
stream.setEncoding(QStringConverter::Utf8);
stream << line << Qt::endl;
logFile_->flush();
}
}
} // namespace impress

57
src/utils/logger.h Normal file
View File

@ -0,0 +1,57 @@
#pragma once
#include <QCoreApplication>
#include <QString>
#include <QMutex>
#include <QFile>
namespace impress {
/**
 * @brief Severity levels of log entries, in increasing order of severity.
 */
enum class LogLevel {
Debug,
Info,
Warning,
Error
};
/**
 * @brief Static logging facility.
 *
 * Writes each entry to the console and, when a log file is open, to that
 * file. The public entry points take a shared static mutex, so calls are
 * serialized.
 */
class Logger {
public:
/** @brief Initialize logging (optionally with an explicit log file path) */
static void init(const QString& logFilePath = QString());
/** @brief Close the log file */
static void shutdown();
/** @brief Write one entry with the given level, tag and message */
static void log(LogLevel level, const QString& tag, const QString& message);
/** @brief Shorthands for log() with a fixed level */
static void debug(const QString& tag, const QString& message);
static void info(const QString& tag, const QString& message);
static void warning(const QString& tag, const QString& message);
static void error(const QString& tag, const QString& message);
/** @brief Set the log file path (switch at runtime) */
static void setLogFile(const QString& path);
private:
// Label used for a level in log entries.
static QString levelToString(LogLevel level);
// Current local time as "yyyy-MM-dd hh:mm:ss.zzz".
static QString getTimestamp();
// Appends one line to the open log file; does not lock by itself.
static void writeToFile(const QString& line);
static QMutex mutex_;
static QFile* logFile_;
};
// Convenience macros: forward (tag, msg) to the Logger function of the matching level.
#define LOG_DEBUG(tag, msg) ::impress::Logger::debug(tag, msg)
#define LOG_INFO(tag, msg) ::impress::Logger::info(tag, msg)
#define LOG_WARNING(tag, msg) ::impress::Logger::warning(tag, msg)
#define LOG_ERROR(tag, msg) ::impress::Logger::error(tag, msg)
} // namespace impress

View File

@ -0,0 +1,46 @@
#include "string_utils.h"
#include <sstream>
#include <iomanip>
namespace impress {
// Splits `input` at every occurrence of `delimiter`; empty parts are dropped.
QStringList StringUtils::split(const QString& input, const QString& delimiter) {
return input.split(delimiter, Qt::SkipEmptyParts);
}
// Returns `input` with leading and trailing whitespace removed.
QString StringUtils::trim(const QString& input) {
return input.trimmed();
}
/**
 * @brief Format a list of floats as a ", "-separated string.
 *
 * Each value is written in fixed notation with @p precision digits after the
 * decimal point.
 *
 * @param values    Values to format.
 * @param precision Number of fractional digits.
 * @return The joined string; empty when @p values is empty.
 */
QString StringUtils::joinFloats(const std::vector<float>& values, int precision) {
    std::ostringstream out;
    out << std::fixed << std::setprecision(precision);

    // The separator is empty before the first element and ", " afterwards.
    const char* separator = "";
    for (const float value : values) {
        out << separator << value;
        separator = ", ";
    }
    return QString::fromStdString(out.str());
}
// Converts `input` to a std::string via QString::toStdString().
std::string StringUtils::toUtf8(const QString& input) {
return input.toStdString();
}
/**
 * @brief Decode a UTF-8 encoded std::string into a QString.
 *
 * The explicit length means embedded NUL bytes do not cut the input short.
 * The length is passed as qsizetype, the type QString::fromUtf8() takes in
 * Qt 6, rather than being narrowed to int.
 *
 * @param input UTF-8 encoded bytes.
 * @return The decoded string.
 */
QString StringUtils::fromUtf8(const std::string& input) {
    return QString::fromUtf8(input.data(), static_cast<qsizetype>(input.size()));
}
/**
 * @brief Format a byte count using binary units (1 KB = 1024 B).
 *
 * Below 1024 the exact byte count is shown; KB and MB use one decimal, GB
 * uses two.
 *
 * @param bytes Size in bytes.
 * @return Text such as "512 B", "1.5 KB", "3.2 MB" or "1.25 GB".
 */
QString StringUtils::formatFileSize(qint64 bytes) {
    constexpr qint64 kKiB = 1024;
    constexpr qint64 kMiB = kKiB * 1024;
    constexpr qint64 kGiB = kMiB * 1024;

    if (bytes < kKiB) {
        return QString("%1 B").arg(bytes);
    }
    if (bytes < kMiB) {
        return QString("%1 KB").arg(bytes / static_cast<double>(kKiB), 0, 'f', 1);
    }
    if (bytes < kGiB) {
        return QString("%1 MB").arg(bytes / static_cast<double>(kMiB), 0, 'f', 1);
    }
    return QString("%1 GB").arg(bytes / static_cast<double>(kGiB), 0, 'f', 2);
}
/**
 * @brief Format a duration in seconds as "mm:ss".
 *
 * Both fields are zero-padded to at least two digits; minutes are not
 * wrapped into hours.
 *
 * @param totalSeconds Duration in seconds.
 * @return The formatted duration.
 */
QString StringUtils::formatDuration(int totalSeconds) {
    const QChar zero('0');
    const int wholeMinutes = totalSeconds / 60;
    const int restSeconds = totalSeconds % 60;
    return QString("%1:%2")
        .arg(wholeMinutes, 2, 10, zero)
        .arg(restSeconds, 2, 10, zero);
}
} // namespace impress

36
src/utils/string_utils.h Normal file
View File

@ -0,0 +1,36 @@
#pragma once
#include <QString>
#include <QStringList>
#include <vector>
namespace impress {
/**
 * @brief Collection of static string helper functions.
 */
class StringUtils {
public:
/** @brief Split a string by a delimiter */
static QStringList split(const QString& input, const QString& delimiter);
/** @brief Remove whitespace from both ends */
static QString trim(const QString& input);
/** @brief Convert a float vector to a comma-separated QString */
static QString joinFloats(const std::vector<float>& values, int precision = 4);
/** @brief Convert a QString to a UTF-8 std::string */
static std::string toUtf8(const QString& input);
/** @brief Convert a std::string to a QString */
static QString fromUtf8(const std::string& input);
/** @brief Format a file size */
static QString formatFileSize(qint64 bytes);
/** @brief Format a duration (mm:ss) */
static QString formatDuration(int totalSeconds);
};
} // namespace impress

3
src/utils/timer.cpp Normal file
View File

@ -0,0 +1,3 @@
#include "timer.h"
// Timer 为纯头文件实现,此文件保留以维持 CMake 构建一致性

34
src/utils/timer.h Normal file
View File

@ -0,0 +1,34 @@
#pragma once
#include <chrono>
namespace impress {
/**
 * @brief Simple stopwatch for measuring elapsed wall time.
 *
 * Uses std::chrono::steady_clock, which is monotonic, so measured intervals
 * are never negative and are unaffected by system clock adjustments.
 * std::chrono::high_resolution_clock gives no such guarantee.
 */
class Timer {
    using Clock = std::chrono::steady_clock;

public:
    /** @brief Construct the timer and start measuring immediately. */
    Timer() { reset(); }

    /** @brief Restart the measurement from now. */
    void reset() {
        start_ = Clock::now();
    }

    /** @brief Milliseconds elapsed since construction or the last reset(). */
    double elapsedMs() const {
        return std::chrono::duration<double, std::milli>(Clock::now() - start_).count();
    }

    /** @brief Microseconds elapsed since construction or the last reset(). */
    double elapsedUs() const {
        return std::chrono::duration<double, std::micro>(Clock::now() - start_).count();
    }

private:
    Clock::time_point start_;
};
} // namespace impress

12698
third_party/dr_libs/dr_flac.h vendored Normal file

File diff suppressed because it is too large Load Diff

5412
third_party/dr_libs/dr_mp3.h vendored Normal file

File diff suppressed because it is too large Load Diff

9105
third_party/dr_libs/dr_wav.h vendored Normal file

File diff suppressed because it is too large Load Diff

25796
third_party/nlohmann_json/json.hpp vendored Normal file

File diff suppressed because it is too large Load Diff