feat: 完善跨平台支持(Windows + macOS + Linux)

CMakeLists.txt:
- Qt6::DBus 和 pthread 改为平台条件链接
- 新增 PLATFORM_WINDOWS/PLATFORM_MACOS/PLATFORM_LINUX 编译宏
- 平台特定的源文件通过条件块选择

Windows 实现(新增):
- win_hotkey.cpp/h: 使用 RegisterHotKey API + QAbstractNativeEventFilter
  捕获 WM_HOTKEY 消息,通过 GetAsyncKeyState 轮询检测按键松开
- win_text_injector.cpp/h: 使用 SendInput API 的 KEYEVENTF_UNICODE
  实现全 Unicode 字符注入

macOS 实现(占位):
- mac_hotkey.cpp/h: 预留 CGEventTap 接口,暂不实现
- mac_text_injector.cpp/h: 使用 CGEventCreateKeyboardEvent 实现

通用修复:
- 硬编码 /tmp 路径替换为 QDir::tempPath()(跨平台临时目录)
- voice_input_service.cpp 使用条件 include 选择平台实现
- 设置页面移除 /tmp 硬编码提示文本

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Alvin Young 2026-05-13 11:28:57 +08:00
parent baec3482a7
commit 85a0890478
12 changed files with 675 additions and 14 deletions

View File

@ -28,7 +28,15 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
# Qt 6
find_package(Qt6 REQUIRED COMPONENTS Core Widgets Concurrent Network DBus)
find_package(Qt6 REQUIRED COMPONENTS Core Widgets Concurrent Network)
# Qt D-Bus is only looked up on non-Windows platforms (Linux/macOS); Windows requests Qt Gui instead
if(WIN32)
find_package(Qt6 REQUIRED COMPONENTS Gui)
else()
find_package(Qt6 REQUIRED COMPONENTS DBus)
endif()
qt_standard_project_setup()
# ONNX Runtime
@ -44,7 +52,7 @@ set(SOURCES
src/app/application.cpp
src/app/config_manager.cpp
# Core
# Core (平台无关)
src/core/stt_engine.cpp
src/core/sense_voice_engine.cpp
src/core/sense_voice_features.cpp
@ -53,8 +61,6 @@ set(SOURCES
src/core/whisper_tokenizer.cpp
src/core/audio_processor.cpp
src/core/vad.cpp
src/core/caps_lock_voice_hotkey.cpp
src/core/wayland_text_injector.cpp
src/core/voice_input_service.cpp
# Audio
@ -90,8 +96,6 @@ set(HEADERS
src/core/whisper_tokenizer.h
src/core/audio_processor.h
src/core/vad.h
src/core/caps_lock_voice_hotkey.h
src/core/wayland_text_injector.h
src/core/voice_input_service.h
src/audio/audio_capture.h
@ -112,8 +116,24 @@ set(HEADERS
)
# ============================================================================
# Platform-specific sources
# (one global-hotkey backend + one text-injection backend per operating system)
# ============================================================================
if(WIN32)
# Windows: RegisterHotKey + SendInput
list(APPEND SOURCES src/core/win_hotkey.cpp src/core/win_text_injector.cpp)
list(APPEND HEADERS src/core/win_hotkey.h src/core/win_text_injector.h)
add_compile_definitions(PLATFORM_WINDOWS)
elseif(APPLE)
# macOS: Carbon EventManager / CGEvent (the hotkey backend is currently a stub)
list(APPEND SOURCES src/core/mac_hotkey.cpp src/core/mac_text_injector.cpp)
list(APPEND HEADERS src/core/mac_hotkey.h src/core/mac_text_injector.h)
add_compile_definitions(PLATFORM_MACOS)
else()
# Linux: D-Bus Portal + XTest
list(APPEND SOURCES src/core/caps_lock_voice_hotkey.cpp src/core/wayland_text_injector.cpp)
list(APPEND HEADERS src/core/caps_lock_voice_hotkey.h src/core/wayland_text_injector.h)
add_compile_definitions(PLATFORM_LINUX)
endif()
add_executable(${PROJECT_NAME} ${SOURCES} ${HEADERS})
target_include_directories(${PROJECT_NAME} PRIVATE
@ -130,10 +150,11 @@ target_link_libraries(${PROJECT_NAME} PRIVATE
Qt6::Widgets
Qt6::Concurrent
Qt6::Network
Qt6::DBus
$<$<BOOL:${WIN32}>:Qt6::Gui>
$<$<NOT:$<BOOL:${WIN32}>>:Qt6::DBus>
${ONNXRUNTIME_LIBRARIES}
${PORTAUDIO_LIBRARIES}
pthread
$<$<NOT:$<BOOL:${WIN32}>>:pthread>
)
target_compile_options(${PROJECT_NAME} PRIVATE

34
src/core/mac_hotkey.cpp Normal file
View File

@ -0,0 +1,34 @@
#include "mac_hotkey.h"
#include "utils/logger.h"
static const char* const kTag = "CapsLockVoiceHotkey";
namespace impress {
struct CapsLockVoiceHotkey::Impl {};
CapsLockVoiceHotkey::CapsLockVoiceHotkey(QObject* parent)
: QObject(parent)
, impl_(std::make_unique<Impl>())
{}
CapsLockVoiceHotkey::~CapsLockVoiceHotkey() {
stop();
}
/**
 * @brief Attempts to activate the global hotkey.
 *
 * The macOS backend is not implemented: unless the object is already marked
 * active, this logs a warning, emits error() and reports failure.
 *
 * @return true only if the object is already active, false otherwise.
 */
bool CapsLockVoiceHotkey::start() {
    if (!active_) {
        // macOS: to be implemented with CGEventTap or the Carbon EventManager.
        LOG_WARNING(kTag, "macOS 全局快捷键尚未实现");
        emit error("macOS 全局快捷键尚未实现(待完善 CGEventTap");
        return false;
    }
    return true;
}
/**
 * @brief Deactivates the hotkey and clears the recording flag.
 *
 * Does nothing when the hotkey is not active.
 */
void CapsLockVoiceHotkey::stop() {
    if (active_) {
        recording_ = false;
        active_ = false;
        LOG_INFO(kTag, "CapsLock 快捷键已停止");
    }
}
} // namespace impress

33
src/core/mac_hotkey.h Normal file
View File

@ -0,0 +1,33 @@
#pragma once
#include <QObject>
#include <QString>
#include <memory>
namespace impress {
/**
 * @brief CapsLock voice hotkey (macOS variant).
 *
 * Exposes start()/stop(), state accessors and the recording/ready/error
 * signals; the implementation lives in mac_hotkey.cpp.
 */
class CapsLockVoiceHotkey : public QObject {
    Q_OBJECT

public:
    explicit CapsLockVoiceHotkey(QObject* parent = nullptr);
    ~CapsLockVoiceHotkey() override;

    /** @brief Tries to activate the hotkey; returns true when it is active. */
    bool start();

    /** @brief Deactivates the hotkey. */
    void stop();

    /** @brief Whether the hotkey is currently active. */
    bool isActive() const { return active_; }

    /** @brief Whether a recording is currently in progress. */
    bool isRecording() const { return recording_; }

signals:
    /** @brief Recording has started. */
    void recordingStarted();

    /** @brief Recording has stopped. */
    void recordingStopped();

    /** @brief The hotkey is ready for use. */
    void ready();

    /** @brief Activation failed; @p message describes the problem. */
    void error(const QString& message);

private:
    struct Impl;
    std::unique_ptr<Impl> impl_;  // platform-private state (pimpl)
    bool active_{false};
    bool recording_{false};
};
} // namespace impress

View File

@ -0,0 +1,103 @@
#include "mac_text_injector.h"
#include "utils/logger.h"
#ifdef Q_OS_MACOS
#include <ApplicationServices/ApplicationServices.h>
#endif
#include <QThread>
static const char* const kTag = "WaylandTextInjector";
namespace impress {
WaylandTextInjector::WaylandTextInjector(QObject* parent)
: QObject(parent)
{}
WaylandTextInjector::~WaylandTextInjector() = default;
/**
 * @brief Marks the injector as ready for use.
 *
 * On macOS this only sets the initialized flag and logs a reminder that the
 * accessibility permission is required; the permission itself is not checked
 * here. On any other platform initialization fails.
 *
 * @return true if the injector is (or already was) initialized.
 */
bool WaylandTextInjector::initialize() {
#ifdef Q_OS_MACOS
    if (!initialized_) {
        // macOS: posting synthetic key events requires the accessibility permission.
        initialized_ = true;
        LOG_INFO(kTag, "macOS 文本注入器已初始化(需要辅助功能权限)");
    }
    return true;
#else
    if (initialized_) {
        return true;
    }
    LOG_ERROR(kTag, "WaylandTextInjector 仅支持 macOS 平台");
    return false;
#endif
}
bool WaylandTextInjector::injectText(const QString& text) {
if (!initialized_) {
LOG_ERROR(kTag, "文本注入器未初始化");
return false;
}
if (text.isEmpty()) return true;
LOG_DEBUG(kTag, QString("注入文本 (%1 字符): %2").arg(text.length()).arg(text));
for (int i = 0; i < text.length(); i++) {
if (!injectChar(text[i])) {
LOG_WARNING(kTag, QString("字符注入失败: '%1'").arg(text[i]));
}
QThread::usleep(10000);
}
LOG_DEBUG(kTag, "文本注入完成");
return true;
}
/**
 * @brief Posts a key-down/key-up pair carrying a single UTF-16 code unit.
 *
 * Both events are created before anything is posted, so a creation failure
 * can never leave a key-down without its matching key-up, and a null event is
 * never handed to CFRelease (which must not be called with NULL).
 *
 * @param ch Code unit to inject.
 * @return true if both events were created and posted; false on failure or on
 *         non-macOS builds.
 */
bool WaylandTextInjector::injectChar(QChar ch) {
#ifdef Q_OS_MACOS
    CGEventSourceRef source = CGEventSourceCreate(kCGEventSourceStateHIDSystemState);
    if (!source) return false;

    const UniChar code = ch.unicode();

    // Virtual key 0 is a placeholder; the character is carried by the
    // Unicode string attached to each event.
    CGEventRef keyDown = CGEventCreateKeyboardEvent(source, 0, true);
    CGEventRef keyUp = CGEventCreateKeyboardEvent(source, 0, false);

    const bool created = (keyDown != nullptr) && (keyUp != nullptr);
    if (created) {
        CGEventKeyboardSetUnicodeString(keyDown, 1, &code);
        CGEventKeyboardSetUnicodeString(keyUp, 1, &code);
        // Key press followed by key release.
        CGEventPost(kCGHIDEventTap, keyDown);
        CGEventPost(kCGHIDEventTap, keyUp);
    }

    if (keyDown) CFRelease(keyDown);
    if (keyUp) CFRelease(keyUp);
    CFRelease(source);
    return created;
#else
    (void)ch;
    return false;
#endif
}
/**
 * @brief Simulates a press and release of a macOS virtual key code.
 *
 * Both events are created before anything is posted, so a creation failure
 * neither posts an unmatched key-down nor passes NULL to CFRelease.
 *
 * @param keycode Virtual key code; converted to CGKeyCode.
 * @return true if both events were created and posted; false on failure or on
 *         non-macOS builds.
 */
bool WaylandTextInjector::simulateKeycode(unsigned int keycode) {
#ifdef Q_OS_MACOS
    CGEventSourceRef source = CGEventSourceCreate(kCGEventSourceStateHIDSystemState);
    if (!source) return false;

    const auto key = static_cast<CGKeyCode>(keycode);
    CGEventRef keyDown = CGEventCreateKeyboardEvent(source, key, true);
    CGEventRef keyUp = CGEventCreateKeyboardEvent(source, key, false);

    const bool created = (keyDown != nullptr) && (keyUp != nullptr);
    if (created) {
        CGEventPost(kCGHIDEventTap, keyDown);
        CGEventPost(kCGHIDEventTap, keyUp);
    }

    if (keyDown) CFRelease(keyDown);
    if (keyUp) CFRelease(keyUp);
    CFRelease(source);
    return created;
#else
    (void)keycode;
    return false;
#endif
}
} // namespace impress

View File

@ -0,0 +1,29 @@
#pragma once
#include <QObject>
#include <QString>
#include <memory>
namespace impress {
/**
 * @brief Text injector (macOS variant).
 *
 * Declares the initialize/injectText/simulateKeycode interface; the
 * implementation lives in mac_text_injector.cpp.
 */
class WaylandTextInjector : public QObject {
    Q_OBJECT

public:
    explicit WaylandTextInjector(QObject* parent = nullptr);
    ~WaylandTextInjector() override;

    /** @brief Prepares the injector; returns true when it is ready. */
    bool initialize();

    /** @brief Injects @p text character by character. */
    bool injectText(const QString& text);

    /** @brief Whether initialize() has succeeded. */
    bool isInitialized() const { return initialized_; }

    /** @brief Simulates a press and release of the given key code. */
    bool simulateKeycode(unsigned int keycode);

signals:
    /** @brief Error notification carrying a human-readable message. */
    void error(const QString& message);

private:
    /** @brief Injects a single character. */
    bool injectChar(QChar ch);

    bool initialized_{false};
};
} // namespace impress

View File

@ -346,7 +346,7 @@ RecognitionResult SenseVoiceEngine::infer(const std::vector<float>& samples,
// 调试模式:保存原始音频到 WAV 文件
if (debugSaveAudio_ && !samples.empty()) {
QString debugDir = "/tmp/impress_audio_debug";
QString debugDir = QDir::tempPath() + "/impress_audio_debug";
QDir dir;
if (!dir.exists(debugDir)) {
dir.mkpath(debugDir);

View File

@ -1,11 +1,21 @@
#include "voice_input_service.h"
#include "audio/audio_capture.h"
#include "sense_voice_engine.h"
#include "caps_lock_voice_hotkey.h"
#include "wayland_text_injector.h"
#include "app/config_manager.h"
#include "utils/logger.h"
// 平台特定的快捷键和文本注入
#ifdef PLATFORM_WINDOWS
#include "win_hotkey.h"
#include "win_text_injector.h"
#elif defined(PLATFORM_MACOS)
#include "mac_hotkey.h"
#include "mac_text_injector.h"
#else
#include "caps_lock_voice_hotkey.h"
#include "wayland_text_injector.h"
#endif
#include <QThread>
#include <QTimer>
#include <QtConcurrent>

215
src/core/win_hotkey.cpp Normal file
View File

@ -0,0 +1,215 @@
#include "win_hotkey.h"
#include "utils/logger.h"
#ifdef Q_OS_WIN
#include <windows.h>
#include <QAbstractNativeEventFilter>
#include <QGuiApplication>
#endif
static const char* const kTag = "CapsLockVoiceHotkey";
namespace impress {
/** Private state of the Windows hotkey backend (empty on other platforms). */
struct CapsLockVoiceHotkey::Impl {
#ifdef Q_OS_WIN
    /** Long-press threshold in milliseconds. */
    static constexpr int kLongPressMs{1000};

    int hotkeyId{0};                   // id passed to RegisterHotKey; 0 = not registered
    QTimer* longPressTimer{nullptr};   // single-shot; fires when the long-press threshold elapses
    QTimer* keyUpDebounce{nullptr};    // single-shot; resets the hold state after key release
    bool isHolding{false};             // the key is currently considered held down
    bool longPressFired{false};        // the long press already triggered recording
    bool pollThreadRunning{false};     // a key-release poll is currently in progress
    void* nativeEventFilter{nullptr};  // type-erased HotkeyNativeEventFilter*
#endif
};
#ifdef Q_OS_WIN
/**
 * @brief Native event filter that forwards WM_HOTKEY messages to the hotkey object.
 *
 * Every WM_HOTKEY message is consumed (the filter returns true), whether or
 * not a target object is set; all other messages pass through untouched.
 */
class HotkeyNativeEventFilter : public QAbstractNativeEventFilter {
public:
    explicit HotkeyNativeEventFilter(CapsLockVoiceHotkey* hotkey)
        : hotkey_(hotkey) {}

    bool nativeEventFilter(const QByteArray& eventType, void* message,
                           qintptr* /*result*/) override {
        const bool isWindowsMessage = eventType == "windows_generic_MSG"
                                   || eventType == "windows_dispatcher_MSG";
        if (!isWindowsMessage) {
            return false;
        }

        const MSG* nativeMsg = static_cast<const MSG*>(message);
        if (nativeMsg->message != WM_HOTKEY) {
            return false;
        }

        // wParam of WM_HOTKEY carries the hotkey id.
        if (hotkey_ != nullptr) {
            hotkey_->onHotkeyEvent(static_cast<int>(nativeMsg->wParam));
        }
        return true;
    }

private:
    CapsLockVoiceHotkey* hotkey_ = nullptr;  // not owned
};
#endif
CapsLockVoiceHotkey::CapsLockVoiceHotkey(QObject* parent)
: QObject(parent)
, impl_(std::make_unique<Impl>())
{}
CapsLockVoiceHotkey::~CapsLockVoiceHotkey() {
stop();
}
/**
 * @brief Registers CapsLock as a global hotkey and installs the WM_HOTKEY filter.
 *
 * The hotkey is registered with a null window handle, so Windows posts
 * WM_HOTKEY to the message queue of the calling thread. No window needs to
 * exist, and the handle can never refer to another process's window.
 * NOTE(review): this assumes start() and stop() are both called on the thread
 * that runs the Qt event loop (the GUI thread) — confirm against callers.
 *
 * The two timers are created on the first successful call and reused on later
 * start() calls, so repeated start/stop cycles do not pile up timers or
 * duplicate connections.
 *
 * @return true if the hotkey is active (already or newly); false on failure,
 *         in which case error() has been emitted.
 */
bool CapsLockVoiceHotkey::start() {
    if (active_) return true;

#ifdef Q_OS_WIN
    auto* app = QGuiApplication::instance();
    if (!app) {
        emit error("应用程序尚未初始化,无法注册 CapsLock 快捷键");
        LOG_ERROR(kTag, "No application instance; cannot install native event filter");
        return false;
    }

    // Obtain a unique id for the hotkey.
    const ATOM atom = GlobalAddAtomW(L"ImpressVoiceHotkey");
    if (atom == 0) {
        const DWORD err = GetLastError();
        emit error(QString("注册 CapsLock 快捷键失败 (错误码: %1)").arg(err));
        LOG_ERROR(kTag, QString("GlobalAddAtom failed: %1").arg(err));
        return false;
    }

    // Register CapsLock (VK_CAPITAL = 0x14) as a global hotkey.
    // MOD_NOREPEAT suppresses auto-repeat notifications while the key is held.
    if (!RegisterHotKey(nullptr, atom, MOD_NOREPEAT, VK_CAPITAL)) {
        const DWORD err = GetLastError();  // read before any further API call
        GlobalDeleteAtom(atom);            // do not leak the atom on failure
        emit error(QString("注册 CapsLock 快捷键失败 (错误码: %1)").arg(err));
        LOG_ERROR(kTag, QString("RegisterHotKey failed: %1").arg(err));
        return false;
    }
    impl_->hotkeyId = atom;

    // Install the native event filter that receives WM_HOTKEY.
    auto* filter = new HotkeyNativeEventFilter(this);
    app->installNativeEventFilter(filter);
    impl_->nativeEventFilter = filter;

    // Long-press timer: fires once the key has been held for kLongPressMs.
    if (!impl_->longPressTimer) {
        impl_->longPressTimer = new QTimer(this);
        impl_->longPressTimer->setSingleShot(true);
        impl_->longPressTimer->setInterval(Impl::kLongPressMs);
        connect(impl_->longPressTimer, &QTimer::timeout, this, [this]() {
            if (impl_->isHolding && !impl_->longPressFired) {
                impl_->longPressFired = true;
                recording_ = true;
                emit recordingStarted();
                LOG_DEBUG(kTag, "长按触发,开始录音");
            }
        });
    }

    // Delayed reset after key release, to avoid CapsLock state flicker.
    if (!impl_->keyUpDebounce) {
        impl_->keyUpDebounce = new QTimer(this);
        impl_->keyUpDebounce->setSingleShot(true);
        impl_->keyUpDebounce->setInterval(200);
        connect(impl_->keyUpDebounce, &QTimer::timeout, this, [this]() {
            impl_->isHolding = false;
            impl_->longPressFired = false;
        });
    }

    active_ = true;
    emit ready();
    LOG_INFO(kTag, "CapsLock 快捷键已注册");
    return true;
#else
    emit error("CapsLockVoiceHotkey 仅支持 Windows 平台");
    return false;
#endif
}

/**
 * @brief Unregisters the hotkey, removes the native event filter and resets state.
 *
 * Does nothing when the hotkey is not active. The hotkey is unregistered with
 * the same null window handle that start() registered it with. Safe to call
 * when the application instance no longer exists (e.g. late destruction).
 */
void CapsLockVoiceHotkey::stop() {
    if (!active_) return;

#ifdef Q_OS_WIN
    // Tell any in-flight key-release poll to give up.
    impl_->pollThreadRunning = false;

    if (impl_->longPressTimer) {
        impl_->longPressTimer->stop();
    }
    if (impl_->keyUpDebounce) {
        impl_->keyUpDebounce->stop();
    }

    // Remove and destroy the native event filter.
    if (impl_->nativeEventFilter) {
        auto* filter = static_cast<HotkeyNativeEventFilter*>(impl_->nativeEventFilter);
        if (auto* app = QGuiApplication::instance()) {
            app->removeNativeEventFilter(filter);
        }
        delete filter;
        impl_->nativeEventFilter = nullptr;
    }

    // Unregister the hotkey and release its atom.
    if (impl_->hotkeyId) {
        UnregisterHotKey(nullptr, impl_->hotkeyId);
        GlobalDeleteAtom(static_cast<ATOM>(impl_->hotkeyId));
        impl_->hotkeyId = 0;
    }

    active_ = false;
    recording_ = false;
    impl_->isHolding = false;
    impl_->longPressFired = false;

    LOG_INFO(kTag, "CapsLock 快捷键已停止");
#endif
}
#ifdef Q_OS_WIN
/**
 * @brief Handles a WM_HOTKEY notification (key-down of the registered hotkey).
 *
 * Windows only reports the key press, so the release is detected by polling
 * GetAsyncKeyState every 50 ms. The poll runs on a QTimer owned by this
 * object, i.e. in this object's thread: no worker thread is created, timers
 * are only touched from their own thread, and the state flags are not shared
 * across threads.
 *
 * @param hotkeyId Id carried by the WM_HOTKEY message; notifications for any
 *                 id other than the one registered by this object are ignored.
 */
void CapsLockVoiceHotkey::onHotkeyEvent(int hotkeyId) {
    if (!active_ || hotkeyId != impl_->hotkeyId) return;

    impl_->isHolding = true;
    impl_->longPressFired = false;

    // (Re)start the long-press countdown.
    if (impl_->longPressTimer) {
        impl_->longPressTimer->start();
    }

    // Only one release poll at a time.
    if (impl_->pollThreadRunning) return;
    impl_->pollThreadRunning = true;

    auto* releasePoll = new QTimer(this);
    releasePoll->setInterval(50);
    connect(releasePoll, &QTimer::timeout, this, [this, releasePoll]() {
        constexpr int kVkCapsLock = 0x14;  // VK_CAPITAL

        // stop() clears these flags to cancel the poll.
        const bool cancelled = !impl_->pollThreadRunning || !impl_->isHolding;
        const bool released = !(GetAsyncKeyState(kVkCapsLock) & 0x8000);
        if (!cancelled && !released) {
            return;  // still held: keep polling
        }

        releasePoll->stop();
        releasePoll->deleteLater();
        impl_->pollThreadRunning = false;
        if (cancelled) {
            return;
        }

        // Key released: settle all state first, then notify listeners.
        const bool wasLongPress = impl_->longPressFired;
        impl_->isHolding = false;
        impl_->longPressFired = false;
        if (impl_->longPressTimer) {
            impl_->longPressTimer->stop();
        }
        if (impl_->keyUpDebounce) {
            impl_->keyUpDebounce->start();
        }

        if (wasLongPress) {
            // Long press finished -> stop recording.
            recording_ = false;
            emit recordingStopped();
            LOG_DEBUG(kTag, "长按结束,停止录音");
        } else {
            // Short press -> no recording action is taken.
            LOG_DEBUG(kTag, "短按,不拦截 CapsLock");
        }
    });
    releasePoll->start();
}
#endif
} // namespace impress

61
src/core/win_hotkey.h Normal file
View File

@ -0,0 +1,61 @@
#pragma once
#include <QObject>
#include <QString>
#include <QTimer>
#include <memory>
namespace impress {
/**
 * @brief CapsLock long-press voice hotkey (Windows implementation).
 *
 * Registers CapsLock as a global hotkey through the Windows RegisterHotKey
 * API. Behaviour, as implemented in win_hotkey.cpp:
 * 1. Holding CapsLock for at least 1 s emits recordingStarted().
 * 2. Releasing the key after that emits recordingStopped().
 * 3. A press shorter than 1 s triggers no recording.
 */
class CapsLockVoiceHotkey : public QObject {
    Q_OBJECT

public:
    explicit CapsLockVoiceHotkey(QObject* parent = nullptr);
    ~CapsLockVoiceHotkey() override;

    /** @brief Initializes and registers the hotkey. */
    bool start();

    /** @brief Stops and unregisters the hotkey. */
    void stop();

    /** @brief Whether the hotkey is active. */
    bool isActive() const { return active_; }

    /** @brief Whether recording is in progress (CapsLock held for more than 1 s). */
    bool isRecording() const { return recording_; }

    /**
     * @brief Handles a WM_HOTKEY event (called by the native event filter).
     *
     * This is an ordinary member function and must not be declared under
     * `signals:`: moc generates the body of every signal, which would collide
     * with the hand-written definition in win_hotkey.cpp.
     */
    void onHotkeyEvent(int hotkeyId);

signals:
    /** @brief Recording started (after a long press of more than 1 second). */
    void recordingStarted();

    /** @brief Recording stopped (after the hotkey was released). */
    void recordingStopped();

    /** @brief The hotkey has been registered. */
    void ready();

    /** @brief Initialization failed. */
    void error(const QString& message);

private:
    struct Impl;
    std::unique_ptr<Impl> impl_;  // platform-private state (pimpl)
    bool active_ = false;
    bool recording_ = false;
};
} // namespace impress

View File

@ -0,0 +1,114 @@
#include "win_text_injector.h"
#include "utils/logger.h"
#ifdef Q_OS_WIN
#include <windows.h>
#endif
#include <QThread>
static const char* const kTag = "WaylandTextInjector";
namespace impress {
WaylandTextInjector::WaylandTextInjector(QObject* parent)
: QObject(parent)
{}
WaylandTextInjector::~WaylandTextInjector() = default;
/**
 * @brief Marks the injector as ready for use.
 *
 * SendInput needs no setup, so on Windows this only sets the initialized flag.
 * On any other platform initialization fails.
 *
 * @return true if the injector is (or already was) initialized.
 */
bool WaylandTextInjector::initialize() {
#ifdef Q_OS_WIN
    if (!initialized_) {
        // SendInput requires no additional initialization.
        initialized_ = true;
        LOG_INFO(kTag, "Windows 文本注入器已初始化 (SendInput)");
    }
    return true;
#else
    if (initialized_) {
        return true;
    }
    LOG_ERROR(kTag, "WaylandTextInjector 仅支持 Windows 平台");
    return false;
#endif
}
bool WaylandTextInjector::injectText(const QString& text) {
if (!initialized_) {
LOG_ERROR(kTag, "文本注入器未初始化");
return false;
}
if (text.isEmpty()) return true;
LOG_DEBUG(kTag, QString("注入文本 (%1 字符): %2").arg(text.length()).arg(text));
for (int i = 0; i < text.length(); i++) {
if (!injectChar(text[i])) {
LOG_WARNING(kTag, QString("字符注入失败: '%1'").arg(text[i]));
}
// 字符间短暂延迟
QThread::usleep(10000); // 10ms
}
LOG_DEBUG(kTag, "文本注入完成");
return true;
}
/**
 * @brief Injects one UTF-16 code unit as a Unicode key-down/key-up pair.
 *
 * Uses SendInput with KEYEVENTF_UNICODE, which carries the character in the
 * scan-code field instead of a virtual key. SendInput returns the number of
 * events actually inserted, so anything short of the full pair (for example
 * a key-down without its key-up) is reported as a failure.
 *
 * @param ch Code unit to inject.
 * @return true if both events were inserted; false on failure or on
 *         non-Windows builds.
 */
bool WaylandTextInjector::injectChar(QChar ch) {
#ifdef Q_OS_WIN
    constexpr UINT kEventCount = 2;
    INPUT events[kEventCount] = {};

    // Key press.
    events[0].type = INPUT_KEYBOARD;
    events[0].ki.wScan = ch.unicode();
    events[0].ki.dwFlags = KEYEVENTF_UNICODE;

    // Key release.
    events[1].type = INPUT_KEYBOARD;
    events[1].ki.wScan = ch.unicode();
    events[1].ki.dwFlags = KEYEVENTF_UNICODE | KEYEVENTF_KEYUP;

    const UINT sent = SendInput(kEventCount, events, sizeof(INPUT));
    if (sent != kEventCount) {
        const DWORD err = GetLastError();
        LOG_ERROR(kTag, QString("SendInput 失败: %1").arg(err));
        return false;
    }
    return true;
#else
    (void)ch;
    return false;
#endif
}
/**
 * @brief Simulates a press and release of a Windows virtual-key code.
 *
 * SendInput returns the number of events actually inserted, so anything short
 * of the full press/release pair is reported as a failure.
 *
 * @param keycode Virtual-key code; truncated to WORD.
 * @return true if both events were inserted; false on failure or on
 *         non-Windows builds.
 */
bool WaylandTextInjector::simulateKeycode(unsigned int keycode) {
#ifdef Q_OS_WIN
    constexpr UINT kEventCount = 2;
    INPUT events[kEventCount] = {};
    const WORD virtualKey = static_cast<WORD>(keycode);

    // Key press.
    events[0].type = INPUT_KEYBOARD;
    events[0].ki.wVk = virtualKey;
    events[0].ki.dwFlags = 0;

    // Key release.
    events[1].type = INPUT_KEYBOARD;
    events[1].ki.wVk = virtualKey;
    events[1].ki.dwFlags = KEYEVENTF_KEYUP;

    const UINT sent = SendInput(kEventCount, events, sizeof(INPUT));
    if (sent != kEventCount) {
        const DWORD err = GetLastError();
        LOG_ERROR(kTag, QString("SendInput keycode 注入失败: %1").arg(err));
        return false;
    }

    LOG_DEBUG(kTag, QString("模拟 keycode: 0x%1").arg(keycode, 0, 16));
    return true;
#else
    (void)keycode;
    return false;
#endif
}
} // namespace impress

View File

@ -0,0 +1,41 @@
#pragma once
#include <QObject>
#include <QString>
#include <memory>
namespace impress {
/**
 * @brief Text injector (Windows implementation).
 *
 * Injects text through the Windows SendInput API; the implementation lives in
 * win_text_injector.cpp.
 */
class WaylandTextInjector : public QObject {
    Q_OBJECT

public:
    explicit WaylandTextInjector(QObject* parent = nullptr);
    ~WaylandTextInjector() override;

    /** @brief Initializes the injector. */
    bool initialize();

    /** @brief Injects text at the current cursor position. */
    bool injectText(const QString& text);

    /** @brief Whether the injector has been initialized. */
    bool isInitialized() const { return initialized_; }

    /** @brief Simulates a key press + release (Windows uses virtual-key codes). */
    bool simulateKeycode(unsigned int keycode);

signals:
    /** @brief Error notification carrying a human-readable message. */
    void error(const QString& message);

private:
    /** @brief Injects a single character. */
    bool injectChar(QChar ch);

    bool initialized_{false};
};
} // namespace impress

View File

@ -83,8 +83,8 @@ void SettingsPage::setupUI() {
streamingCheck_->setChecked(true);
sttLayout->addRow("流式识别:", streamingCheck_);
debugSaveAudioCheck_ = new QCheckBox("保存调试音频到 /tmp/impress_audio_debug/", this);
debugSaveAudioCheck_->setToolTip("开启后,每次识别会将原始音频保存为 WAV 文件,用于调试音频质量问题");
debugSaveAudioCheck_ = new QCheckBox("保存调试音频到临时文件夹", this);
debugSaveAudioCheck_->setToolTip("开启后,每次识别会将原始音频保存为 WAV 文件到系统临时目录,用于调试音频质量问题");
sttLayout->addRow("调试录音:", debugSaveAudioCheck_);
capslockVoiceCheck_ = new QCheckBox("启用 CapsLock 长按语音输入", this);