From cda68e53763f3bc5a1053c28f626389bc9c604bf Mon Sep 17 00:00:00 2001 From: impressionyang Date: Wed, 13 May 2026 11:12:57 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=20CapsLock=20?= =?UTF-8?q?=E8=AF=AD=E9=9F=B3=E8=BE=93=E5=85=A5=E6=A0=B8=E5=BF=83=E6=A8=A1?= =?UTF-8?q?=E5=9D=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 新增三个核心组件: - CapsLockVoiceHotkey: 基于 freedesktop GlobalShortcuts D-Bus Portal 的全局快捷键注册,支持 Wayland 环境 - WaylandTextInjector: 通过 dlopen 动态加载 XTest 实现文本注入, 无需编译时依赖 libXtst-devel 头文件 - VoiceInputService: 状态机协调器,实现长按 1s 录音、松开转写、 短按恢复 CapsLock 的完整交互流程 CMakeLists.txt 新增 Qt6::DBus 依赖和新源文件。配置管理器新增 capslock_voice_enabled 开关。 Co-Authored-By: Claude Opus 4.6 --- CMakeLists.txt | 9 +- src/app/config_manager.cpp | 4 +- src/core/caps_lock_voice_hotkey.cpp | 222 ++++++++++++++++++++++++++ src/core/caps_lock_voice_hotkey.h | 66 ++++++++ src/core/voice_input_service.cpp | 236 ++++++++++++++++++++++++++++ src/core/voice_input_service.h | 80 ++++++++++ src/core/wayland_text_injector.cpp | 181 +++++++++++++++++++++ src/core/wayland_text_injector.h | 44 ++++++ 8 files changed, 840 insertions(+), 2 deletions(-) create mode 100644 src/core/caps_lock_voice_hotkey.cpp create mode 100644 src/core/caps_lock_voice_hotkey.h create mode 100644 src/core/voice_input_service.cpp create mode 100644 src/core/voice_input_service.h create mode 100644 src/core/wayland_text_injector.cpp create mode 100644 src/core/wayland_text_injector.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b25262..9d1e190 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") # Qt 6 -find_package(Qt6 REQUIRED COMPONENTS Core Widgets Concurrent Network) +find_package(Qt6 REQUIRED COMPONENTS Core Widgets Concurrent Network DBus) qt_standard_project_setup() # ONNX Runtime @@ -53,6 +53,9 @@ set(SOURCES 
src/core/whisper_tokenizer.cpp src/core/audio_processor.cpp src/core/vad.cpp + src/core/caps_lock_voice_hotkey.cpp + src/core/wayland_text_injector.cpp + src/core/voice_input_service.cpp # Audio src/audio/audio_capture.cpp @@ -87,6 +90,9 @@ set(HEADERS src/core/whisper_tokenizer.h src/core/audio_processor.h src/core/vad.h + src/core/caps_lock_voice_hotkey.h + src/core/wayland_text_injector.h + src/core/voice_input_service.h src/audio/audio_capture.h src/audio/audio_decoder.h @@ -124,6 +130,7 @@ target_link_libraries(${PROJECT_NAME} PRIVATE Qt6::Widgets Qt6::Concurrent Qt6::Network + Qt6::DBus ${ONNXRUNTIME_LIBRARIES} ${PORTAUDIO_LIBRARIES} pthread diff --git a/src/app/config_manager.cpp b/src/app/config_manager.cpp index 096eb30..5f12524 100644 --- a/src/app/config_manager.cpp +++ b/src/app/config_manager.cpp @@ -74,7 +74,9 @@ void ConfigManager::loadDefaults() { {"language", "zh"}, {"streaming", true}, {"beam_size", 5}, - {"temperature", 0.0} + {"temperature", 0.0}, + {"debug_save_audio", false}, + {"capslock_voice_enabled", false} }}, {"audio", QVariantMap{ {"input_device", -1},
diff --git a/src/core/caps_lock_voice_hotkey.cpp b/src/core/caps_lock_voice_hotkey.cpp
new file mode 100644
index 0000000..ff2ea2f
--- /dev/null
+++ b/src/core/caps_lock_voice_hotkey.cpp
@@ -0,0 +1,253 @@
+#include "caps_lock_voice_hotkey.h"
+#include "utils/logger.h"
+
+#include <QDBusConnection>
+#include <QDBusInterface>
+#include <QDBusMessage>
+#include <QDBusObjectPath>
+#include <QUuid>
+#include <QVariantMap>
+
+static const char* const kTag = "CapsLockVoiceHotkey";
+
+// Portal constants
+static const char* const kPortalService = "org.freedesktop.portal.Desktop";
+static const char* const kPortalObjectPath = "/org/freedesktop/portal/desktop";
+static const char* const kGlobalShortcutsIface = "org.freedesktop.portal.GlobalShortcuts";
+static const char* const kRequestIface = "org.freedesktop.portal.Request";
+
+namespace impress {
+
+/** Handshake state shared by the portal callbacks (CreateSession, then BindShortcuts). */
+struct CapsLockVoiceHotkey::Impl {
+    QString sessionPath;
+    QString pendingRequestPath;
+
+    enum State { Idle, WaitingSession, WaitingBind, Active };
+    State state = Idle;
+
+    /** Build a unique token: prefix, '_' and 8 characters taken from a fresh UUID. */
+    static QString makeToken(const QString& prefix) {
+        return prefix + "_" + QUuid::createUuid().toString().mid(1, 8);
+    }
+
+    /** Build the session object path from the sender name and the session token. */
+    static QString makeSessionPath(const QString& sender, const QString& token) {
+        QString safeSender = sender;
+        safeSender.remove(0, 1); // drop the leading ':'
+        safeSender.replace('.', '_');
+        return QString("/org/freedesktop/portal/desktop/session/%1/%2")
+            .arg(safeSender, token);
+    }
+
+    /** Return the session bus connection. */
+    static QDBusConnection bus() {
+        return QDBusConnection::sessionBus();
+    }
+
+    /**
+     * Remove every signal subscription made by start() and go back to Idle.
+     * Shared by stop() and by all failure paths, so that a failed handshake
+     * leaves no subscription behind for a later start() to duplicate.
+     */
+    void reset(QObject* receiver) {
+        QDBusConnection conn = bus();
+        conn.disconnect(kPortalService, kPortalObjectPath,
+                        kGlobalShortcutsIface, "Activated",
+                        receiver, SLOT(handleActivated(QString)));
+        conn.disconnect(kPortalService, kPortalObjectPath,
+                        kGlobalShortcutsIface, "Deactivated",
+                        receiver, SLOT(handleDeactivated(QString)));
+        conn.disconnect(kPortalService, QString(),
+                        kRequestIface, "Response",
+                        receiver, SLOT(onPortalResponse(uint, QVariantMap)));
+        state = Idle;
+        sessionPath.clear();
+        pendingRequestPath.clear();
+    }
+};
+
+CapsLockVoiceHotkey::CapsLockVoiceHotkey(QObject* parent)
+    : QObject(parent)
+    , impl_(std::make_unique<Impl>())
+{}
+
+CapsLockVoiceHotkey::~CapsLockVoiceHotkey() {
+    stop();
+}
+
+/**
+ * Subscribe to the portal signals and send CreateSession.
+ * @return true when the request was sent or a handshake is already under way;
+ *         false when the bus is unavailable or CreateSession failed.
+ */
+bool CapsLockVoiceHotkey::start() {
+    // A pending handshake counts as started: running this again would subscribe
+    // every signal a second time and open a second session.
+    if (active_ || impl_->state != Impl::Idle) return true;
+
+    QDBusConnection bus = Impl::bus();
+    if (!bus.isConnected()) {
+        emit error("无法连接到 D-Bus session bus");
+        return false;
+    }
+
+    // Subscribe to the shortcut signals.
+    // NOTE(review): the GlobalShortcuts portal documents Activated/Deactivated
+    // with the arguments (o session_handle, s shortcut_id, t timestamp, a{sv}
+    // options); confirm that a slot taking one QString is ever matched.
+    bus.connect(kPortalService, kPortalObjectPath,
+                kGlobalShortcutsIface, "Activated",
+                this, SLOT(handleActivated(QString)));
+
+    bus.connect(kPortalService, kPortalObjectPath,
+                kGlobalShortcutsIface, "Deactivated",
+                this, SLOT(handleDeactivated(QString)));
+
+    // Subscribe to Response; the empty path matches every request object.
+    bus.connect(kPortalService, QString(),
+                kRequestIface, "Response",
+                this, SLOT(onPortalResponse(uint, QVariantMap)));
+
+    // Send CreateSession
+    QDBusInterface portal(kPortalService, kPortalObjectPath,
+                          kGlobalShortcutsIface, bus);
+
+    QString sessionToken = Impl::makeToken("io_impress_sess");
+    QString requestToken = Impl::makeToken("io_impress_req");
+
+    QVariantMap options;
+    options["handle_token"] = requestToken;
+    options["session_handle_token"] = sessionToken;
+
+    QDBusMessage reply = portal.call("CreateSession", options);
+    if (reply.type() == QDBusMessage::ErrorMessage) {
+        impl_->reset(this); // undo the subscriptions made above
+        emit error(QString("CreateSession 失败: %1").arg(reply.errorMessage()));
+        LOG_ERROR(kTag, reply.errorMessage());
+        return false;
+    }
+
+    // Remember the session path we expect the portal to use
+    QString sender = bus.baseService();
+    impl_->sessionPath = Impl::makeSessionPath(sender, sessionToken);
+    impl_->state = Impl::WaitingSession;
+
+    LOG_INFO(kTag, "CreateSession 已发送,等待用户授权...");
+    LOG_DEBUG(kTag, QString("Session path: %1").arg(impl_->sessionPath));
+    return true;
+}
+
+/** Unsubscribe from the portal and clear all state; does nothing when already idle. */
+void CapsLockVoiceHotkey::stop() {
+    if (!active_ && impl_->state == Impl::Idle) return;
+
+    impl_->reset(this);
+    active_ = false;
+    recording_ = false;
+    LOG_INFO(kTag, "CapsLock 语音快捷键已停止");
+}
+
+/** Forward a portal Response to the handler of the step that is currently pending. */
+void CapsLockVoiceHotkey::onPortalResponse(uint response, const QVariantMap& results) {
+    if (impl_->state == Impl::WaitingSession) {
+        handleSessionResponse(response, results);
+    } else if (impl_->state == Impl::WaitingBind) {
+        handleBindResponse(response, results);
+    }
+}
+
+/** Handle the CreateSession response and, when it succeeded, send BindShortcuts. */
+void CapsLockVoiceHotkey::handleSessionResponse(uint response, const QVariantMap& results) {
+    if (impl_->state != Impl::WaitingSession) return;
+
+    if (response != 0) {
+        impl_->reset(this);
+        emit error(QString("Session 被拒绝 (response=%1)").arg(response));
+        LOG_ERROR(kTag, QString("Session 被拒绝: %1").arg(response));
+        return;
+    }
+
+    QString actualPath = results.value("session_handle").toString();
+    if (!actualPath.isEmpty()) {
+        impl_->sessionPath = actualPath;
+    }
+    LOG_INFO(kTag, QString("Session 已授权: %1").arg(impl_->sessionPath));
+
+    // Send BindShortcuts
+    impl_->state = Impl::WaitingBind;
+
+    QDBusInterface portal(kPortalService, kPortalObjectPath,
+                          kGlobalShortcutsIface, Impl::bus());
+
+    QString bindToken = Impl::makeToken("io_impress_bind");
+
+    QVariantMap shortcutProps;
+    shortcutProps["description"] = "语音输入(CapsLock)";
+
+    // NOTE(review): the portal documents the shortcuts argument as a(sa{sv}),
+    // an array of (id, properties) structs; confirm how this list of maps is
+    // marshalled on the wire.
+    QList<QVariant> shortcuts;
+    QVariantMap shortcutEntry;
+    shortcutEntry["id"] = "voice_input";
+    shortcutEntry["properties"] = shortcutProps;
+    shortcuts.append(shortcutEntry);
+
+    QVariantMap bindOptions;
+    bindOptions["handle_token"] = bindToken;
+
+    QDBusMessage reply = portal.call("BindShortcuts",
+                                     QDBusObjectPath(impl_->sessionPath),
+                                     shortcuts,
+                                     QString(), // parent_window (empty = Wayland mode)
+                                     bindOptions);
+
+    if (reply.type() == QDBusMessage::ErrorMessage) {
+        impl_->reset(this);
+        emit error(QString("BindShortcuts 失败: %1").arg(reply.errorMessage()));
+        LOG_ERROR(kTag, reply.errorMessage());
+        return;
+    }
+
+    // The reply carries the request handle as an object path, not a string.
+    impl_->pendingRequestPath = reply.arguments().isEmpty() ?
+        QString() : reply.arguments()[0].value<QDBusObjectPath>().path();
+    LOG_INFO(kTag, "BindShortcuts 已发送,等待用户设置快捷键...");
+}
+
+/** Handle the BindShortcuts response; on success mark the hotkey active and emit ready(). */
+void CapsLockVoiceHotkey::handleBindResponse(uint response, const QVariantMap&) {
+    if (impl_->state != Impl::WaitingBind) return;
+
+    if (response != 0) {
+        impl_->reset(this);
+        emit error(QString("快捷键绑定被拒绝 (response=%1)").arg(response));
+        LOG_ERROR(kTag, QString("Bind 被拒绝: %1").arg(response));
+        return;
+    }
+
+    // The shortcut is bound
+    active_ = true;
+    impl_->state = Impl::Active;
+    emit ready();
+    LOG_INFO(kTag, "快捷键已注册,CapsLock 语音输入已就绪");
+}
+
+/** Shortcut pressed: flag recording and emit recordingStarted(). */
+void CapsLockVoiceHotkey::handleActivated(const QString& shortcutId) {
+    if (!active_) return;
+    LOG_DEBUG(kTag, QString("快捷键按下: %1").arg(shortcutId));
+    recording_ = true;
+    emit recordingStarted();
+}
+
+/** Shortcut released: clear the recording flag and emit recordingStopped(). */
+void CapsLockVoiceHotkey::handleDeactivated(const QString& shortcutId) {
+    if (!active_) return;
+    LOG_DEBUG(kTag, QString("快捷键松开: %1").arg(shortcutId));
+    recording_ = false;
+    emit recordingStopped();
+}
+
+} // namespace impress
diff --git a/src/core/caps_lock_voice_hotkey.h b/src/core/caps_lock_voice_hotkey.h
new file mode 100644
index 0000000..f585825
--- /dev/null
+++ b/src/core/caps_lock_voice_hotkey.h
@@ -0,0 +1,66 @@
+#pragma once
+
+#include <QObject>
+#include <QString>
+#include <QVariantMap>
+#include <memory>
+
+namespace impress {
+
+/**
+ * @brief Global shortcut manager for CapsLock voice input
+ *
+ * Registers a Wayland-compatible global shortcut through the freedesktop GlobalShortcuts D-Bus portal.
+ * Workflow (this class reports press and release; the 1 s timing is applied by its user):
+ * 1. Holding CapsLock for 1 second triggers recording
+ * 2. Recording continues while the key stays held
+ * 3. Releasing CapsLock stops recording and triggers transcription
+ * 4. A short press (< 1s) passes the CapsLock event through (toggles caps lock)
+ *
+ * The first start needs the user to authorize the shortcut in a GNOME dialog.
+ */
+class CapsLockVoiceHotkey : public QObject {
+    Q_OBJECT
+public:
+    explicit CapsLockVoiceHotkey(QObject* parent = nullptr);
+    ~CapsLockVoiceHotkey() override;
+
+    /** @brief Initialize and register the shortcut (the first run needs user authorization) */
+    bool start();
+
+    /** @brief Stop listening for the shortcut */
+    void stop();
+
+    /** @brief Whether the shortcut is registered and active */
+    bool isActive() const { return active_; }
+
+    /** @brief Whether the shortcut is currently held down */
+    bool isRecording() const { return recording_; }
+
+signals:
+    /** @brief The shortcut was pressed */
+    void recordingStarted();
+
+    /** @brief The shortcut was released */
+    void recordingStopped();
+
+    /** @brief The shortcut has been registered (after user authorization) */
+    void ready();
+
+    /** @brief Initialization failed */
+    void error(const QString& message);
+
+private slots:  // resolved by name through SLOT() in QDBusConnection::connect()
+    void handleActivated(const QString& shortcutId);
+    void handleDeactivated(const QString& shortcutId);
+    void onPortalResponse(uint response, const QVariantMap& results);
+private:
+    struct Impl;
+    std::unique_ptr<Impl> impl_;
+    bool active_ = false;
+    bool recording_ = false;
+    void handleSessionResponse(uint response, const QVariantMap& results);
+    void handleBindResponse(uint response, const QVariantMap& results);
+};
+
+} // namespace impress
diff --git a/src/core/voice_input_service.cpp b/src/core/voice_input_service.cpp
new file mode 100644
index 0000000..1ad2035
--- /dev/null
+++ b/src/core/voice_input_service.cpp
@@ -0,0 +1,284 @@
+#include "voice_input_service.h"
+#include "audio/audio_capture.h"
+#include "sense_voice_engine.h"
+#include "caps_lock_voice_hotkey.h"
+#include "wayland_text_injector.h"
+#include "app/config_manager.h"
+#include "utils/logger.h"
+
+#include <QFutureSynchronizer>
+#include <QMetaObject>
+#include <QTimer>
+#include <QtConcurrent>
+
+#include <utility>
+
+static const char* const kTag = "VoiceInputService";
+
+namespace impress {
+
+/** Components created by start(), plus the inference jobs still running on worker threads. */
+struct VoiceInputService::Impl {
+    AudioCapture* audioCapture = nullptr;
+    SenseVoiceEngine* sttEngine = nullptr;
+    CapsLockVoiceHotkey* hotkey = nullptr;
+    WaylandTextInjector* injector = nullptr;
+    // Lets stop() wait for running inference before the model is unloaded.
+    QFutureSynchronizer<void> inferences;
+};
+
+VoiceInputService::VoiceInputService(ConfigManager* configManager, QObject* parent)
+    : QObject(parent)
+    , configManager_(configManager)
+    , impl_(std::make_unique<Impl>())
+{
+    longPressTimer_ = new QTimer(this);
+    longPressTimer_->setSingleShot(true);
+    connect(longPressTimer_, &QTimer::timeout, this, [this]() {
+        // Threshold elapsed without a release: this press is a long press
+        if (!longPressDetected_) {
+            longPressDetected_ = true;
+            emit statusChanged("正在录音...");
+        }
+    });
+}
+
+VoiceInputService::~VoiceInputService() {
+    stop();
+}
+
+/**
+ * Create the components on first use, load the STT model and start the hotkey.
+ * @return false only when the global hotkey could not be started.
+ */
+bool VoiceInputService::start() {
+    if (running_) return true;
+
+    // Every component is parented to this object and created only once:
+    // allocating new ones on each start() would pile up live instances.
+
+    // 1. Audio capture
+    if (!impl_->audioCapture) {
+        impl_->audioCapture = new AudioCapture(this);
+        connect(impl_->audioCapture, &AudioCapture::audioDataReady,
+                this, &VoiceInputService::onAudioData);
+    }
+
+    // 2. STT engine (stop() unloads the model, so it is loaded on every start)
+    if (!impl_->sttEngine) {
+        impl_->sttEngine = new SenseVoiceEngine(this);
+    }
+
+    // Model settings come from the configuration
+    QString modelPath = configManager_->get("stt.model_path").toString();
+    QString tokensPath = configManager_->get("stt.tokens_path").toString();
+    QString device = configManager_->get("stt.device").toString();
+    int numThreads = configManager_->get("stt.num_threads").toInt();
+
+    if (!modelPath.isEmpty()) {
+        LOG_INFO(kTag, QString("正在加载 STT 模型: %1").arg(modelPath));
+        bool modelLoaded = impl_->sttEngine->loadModelSync(modelPath, tokensPath, device, numThreads);
+        if (!modelLoaded) {
+            emit error(QString("STT 模型加载失败: %1").arg(modelPath));
+            LOG_ERROR(kTag, "STT 模型加载失败");
+        } else {
+            LOG_INFO(kTag, "STT 模型加载成功");
+            // Apply the debug audio setting
+            bool debugSave = configManager_->get("stt.debug_save_audio").toBool();
+            impl_->sttEngine->setDebugSaveAudio(debugSave);
+        }
+    } else {
+        LOG_WARNING(kTag, "模型路径为空,请先在配置中设置模型路径");
+    }
+
+    // 3. Global hotkey
+    if (!impl_->hotkey) {
+        impl_->hotkey = new CapsLockVoiceHotkey(this);
+        connect(impl_->hotkey, &CapsLockVoiceHotkey::recordingStarted,
+                this, &VoiceInputService::onHotkeyActivated);
+        connect(impl_->hotkey, &CapsLockVoiceHotkey::recordingStopped,
+                this, &VoiceInputService::onHotkeyDeactivated);
+        connect(impl_->hotkey, &CapsLockVoiceHotkey::ready,
+                this, [this]() {
+            emit statusChanged("语音输入就绪(快捷键已注册)");
+        });
+        connect(impl_->hotkey, &CapsLockVoiceHotkey::error,
+                this, &VoiceInputService::error);
+    }
+
+    // 4. Text injector; a failure is reported but does not abort start()
+    if (!impl_->injector) {
+        impl_->injector = new WaylandTextInjector(this);
+    }
+    if (!impl_->injector->initialize()) {
+        emit error("文本注入器初始化失败,无法注入识别结果");
+        LOG_ERROR(kTag, "文本注入器初始化失败");
+    }
+
+    // Start the hotkey (the first run shows an authorization dialog)
+    if (!impl_->hotkey->start()) {
+        // stop() is a no-op while running_ is false, so release the model here
+        impl_->sttEngine->unloadModel();
+        emit error("全局快捷键启动失败");
+        return false;
+    }
+
+    running_ = true;
+    emit statusChanged("语音输入已启动(等待授权...)");
+    LOG_INFO(kTag, "语音输入服务已启动");
+    return true;
+}
+
+/** Stop capture, wait for running inference, unload the model and stop the hotkey. */
+void VoiceInputService::stop() {
+    if (!running_) return;
+
+    longPressTimer_->stop();
+
+    if (impl_->audioCapture) {
+        impl_->audioCapture->stop();
+    }
+    // A worker may still be inside infer(); unloading the model under it would race
+    impl_->inferences.waitForFinished();
+    impl_->inferences.clearFutures();
+    if (impl_->sttEngine) {
+        impl_->sttEngine->unloadModel();
+    }
+    if (impl_->hotkey) {
+        impl_->hotkey->stop();
+    }
+
+    running_ = false;
+    recording_ = false;
+    longPressDetected_ = false;
+    audioBuffer_.clear();
+
+    LOG_INFO(kTag, "语音输入服务已停止");
+}
+
+/** Hotkey pressed: start capturing right away and arm the long-press timer. */
+void VoiceInputService::onHotkeyActivated() {
+    LOG_DEBUG(kTag, "快捷键激活(按下)");
+    recording_ = true;
+    longPressDetected_ = false;
+    audioBuffer_.clear();
+
+    // Arm the long-press timer
+    longPressTimer_->start(longPressThreshold_);
+
+    // Start capturing now so the audio before the threshold is kept
+    int deviceIndex = -1; // default device
+    impl_->audioCapture->start(deviceIndex, 16000, 20);
+
+    emit statusChanged("等待长按确认...");
+}
+
+/** Hotkey released: a short press toggles CapsLock, a long press is transcribed. */
+void VoiceInputService::onHotkeyDeactivated() {
+    LOG_DEBUG(kTag, "快捷键停用(松开)");
+    recording_ = false;
+    longPressTimer_->stop();
+
+    // Stop audio capture
+    if (impl_->audioCapture && impl_->audioCapture->isRunning()) {
+        impl_->audioCapture->stop();
+    }
+
+    if (!longPressDetected_) {
+        // Short press: simulate the CapsLock key
+        LOG_DEBUG(kTag, "短按,模拟 CapsLock");
+        simulateCapsLock();
+        emit statusChanged("短按:切换 CapsLock");
+    } else {
+        // Long press: stop recording and transcribe
+        stopRecordingAndTranscribe();
+    }
+
+    longPressDetected_ = false;
+}
+
+/** Append captured samples to the buffer while the hotkey is held. */
+void VoiceInputService::onAudioData(const std::vector<float>& samples, int sampleRate) {
+    if (!recording_) return;
+
+    audioSampleRate_ = sampleRate;
+    audioBuffer_.insert(audioBuffer_.end(), samples.begin(), samples.end());
+}
+
+/**
+ * Run STT on the captured audio in a worker thread; the text is delivered to
+ * onRecognitionComplete() through a queued call on this object.
+ */
+void VoiceInputService::stopRecordingAndTranscribe() {
+    if (audioBuffer_.empty()) {
+        emit statusChanged("未检测到音频输入");
+        return;
+    }
+
+    emit statusChanged("正在识别...");
+
+    QString language = configManager_->get("stt.language").toString();
+
+    // The worker gets its inputs by value, so it never reads members that
+    // this thread keeps writing (audioSampleRate_, impl_).
+    SenseVoiceEngine* engine = impl_->sttEngine;
+    const int sampleRate = audioSampleRate_;
+
+    impl_->inferences.addFuture(QtConcurrent::run(
+        [this, engine, sampleRate, buffer = std::move(audioBuffer_), lang = language]() {
+            QString text;
+
+            if (!engine->isLoaded()) {
+                LOG_WARNING(kTag, "模型未加载,跳过推理");
+                text = "[错误] 模型未加载,请先在配置中设置模型路径";
+            } else {
+                auto result = engine->infer(buffer, sampleRate, lang);
+                text = result.text;
+            }
+
+            QMetaObject::invokeMethod(this, [this, text]() {
+                onRecognitionComplete(text);
+            }, Qt::QueuedConnection);
+        }));
+
+    // The buffer was moved from; clear() leaves it in a defined empty state
+    audioBuffer_.clear();
+}
+
+/** Publish the recognized text and hand it to the text injector. */
+void VoiceInputService::onRecognitionComplete(const QString& text) {
+    // Results that arrive after stop() are dropped
+    if (!running_) return;
+
+    if (text.isEmpty()) {
+        emit statusChanged("识别结果:无语音输入");
+        return;
+    }
+
+    emit recognitionResult(text);
+    emit statusChanged(QString("识别结果: %1").arg(text));
+
+    // Inject the text at the cursor position
+    if (impl_->injector && impl_->injector->isInitialized()) {
+        impl_->injector->injectText(text);
+        LOG_INFO(kTag, QString("文本已注入: %1").arg(text));
+    } else {
+        LOG_WARNING(kTag, "文本注入器未就绪,无法注入");
+    }
+}
+
+/** Send a CapsLock key press through the injector (short-press path). */
+void VoiceInputService::simulateCapsLock() {
+    if (impl_->injector && impl_->injector->isInitialized()) {
+        // CapsLock keysym = 0xffe5
+        // NOTE(review): this is an X11 keysym while the callee is named
+        // simulateKeycode(); confirm that it translates keysyms to keycodes.
+        unsigned int capslockKeysym = 0xffe5;
+        impl_->injector->simulateKeycode(capslockKeysym);
+        LOG_DEBUG(kTag, "模拟 CapsLock 按键已注入");
+    } else {
+        LOG_WARNING(kTag, "文本注入器未初始化,无法模拟 CapsLock");
+    }
+}
+
+} // namespace impress
diff --git a/src/core/voice_input_service.h b/src/core/voice_input_service.h
new file mode 100644
index 0000000..c4fffe3
--- /dev/null
+++ b/src/core/voice_input_service.h
@@ -0,0 +1,80 @@
+#pragma once
+
+#include <QObject>
+#include <QString>
+#include <QTimer>
+#include <memory>
+#include <vector>
+
+namespace impress {
+
+class AudioCapture;
+class SenseVoiceEngine;
+class CapsLockVoiceHotkey;
+class WaylandTextInjector;
+class ConfigManager;
+
+/**
+ * @brief CapsLock voice input service
+ *
+ * Coordinates the global hotkey, audio capture, STT inference and text injection.
+ * State machine:
+ * 1. Idle -> hotkey pressed -> audio capture starts and the long-press timer is armed
+ * 2. Still held after the threshold (1 s by default) -> the press counts as a long press
+ * 3. Released after a long press -> stop capture -> inference -> inject the text
+ * 4. Released before the threshold (short press) -> inject a CapsLock key press
+ */
+class VoiceInputService : public QObject {
+    Q_OBJECT
+public:
+    explicit VoiceInputService(ConfigManager* configManager, QObject* parent = nullptr);
+    ~VoiceInputService() override;
+
+    /** @brief Start the service (initializes all components) */
+    bool start();
+
+    /** @brief Stop the service */
+    void stop();
+
+    /** @brief Whether the service has been started */
+    bool isRunning() const { return running_; }
+
+    /** @brief Whether the hotkey is held and audio is being buffered */
+    bool isRecording() const { return recording_; }
+
+    /** @brief Long-press threshold in milliseconds, 1000 ms by default */
+    void setLongPressThreshold(int ms) { longPressThreshold_ = ms; }
+    int longPressThreshold() const { return longPressThreshold_; }
+
+signals:
+    void statusChanged(const QString& status);    // human-readable progress text
+    void recognitionResult(const QString& text);  // recognized text of one utterance
+    void error(const QString& message);           // a component failed
+
+private slots:
+    void onHotkeyActivated();
+    void onHotkeyDeactivated();
+    void onAudioData(const std::vector<float>& samples, int sampleRate);
+    void onRecognitionComplete(const QString& text);
+
+private:
+    struct Impl;
+    ConfigManager* configManager_ = nullptr;
+    std::unique_ptr<Impl> impl_;
+
+    bool running_ = false;
+    bool recording_ = false;
+    bool longPressDetected_ = false;
+    int longPressThreshold_ = 1000;
+
+    std::vector<float> audioBuffer_;
+    int audioSampleRate_ = 16000;
+
+    QTimer* longPressTimer_ = nullptr;
+
+    void startRecording();  // NOTE(review): no definition in voice_input_service.cpp - confirm it is still needed
+    void stopRecordingAndTranscribe();
+    void simulateCapsLock();
+};
+
+} // namespace impress
diff --git a/src/core/wayland_text_injector.cpp b/src/core/wayland_text_injector.cpp
new file mode
100644
index 0000000..07cee05
--- /dev/null
+++ b/src/core/wayland_text_injector.cpp
@@ -0,0 +1,211 @@
+#include "wayland_text_injector.h"
+#include "utils/logger.h"
+
+#include <QLibrary>
+#include <QThread>
+
+static const char* const kTag = "WaylandTextInjector";
+
+namespace impress {
+
+// Function pointer types for the Xlib / XTest entry points resolved at run time
+typedef int (*XTestFakeKeyEventFn)(void* display, unsigned int keycode,
+                                   int is_press, unsigned long delay);
+typedef void* (*XOpenDisplayFn)(const char* display_name);
+typedef int (*XCloseDisplayFn)(void* display);
+// Xlib declares the result as KeyCode, which is an unsigned char
+typedef unsigned char (*XKeysymToKeycodeFn)(void* display, unsigned long keysym);
+typedef unsigned long (*XStringToKeysymFn)(const char* str);
+typedef int (*XSyncFn)(void* display, int discard);
+
+// X11 keycodes fit in one byte (8..255); larger values can only be keysyms
+static constexpr unsigned int kMaxKeycode = 255;
+// Keysym of a Unicode code point outside Latin-1: 0x01000000 + code point
+static constexpr unsigned long kUnicodeKeysymBase = 0x01000000UL;
+
+struct WaylandTextInjector::Impl {
+    QLibrary x11Lib;
+    QLibrary xtstLib;
+
+    XOpenDisplayFn XOpenDisplay = nullptr;
+    XCloseDisplayFn XCloseDisplay = nullptr;
+    XKeysymToKeycodeFn XKeysymToKeycode = nullptr;
+    XStringToKeysymFn XStringToKeysym = nullptr;
+    XSyncFn XSyncFnPtr = nullptr;
+    XTestFakeKeyEventFn XTestFakeKeyEvent = nullptr;
+
+    void* display = nullptr;
+
+    /** Load libX11 and libXtst, resolve the needed symbols and open the X display. */
+    bool loadLibraries() {
+        // Load libX11
+        x11Lib.setFileName("libX11.so.6");
+        if (!x11Lib.load()) {
+            LOG_ERROR(kTag, QString("无法加载 libX11: %1").arg(x11Lib.errorString()));
+            return false;
+        }
+
+        // Load libXtst
+        xtstLib.setFileName("libXtst.so.6");
+        if (!xtstLib.load()) {
+            LOG_ERROR(kTag, QString("无法加载 libXtst: %1").arg(xtstLib.errorString()));
+            return false;
+        }
+
+        // Resolve the X11 symbols
+        XOpenDisplay = reinterpret_cast<XOpenDisplayFn>(x11Lib.resolve("XOpenDisplay"));
+        XCloseDisplay = reinterpret_cast<XCloseDisplayFn>(x11Lib.resolve("XCloseDisplay"));
+        XKeysymToKeycode = reinterpret_cast<XKeysymToKeycodeFn>(x11Lib.resolve("XKeysymToKeycode"));
+        XStringToKeysym = reinterpret_cast<XStringToKeysymFn>(x11Lib.resolve("XStringToKeysym"));
+        XSyncFnPtr = reinterpret_cast<XSyncFn>(x11Lib.resolve("XSync"));
+
+        // Resolve the XTest symbol
+        XTestFakeKeyEvent = reinterpret_cast<XTestFakeKeyEventFn>(
+            xtstLib.resolve("XTestFakeKeyEvent"));
+
+        // XStringToKeysym is never called below, so it is allowed to be missing
+        if (!XOpenDisplay || !XCloseDisplay || !XKeysymToKeycode ||
+            !XSyncFnPtr || !XTestFakeKeyEvent) {
+            LOG_ERROR(kTag, "无法解析 X11/XTest 符号");
+            return false;
+        }
+
+        // Open the X11 display (through XWayland)
+        display = XOpenDisplay(nullptr);
+        if (!display) {
+            display = XOpenDisplay(":0");
+        }
+        if (!display) {
+            LOG_ERROR(kTag, "无法连接 X11 显示(XWayland)");
+            return false;
+        }
+
+        LOG_INFO(kTag, "XTest 文本注入器已初始化");
+        return true;
+    }
+
+    /** Send a press followed by a release of `keycode` through XTest. */
+    void tapKey(unsigned int keycode) {
+        XTestFakeKeyEvent(display, keycode, 1, 0);
+        XTestFakeKeyEvent(display, keycode, 0, 0);
+    }
+};
+
+WaylandTextInjector::WaylandTextInjector(QObject* parent)
+    : QObject(parent)
+    , impl_(std::make_unique<Impl>())
+{}
+
+WaylandTextInjector::~WaylandTextInjector() {
+    if (impl_->display && impl_->XCloseDisplay) {
+        impl_->XCloseDisplay(impl_->display);
+    }
+}
+
+/** Load the libraries and open the display once; later calls return the cached success. */
+bool WaylandTextInjector::initialize() {
+    if (initialized_) return true;
+    initialized_ = impl_->loadLibraries();
+    return initialized_;
+}
+
+/**
+ * Type `text` one character at a time; a character that cannot be mapped to
+ * a key is logged and skipped.
+ * @return false only when the injector is not initialized.
+ */
+bool WaylandTextInjector::injectText(const QString& text) {
+    if (!initialized_) {
+        LOG_ERROR(kTag, "文本注入器未初始化");
+        return false;
+    }
+
+    if (text.isEmpty()) return true;
+
+    LOG_DEBUG(kTag, QString("注入文本 (%1 字符): %2").arg(text.length()).arg(text));
+
+    for (int i = 0; i < text.length(); i++) {
+        if (!injectChar(text[i])) {
+            LOG_WARNING(kTag, QString("字符注入失败: '%1'").arg(text[i]));
+        }
+        // Short pause between characters
+        QThread::usleep(10000); // 10ms
+    }
+
+    LOG_DEBUG(kTag, "文本注入完成");
+    return true;
+}
+
+/**
+ * Type one character.
+ * @return false when the character has no keysym or no key in the current keymap.
+ */
+bool WaylandTextInjector::injectChar(QChar ch) {
+    if (!impl_->display) return false;
+
+    // Map the character to an X11 keysym
+    const unsigned int code = ch.unicode();
+    unsigned long keysym = 0;
+    if (ch == '\n' || ch == '\r') {
+        keysym = 0xff0d; // XK_Return
+    } else if (ch == '\t') {
+        keysym = 0xff09; // XK_Tab
+    } else if ((code >= 0x20 && code <= 0x7e) || (code >= 0xa0 && code <= 0xff)) {
+        // Printable Latin-1: the keysym equals the code point
+        keysym = code;
+    } else if (code >= 0x100 && !ch.isSurrogate()) {
+        // Other code points use the Unicode keysym range; the raw value would
+        // be read as an unrelated legacy keysym
+        keysym = kUnicodeKeysymBase + code;
+    } else {
+        return false; // control character or lone surrogate: unsupported
+    }
+
+    unsigned int keycode = impl_->XKeysymToKeycode(impl_->display, keysym);
+    if (keycode == 0) return false;
+
+    // Upper-case letters are typed with Shift held
+    // NOTE(review): shifted symbols such as '!' look uncovered here - confirm
+    bool needShift = ch.isUpper() && ch.isLetter();
+    unsigned int shiftCode = needShift
+        ? impl_->XKeysymToKeycode(impl_->display, 0xffe1) // XK_Shift_L
+        : 0;
+    if (shiftCode) {
+        impl_->XTestFakeKeyEvent(impl_->display, shiftCode, 1, 0);
+    }
+
+    impl_->tapKey(keycode);
+
+    if (shiftCode) {
+        impl_->XTestFakeKeyEvent(impl_->display, shiftCode, 0, 0);
+    }
+
+    impl_->XSyncFnPtr(impl_->display, 0);
+    return true;
+}
+
+/**
+ * Send a press followed by a release of one key.
+ * @param keycode An X11 keycode, or a keysym: a value above 255 cannot be a
+ *                keycode and is translated with XKeysymToKeycode first.
+ * @return false when the display is not open or the keysym has no key.
+ */
+bool WaylandTextInjector::simulateKeycode(unsigned int keycode) {
+    if (!impl_->display || !impl_->XTestFakeKeyEvent) return false;
+
+    if (keycode > kMaxKeycode) {
+        // e.g. 0xffe5 (XK_Caps_Lock): sent unchanged it would name a key
+        // outside the valid keycode range
+        keycode = impl_->XKeysymToKeycode(impl_->display, keycode);
+        if (keycode == 0) return false;
+    }
+
+    LOG_DEBUG(kTag, QString("模拟 keycode: 0x%1").arg(keycode, 0, 16));
+
+    // Press + release
+    impl_->tapKey(keycode);
+    impl_->XSyncFnPtr(impl_->display, 0);
+    return true;
+}
+
+} // namespace impress
diff --git a/src/core/wayland_text_injector.h b/src/core/wayland_text_injector.h
new file mode 100644
index 0000000..171bb0d
--- /dev/null
+++ b/src/core/wayland_text_injector.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include <QObject>
+#include <QString>
+#include <memory>
+
+namespace impress {
+
+/**
+ * @brief Text injector
+ *
+ * Types text at the current cursor position through XTest (X11 display, XWayland).
+ * libX11 and libXtst are loaded at run time, so no XTest headers are needed to build.
+ */
+class WaylandTextInjector : public QObject {
+    Q_OBJECT
+public:
+    explicit WaylandTextInjector(QObject* parent = nullptr);
+    ~WaylandTextInjector() override;
+
+    /** @brief Initialize (loads the X11/XTest libraries and opens the display) */
+    bool initialize();
+
+    /** @brief Inject text at the current cursor position */
+    bool injectText(const QString& text);
+
+    /** @brief Whether initialize() has succeeded */
+    bool isInitialized() const { return initialized_; }
+
+    /** @brief Press and release a key; accepts an X11 keycode, or a keysym (any value above 255) */
+    bool simulateKeycode(unsigned int keycode);
+
+signals:
+    void error(const QString& message);
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> impl_;
+    bool initialized_ = false;
+
+    bool injectChar(QChar ch);
+};
+
+} // namespace impress