Compare commits
No commits in common. "baec3482a770d72044d30a1507c257691e27b4d1" and "85b67780b1b39aa089f134a91fd9a0e0f9206b94" have entirely different histories.
baec3482a7
...
85b67780b1
@ -28,7 +28,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
|||||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
||||||
|
|
||||||
# Qt 6
|
# Qt 6
|
||||||
find_package(Qt6 REQUIRED COMPONENTS Core Widgets Concurrent Network DBus)
|
find_package(Qt6 REQUIRED COMPONENTS Core Widgets Concurrent Network)
|
||||||
qt_standard_project_setup()
|
qt_standard_project_setup()
|
||||||
|
|
||||||
# ONNX Runtime
|
# ONNX Runtime
|
||||||
@ -53,9 +53,6 @@ set(SOURCES
|
|||||||
src/core/whisper_tokenizer.cpp
|
src/core/whisper_tokenizer.cpp
|
||||||
src/core/audio_processor.cpp
|
src/core/audio_processor.cpp
|
||||||
src/core/vad.cpp
|
src/core/vad.cpp
|
||||||
src/core/caps_lock_voice_hotkey.cpp
|
|
||||||
src/core/wayland_text_injector.cpp
|
|
||||||
src/core/voice_input_service.cpp
|
|
||||||
|
|
||||||
# Audio
|
# Audio
|
||||||
src/audio/audio_capture.cpp
|
src/audio/audio_capture.cpp
|
||||||
@ -90,9 +87,6 @@ set(HEADERS
|
|||||||
src/core/whisper_tokenizer.h
|
src/core/whisper_tokenizer.h
|
||||||
src/core/audio_processor.h
|
src/core/audio_processor.h
|
||||||
src/core/vad.h
|
src/core/vad.h
|
||||||
src/core/caps_lock_voice_hotkey.h
|
|
||||||
src/core/wayland_text_injector.h
|
|
||||||
src/core/voice_input_service.h
|
|
||||||
|
|
||||||
src/audio/audio_capture.h
|
src/audio/audio_capture.h
|
||||||
src/audio/audio_decoder.h
|
src/audio/audio_decoder.h
|
||||||
@ -130,7 +124,6 @@ target_link_libraries(${PROJECT_NAME} PRIVATE
|
|||||||
Qt6::Widgets
|
Qt6::Widgets
|
||||||
Qt6::Concurrent
|
Qt6::Concurrent
|
||||||
Qt6::Network
|
Qt6::Network
|
||||||
Qt6::DBus
|
|
||||||
${ONNXRUNTIME_LIBRARIES}
|
${ONNXRUNTIME_LIBRARIES}
|
||||||
${PORTAUDIO_LIBRARIES}
|
${PORTAUDIO_LIBRARIES}
|
||||||
pthread
|
pthread
|
||||||
|
|||||||
@ -74,9 +74,7 @@ void ConfigManager::loadDefaults() {
|
|||||||
{"language", "zh"},
|
{"language", "zh"},
|
||||||
{"streaming", true},
|
{"streaming", true},
|
||||||
{"beam_size", 5},
|
{"beam_size", 5},
|
||||||
{"temperature", 0.0},
|
{"temperature", 0.0}
|
||||||
{"debug_save_audio", false},
|
|
||||||
{"capslock_voice_enabled", false}
|
|
||||||
}},
|
}},
|
||||||
{"audio", QVariantMap{
|
{"audio", QVariantMap{
|
||||||
{"input_device", -1},
|
{"input_device", -1},
|
||||||
|
|||||||
@ -7,7 +7,7 @@
|
|||||||
#define DR_WAV_IMPLEMENTATION
|
#define DR_WAV_IMPLEMENTATION
|
||||||
#define DR_MP3_IMPLEMENTATION
|
#define DR_MP3_IMPLEMENTATION
|
||||||
#define DR_FLAC_IMPLEMENTATION
|
#define DR_FLAC_IMPLEMENTATION
|
||||||
#include "dr_wav.h"
|
#include <dr_wav.h>
|
||||||
#include <dr_mp3.h>
|
#include <dr_mp3.h>
|
||||||
#include <dr_flac.h>
|
#include <dr_flac.h>
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -1,222 +0,0 @@
|
|||||||
#include "caps_lock_voice_hotkey.h"
|
|
||||||
#include "utils/logger.h"
|
|
||||||
|
|
||||||
#include <QDBusConnection>
|
|
||||||
#include <QDBusInterface>
|
|
||||||
#include <QDBusMessage>
|
|
||||||
#include <QDBusObjectPath>
|
|
||||||
#include <QCoreApplication>
|
|
||||||
#include <QUuid>
|
|
||||||
|
|
||||||
static const char* const kTag = "CapsLockVoiceHotkey";
|
|
||||||
|
|
||||||
// Portal 常量
|
|
||||||
static const char* const kPortalService = "org.freedesktop.portal.Desktop";
|
|
||||||
static const char* const kPortalObjectPath = "/org/freedesktop/portal/desktop";
|
|
||||||
static const char* const kGlobalShortcutsIface = "org.freedesktop.portal.GlobalShortcuts";
|
|
||||||
static const char* const kRequestIface = "org.freedesktop.portal.Request";
|
|
||||||
|
|
||||||
namespace impress {
|
|
||||||
|
|
||||||
/**
 * @brief Private state and small D-Bus helpers for CapsLockVoiceHotkey.
 */
struct CapsLockVoiceHotkey::Impl {
    /** Progress of the portal handshake. */
    enum State { Idle, WaitingSession, WaitingBind, Active };

    QString sessionPath;         // object path of the portal session
    QString pendingRequestPath;  // first reply argument of the last BindShortcuts call
    State state = Idle;

    /** Returns the D-Bus session bus connection. */
    static QDBusConnection bus() { return QDBusConnection::sessionBus(); }

    /**
     * Builds "<prefix>_<8 chars>", where the 8 characters are taken from the
     * text form of a freshly generated UUID.
     */
    static QString makeToken(const QString& prefix) {
        const QString uuidText = QUuid::createUuid().toString();
        // Skip the first character of the UUID text and keep the next eight.
        return prefix + "_" + uuidText.mid(1, 8);
    }

    /**
     * Derives the session object path from the caller's bus name and a token:
     * the first character of the bus name (the leading ':') is dropped and
     * every '.' becomes '_'.
     */
    static QString makeSessionPath(const QString& sender, const QString& token) {
        QString busName = sender;
        busName.remove(0, 1);
        busName.replace('.', '_');
        const QString pathTemplate("/org/freedesktop/portal/desktop/session/%1/%2");
        return pathTemplate.arg(busName, token);
    }
};
|
|
||||||
|
|
||||||
/** Creates the object with a fresh Impl; the constructor body does no other work. */
CapsLockVoiceHotkey::CapsLockVoiceHotkey(QObject* parent)
    : QObject(parent), impl_(std::make_unique<Impl>()) {}
|
|
||||||
|
|
||||||
/** Delegates teardown to stop(). */
CapsLockVoiceHotkey::~CapsLockVoiceHotkey() { stop(); }
|
|
||||||
|
|
||||||
bool CapsLockVoiceHotkey::start() {
|
|
||||||
if (active_) return true;
|
|
||||||
|
|
||||||
QDBusConnection bus = Impl::bus();
|
|
||||||
if (!bus.isConnected()) {
|
|
||||||
emit error("无法连接到 D-Bus session bus");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 连接信号
|
|
||||||
bus.connect(kPortalService, kPortalObjectPath,
|
|
||||||
kGlobalShortcutsIface, "Activated",
|
|
||||||
this, SLOT(handleActivated(QString)));
|
|
||||||
|
|
||||||
bus.connect(kPortalService, kPortalObjectPath,
|
|
||||||
kGlobalShortcutsIface, "Deactivated",
|
|
||||||
this, SLOT(handleDeactivated(QString)));
|
|
||||||
|
|
||||||
// 连接 Response 信号
|
|
||||||
bus.connect(kPortalService, QString(),
|
|
||||||
kRequestIface, "Response",
|
|
||||||
this, SLOT(onPortalResponse(uint, QVariantMap)));
|
|
||||||
|
|
||||||
// 发送 CreateSession
|
|
||||||
QDBusInterface portal(kPortalService, kPortalObjectPath,
|
|
||||||
kGlobalShortcutsIface, bus);
|
|
||||||
|
|
||||||
QString sessionToken = Impl::makeToken("io_impress_sess");
|
|
||||||
QString requestToken = Impl::makeToken("io_impress_req");
|
|
||||||
|
|
||||||
QVariantMap options;
|
|
||||||
options["handle_token"] = requestToken;
|
|
||||||
options["session_handle_token"] = sessionToken;
|
|
||||||
|
|
||||||
QDBusMessage reply = portal.call("CreateSession", options);
|
|
||||||
if (reply.type() == QDBusMessage::ErrorMessage) {
|
|
||||||
emit error(QString("CreateSession 失败: %1").arg(reply.errorMessage()));
|
|
||||||
LOG_ERROR(kTag, reply.errorMessage());
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 保存预期 session path
|
|
||||||
QString sender = bus.baseService();
|
|
||||||
impl_->sessionPath = Impl::makeSessionPath(sender, sessionToken);
|
|
||||||
impl_->state = Impl::WaitingSession;
|
|
||||||
|
|
||||||
LOG_INFO(kTag, "CreateSession 已发送,等待用户授权...");
|
|
||||||
LOG_DEBUG(kTag, QString("Session path: %1").arg(impl_->sessionPath));
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void CapsLockVoiceHotkey::stop() {
|
|
||||||
if (!active_ && impl_->state == Impl::Idle) return;
|
|
||||||
|
|
||||||
QDBusConnection bus = Impl::bus();
|
|
||||||
bus.disconnect(kPortalService, kPortalObjectPath,
|
|
||||||
kGlobalShortcutsIface, "Activated",
|
|
||||||
this, SLOT(handleActivated(QString)));
|
|
||||||
bus.disconnect(kPortalService, kPortalObjectPath,
|
|
||||||
kGlobalShortcutsIface, "Deactivated",
|
|
||||||
this, SLOT(handleDeactivated(QString)));
|
|
||||||
bus.disconnect(kPortalService, QString(),
|
|
||||||
kRequestIface, "Response",
|
|
||||||
this, SLOT(onPortalResponse(uint, QVariantMap)));
|
|
||||||
|
|
||||||
active_ = false;
|
|
||||||
recording_ = false;
|
|
||||||
impl_->state = Impl::Idle;
|
|
||||||
impl_->sessionPath.clear();
|
|
||||||
LOG_INFO(kTag, "CapsLock 语音快捷键已停止");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * @brief Routes a portal Response to the handler for the current handshake step.
 *
 * Responses that arrive in any other state are ignored.
 */
void CapsLockVoiceHotkey::onPortalResponse(uint response, const QVariantMap& results) {
    switch (impl_->state) {
    case Impl::WaitingSession:
        handleSessionResponse(response, results);
        break;
    case Impl::WaitingBind:
        handleBindResponse(response, results);
        break;
    default:
        break;
    }
}
|
|
||||||
|
|
||||||
/**
 * @brief Handles the Response to CreateSession and, on success, sends BindShortcuts.
 *
 * @param response Portal response code; any non-zero value is treated as a refusal.
 * @param results  Response payload; "session_handle", when non-empty, replaces
 *                 the stored session path.
 */
void CapsLockVoiceHotkey::handleSessionResponse(uint response, const QVariantMap& results) {
    if (impl_->state != Impl::WaitingSession) {
        return;
    }

    const bool granted = (response == 0);
    if (!granted) {
        emit error(QString("Session 被拒绝 (response=%1)").arg(response));
        LOG_ERROR(kTag, QString("Session 被拒绝: %1").arg(response));
        impl_->state = Impl::Idle;
        return;
    }

    // Prefer the handle reported by the portal over the path computed in start().
    const QString reportedHandle = results.value("session_handle").toString();
    if (!reportedHandle.isEmpty()) {
        impl_->sessionPath = reportedHandle;
    }
    LOG_INFO(kTag, QString("Session 已授权: %1").arg(impl_->sessionPath));

    // Next step of the handshake: bind the shortcut.
    impl_->state = Impl::WaitingBind;

    QDBusInterface portal(kPortalService, kPortalObjectPath,
                          kGlobalShortcutsIface, Impl::bus());

    QVariantMap bindOptions;
    bindOptions["handle_token"] = Impl::makeToken("io_impress_bind");

    // One shortcut entry: an id plus a property map holding its description.
    QVariantMap shortcutProps;
    shortcutProps["description"] = "语音输入(CapsLock)";

    QVariantMap shortcutEntry;
    shortcutEntry["id"] = "voice_input";
    shortcutEntry["properties"] = shortcutProps;

    // NOTE(review): the portal documents this argument as a(sa{sv}); confirm
    // that a QList<QVariant> of maps is marshalled to that signature.
    QList<QVariant> shortcuts;
    shortcuts.append(shortcutEntry);

    const QDBusMessage reply = portal.call("BindShortcuts",
                                           QDBusObjectPath(impl_->sessionPath),
                                           shortcuts,
                                           QString(),  // parent_window left empty
                                           bindOptions);

    if (reply.type() == QDBusMessage::ErrorMessage) {
        emit error(QString("BindShortcuts 失败: %1").arg(reply.errorMessage()));
        LOG_ERROR(kTag, reply.errorMessage());
        impl_->state = Impl::Idle;
        return;
    }

    // Keep the first reply argument (if any) as the pending request path.
    const QList<QVariant> replyArgs = reply.arguments();
    if (replyArgs.isEmpty()) {
        impl_->pendingRequestPath = QString();
    } else {
        impl_->pendingRequestPath = replyArgs[0].toString();
    }
    LOG_INFO(kTag, "BindShortcuts 已发送,等待用户设置快捷键...");
}
|
|
||||||
|
|
||||||
/**
 * @brief Handles the Response to BindShortcuts.
 *
 * A zero response marks the object active and emits ready(); any other value
 * emits error() and returns the state machine to Idle.
 */
void CapsLockVoiceHotkey::handleBindResponse(uint response, const QVariantMap&) {
    if (impl_->state != Impl::WaitingBind) {
        return;
    }

    if (response == 0) {
        // Binding accepted.
        active_ = true;
        impl_->state = Impl::Active;
        emit ready();
        LOG_INFO(kTag, "快捷键已注册,CapsLock 语音输入已就绪");
        return;
    }

    emit error(QString("快捷键绑定被拒绝 (response=%1)").arg(response));
    LOG_ERROR(kTag, QString("Bind 被拒绝: %1").arg(response));
    impl_->state = Impl::Idle;
}
|
|
||||||
|
|
||||||
void CapsLockVoiceHotkey::handleActivated(const QString& shortcutId) {
|
|
||||||
if (!active_) return;
|
|
||||||
LOG_DEBUG(kTag, QString("快捷键按下: %1").arg(shortcutId));
|
|
||||||
recording_ = true;
|
|
||||||
emit recordingStarted();
|
|
||||||
}
|
|
||||||
|
|
||||||
void CapsLockVoiceHotkey::handleDeactivated(const QString& shortcutId) {
|
|
||||||
if (!active_) return;
|
|
||||||
LOG_DEBUG(kTag, QString("快捷键松开: %1").arg(shortcutId));
|
|
||||||
recording_ = false;
|
|
||||||
emit recordingStopped();
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace impress
|
|
||||||
@ -1,66 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <QObject>
|
|
||||||
#include <QString>
|
|
||||||
#include <QTimer>
|
|
||||||
#include <memory>
|
|
||||||
|
|
||||||
namespace impress {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief CapsLock 长按语音输入快捷键管理器
|
|
||||||
*
|
|
||||||
* 使用 freedesktop GlobalShortcuts D-Bus Portal 实现 Wayland 兼容的全局快捷键。
|
|
||||||
* 工作流程:
|
|
||||||
* 1. 用户长按 CapsLock 1 秒后触发录音
|
|
||||||
* 2. 长按期间持续录音
|
|
||||||
* 3. 松开 CapsLock 后停止录音并触发转写
|
|
||||||
* 4. 短按(< 1s)直接传递 CapsLock 事件(切换大小写锁定)
|
|
||||||
*
|
|
||||||
* 首次启动时需要用户通过 GNOME 对话框授权。
|
|
||||||
*/
|
|
||||||
class CapsLockVoiceHotkey : public QObject {
|
|
||||||
Q_OBJECT
|
|
||||||
public:
|
|
||||||
explicit CapsLockVoiceHotkey(QObject* parent = nullptr);
|
|
||||||
~CapsLockVoiceHotkey() override;
|
|
||||||
|
|
||||||
/** @brief 初始化并注册快捷键(首次需要用户授权) */
|
|
||||||
bool start();
|
|
||||||
|
|
||||||
/** @brief 停止并注销快捷键 */
|
|
||||||
void stop();
|
|
||||||
|
|
||||||
/** @brief 是否已激活 */
|
|
||||||
bool isActive() const { return active_; }
|
|
||||||
|
|
||||||
/** @brief 当前是否正在录音(CapsLock 长按超过 1s 后) */
|
|
||||||
bool isRecording() const { return recording_; }
|
|
||||||
|
|
||||||
signals:
|
|
||||||
/** @brief 开始录音(长按超过 1 秒后) */
|
|
||||||
void recordingStarted();
|
|
||||||
|
|
||||||
/** @brief 停止录音(松开快捷键后) */
|
|
||||||
void recordingStopped();
|
|
||||||
|
|
||||||
/** @brief 快捷键已注册(用户授权后) */
|
|
||||||
void ready();
|
|
||||||
|
|
||||||
/** @brief 初始化失败 */
|
|
||||||
void error(const QString& message);
|
|
||||||
|
|
||||||
private:
|
|
||||||
struct Impl;
|
|
||||||
std::unique_ptr<Impl> impl_;
|
|
||||||
bool active_ = false;
|
|
||||||
bool recording_ = false;
|
|
||||||
|
|
||||||
void handleSessionResponse(uint response, const QVariantMap& results);
|
|
||||||
void handleBindResponse(uint response, const QVariantMap& results);
|
|
||||||
void handleActivated(const QString& shortcutId);
|
|
||||||
void handleDeactivated(const QString& shortcutId);
|
|
||||||
void onPortalResponse(uint response, const QVariantMap& results);
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace impress
|
|
||||||
@ -12,10 +12,6 @@
|
|||||||
#include <QMutex>
|
#include <QMutex>
|
||||||
#include <QMutexLocker>
|
#include <QMutexLocker>
|
||||||
#include <QFileInfo>
|
#include <QFileInfo>
|
||||||
#include <QDir>
|
|
||||||
#include <QDateTime>
|
|
||||||
#include <QFile>
|
|
||||||
#include <QDataStream>
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
||||||
@ -26,54 +22,6 @@
|
|||||||
|
|
||||||
static const char* const kTag = "SenseVoiceEngine";
|
static const char* const kTag = "SenseVoiceEngine";
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief 简易 WAV 写入(不依赖 dr_wav,避免多定义冲突)
|
|
||||||
*/
|
|
||||||
static bool saveWav16(const QString& path, const std::vector<float>& samples, int sampleRate) {
|
|
||||||
QFile file(path);
|
|
||||||
if (!file.open(QIODevice::WriteOnly)) return false;
|
|
||||||
|
|
||||||
int numSamples = static_cast<int>(samples.size());
|
|
||||||
int dataSize = numSamples * 2; // 16-bit mono
|
|
||||||
int totalSize = 36 + dataSize; // RIFF header size
|
|
||||||
quint16 audioFormat = 1; // PCM
|
|
||||||
quint16 numChannels = 1;
|
|
||||||
quint32 byteRate = sampleRate * 2; // sampleRate * numChannels * bitsPerSample/8
|
|
||||||
quint16 blockAlign = 2; // numChannels * bitsPerSample/8
|
|
||||||
quint16 bitsPerSample = 16;
|
|
||||||
|
|
||||||
QDataStream out(&file);
|
|
||||||
out.setByteOrder(QDataStream::LittleEndian);
|
|
||||||
|
|
||||||
// RIFF header
|
|
||||||
out.writeRawData("RIFF", 4);
|
|
||||||
out << static_cast<quint32>(totalSize);
|
|
||||||
out.writeRawData("WAVE", 4);
|
|
||||||
|
|
||||||
// fmt chunk
|
|
||||||
out.writeRawData("fmt ", 4);
|
|
||||||
out << static_cast<quint32>(16); // chunk size
|
|
||||||
out << audioFormat;
|
|
||||||
out << numChannels;
|
|
||||||
out << static_cast<quint32>(sampleRate);
|
|
||||||
out << byteRate;
|
|
||||||
out << blockAlign;
|
|
||||||
out << bitsPerSample;
|
|
||||||
|
|
||||||
// data chunk
|
|
||||||
out.writeRawData("data", 4);
|
|
||||||
out << static_cast<quint32>(dataSize);
|
|
||||||
|
|
||||||
// PCM data (float → int16)
|
|
||||||
for (float s : samples) {
|
|
||||||
s = std::max(-1.0f, std::min(1.0f, s)); // clip
|
|
||||||
qint16 val = static_cast<qint16>(s * 32767.0f);
|
|
||||||
out << val;
|
|
||||||
}
|
|
||||||
|
|
||||||
return file.error() == QFile::FileError::NoError;
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace impress {
|
namespace impress {
|
||||||
|
|
||||||
/** 语言代码映射 */
|
/** 语言代码映射 */
|
||||||
@ -279,11 +227,6 @@ bool SenseVoiceEngine::isLoaded() const {
|
|||||||
return loaded_;
|
return loaded_;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Stores the debug-audio flag and logs whether it is now on or off. */
void SenseVoiceEngine::setDebugSaveAudio(bool enable) {
    debugSaveAudio_ = enable;

    const char* stateText = enable ? "开启" : "关闭";
    LOG_INFO(kTag, QString("调试录音保存: %1").arg(stateText));
}
|
|
||||||
|
|
||||||
/** CTC 贪婪解码:去重 + 去除空白 */
|
/** CTC 贪婪解码:去重 + 去除空白 */
|
||||||
static std::vector<int> ctcGreedyDecode(const std::vector<int>& tokens, int blankToken) {
|
static std::vector<int> ctcGreedyDecode(const std::vector<int>& tokens, int blankToken) {
|
||||||
std::vector<int> result;
|
std::vector<int> result;
|
||||||
@ -343,53 +286,9 @@ RecognitionResult SenseVoiceEngine::infer(const std::vector<float>& samples,
|
|||||||
try {
|
try {
|
||||||
// 1. 重采样到 16kHz
|
// 1. 重采样到 16kHz
|
||||||
Timer preprocessTimer;
|
Timer preprocessTimer;
|
||||||
|
|
||||||
// 调试模式:保存原始音频到 WAV 文件
|
|
||||||
if (debugSaveAudio_ && !samples.empty()) {
|
|
||||||
QString debugDir = "/tmp/impress_audio_debug";
|
|
||||||
QDir dir;
|
|
||||||
if (!dir.exists(debugDir)) {
|
|
||||||
dir.mkpath(debugDir);
|
|
||||||
}
|
|
||||||
QString timestamp = QDateTime::currentDateTime().toString("yyyyMMdd_HHmmss_zzz");
|
|
||||||
QString wavPath = QString("%1/audio_%2_%3Hz.wav")
|
|
||||||
.arg(debugDir).arg(timestamp).arg(sampleRate);
|
|
||||||
|
|
||||||
if (saveWav16(wavPath, samples, sampleRate)) {
|
|
||||||
LOG_DEBUG(kTag, QString("调试音频已保存: %1 (%2 样本, %3Hz)")
|
|
||||||
.arg(wavPath).arg(samples.size()).arg(sampleRate));
|
|
||||||
} else {
|
|
||||||
LOG_WARNING(kTag, QString("无法创建调试音频文件: %1").arg(wavPath));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<float> processedSamples = samples;
|
std::vector<float> processedSamples = samples;
|
||||||
int currentSampleRate = sampleRate;
|
int currentSampleRate = sampleRate;
|
||||||
|
|
||||||
// 计算输入音频 RMS 电平用于诊断
|
|
||||||
double rms = 0.0;
|
|
||||||
bool hasNaN = false;
|
|
||||||
for (float s : samples) {
|
|
||||||
if (std::isnan(s) || std::isinf(s)) { hasNaN = true; break; }
|
|
||||||
rms += s * s;
|
|
||||||
}
|
|
||||||
rms = std::sqrt(rms / samples.size());
|
|
||||||
|
|
||||||
if (hasNaN) {
|
|
||||||
result.text = "[错误] 输入音频包含 NaN/Inf 值,请检查麦克风设备";
|
|
||||||
result.latency_ms = timer.elapsedMs();
|
|
||||||
LOG_ERROR(kTag, QString("输入音频包含无效值 (NaN/Inf), 样本数: %1").arg(samples.size()));
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
if (rms < 1e-6) {
|
|
||||||
result.text = "";
|
|
||||||
result.latency_ms = timer.elapsedMs();
|
|
||||||
LOG_DEBUG(kTag, QString("静音段 (RMS: %1), 跳过推理").arg(rms, 0, 'f', 6));
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
LOG_DEBUG(kTag, QString("输入音频 RMS: %1 (样本数: %2)").arg(rms, 0, 'f', 6).arg(samples.size()));
|
|
||||||
|
|
||||||
if (sampleRate != 16000) {
|
if (sampleRate != 16000) {
|
||||||
AudioProcessor processor(16000);
|
AudioProcessor processor(16000);
|
||||||
processedSamples = processor.resample(samples, sampleRate);
|
processedSamples = processor.resample(samples, sampleRate);
|
||||||
@ -471,14 +370,13 @@ RecognitionResult SenseVoiceEngine::infer(const std::vector<float>& samples,
|
|||||||
|
|
||||||
for (int t = 0; t < seqLen; t++) {
|
for (int t = 0; t < seqLen; t++) {
|
||||||
int offset = t * vocabSize;
|
int offset = t * vocabSize;
|
||||||
int bestAbsIdx = argmax(logitsData, offset, offset + vocabSize);
|
int bestToken = argmax(logitsData, offset, offset + vocabSize);
|
||||||
int bestToken = bestAbsIdx - offset; // 绝对索引 → token ID
|
|
||||||
|
|
||||||
if (bestToken != SenseVoiceTokenizer::kTokenBlank) {
|
if (bestToken != SenseVoiceTokenizer::kTokenBlank) {
|
||||||
rawTokens.push_back(bestToken);
|
rawTokens.push_back(bestToken);
|
||||||
|
|
||||||
// 计算置信度
|
// 计算置信度
|
||||||
float maxLogit = logitsData[bestAbsIdx];
|
float maxLogit = logitsData[offset + bestToken];
|
||||||
// 近似置信度: 使用 softmax 的最大值位置
|
// 近似置信度: 使用 softmax 的最大值位置
|
||||||
totalConf += maxLogit;
|
totalConf += maxLogit;
|
||||||
confCount++;
|
confCount++;
|
||||||
@ -500,8 +398,8 @@ RecognitionResult SenseVoiceEngine::infer(const std::vector<float>& samples,
|
|||||||
result.text = "";
|
result.text = "";
|
||||||
} else if (impl_->tokenizer.isLoaded()) {
|
} else if (impl_->tokenizer.isLoaded()) {
|
||||||
result.text = impl_->tokenizer.decode(decodedTokens);
|
result.text = impl_->tokenizer.decode(decodedTokens);
|
||||||
LOG_DEBUG(kTag, QString("解码文本: %1 个 token → %2 字符: %3")
|
LOG_DEBUG(kTag, QString("解码文本: %1 个 token → %2 字符")
|
||||||
.arg(decodedTokens.size()).arg(result.text.length()).arg(result.text));
|
.arg(decodedTokens.size()).arg(result.text.length()));
|
||||||
} else {
|
} else {
|
||||||
// 降级:输出 token ID
|
// 降级:输出 token ID
|
||||||
QString decodedText;
|
QString decodedText;
|
||||||
|
|||||||
@ -48,9 +48,6 @@ public:
|
|||||||
int sampleRate,
|
int sampleRate,
|
||||||
const QString& language = QString());
|
const QString& language = QString());
|
||||||
|
|
||||||
/** @brief 设置调试模式:开启后每次推理保存音频到 WAV */
|
|
||||||
void setDebugSaveAudio(bool enable);
|
|
||||||
|
|
||||||
signals:
|
signals:
|
||||||
void modelLoaded(const QString& modelPath);
|
void modelLoaded(const QString& modelPath);
|
||||||
void modelLoadError(const QString& modelPath, const QString& error);
|
void modelLoadError(const QString& modelPath, const QString& error);
|
||||||
@ -61,7 +58,6 @@ private:
|
|||||||
struct Impl;
|
struct Impl;
|
||||||
std::unique_ptr<Impl> impl_;
|
std::unique_ptr<Impl> impl_;
|
||||||
bool loaded_ = false;
|
bool loaded_ = false;
|
||||||
bool debugSaveAudio_ = false;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace impress
|
} // namespace impress
|
||||||
|
|||||||
@ -1,236 +0,0 @@
|
|||||||
#include "voice_input_service.h"
|
|
||||||
#include "audio/audio_capture.h"
|
|
||||||
#include "sense_voice_engine.h"
|
|
||||||
#include "caps_lock_voice_hotkey.h"
|
|
||||||
#include "wayland_text_injector.h"
|
|
||||||
#include "app/config_manager.h"
|
|
||||||
#include "utils/logger.h"
|
|
||||||
|
|
||||||
#include <QThread>
|
|
||||||
#include <QTimer>
|
|
||||||
#include <QtConcurrent>
|
|
||||||
|
|
||||||
static const char* const kTag = "VoiceInputService";
|
|
||||||
|
|
||||||
namespace impress {
|
|
||||||
|
|
||||||
/** Components owned by the service; all are null until start() creates them. */
struct VoiceInputService::Impl {
    AudioCapture* audioCapture{nullptr};      // microphone capture
    SenseVoiceEngine* sttEngine{nullptr};     // speech-to-text engine
    CapsLockVoiceHotkey* hotkey{nullptr};     // global shortcut
    WaylandTextInjector* injector{nullptr};   // text / key injection
};
|
|
||||||
|
|
||||||
/**
 * @brief Stores the configuration manager and prepares the single-shot
 *        long-press timer; no components are created until start().
 */
VoiceInputService::VoiceInputService(ConfigManager* configManager, QObject* parent)
    : QObject(parent), configManager_(configManager), impl_(std::make_unique<Impl>()) {
    longPressTimer_ = new QTimer(this);
    longPressTimer_->setSingleShot(true);

    // Timer fired while the key is still held: this press counts as a long press.
    const auto confirmLongPress = [this]() {
        if (longPressDetected_) {
            return;
        }
        longPressDetected_ = true;
        emit statusChanged("正在录音...");
    };
    connect(longPressTimer_, &QTimer::timeout, this, confirmLongPress);
}
|
|
||||||
|
|
||||||
/** Delegates shutdown to stop(). */
VoiceInputService::~VoiceInputService() { stop(); }
|
|
||||||
|
|
||||||
bool VoiceInputService::start() {
|
|
||||||
if (running_) return true;
|
|
||||||
|
|
||||||
// 1. 初始化音频采集
|
|
||||||
impl_->audioCapture = new AudioCapture(this);
|
|
||||||
connect(impl_->audioCapture, &AudioCapture::audioDataReady,
|
|
||||||
this, &VoiceInputService::onAudioData);
|
|
||||||
|
|
||||||
// 2. 初始化 STT 引擎并加载模型
|
|
||||||
impl_->sttEngine = new SenseVoiceEngine(this);
|
|
||||||
|
|
||||||
// 从配置加载模型
|
|
||||||
QString modelPath = configManager_->get("stt.model_path").toString();
|
|
||||||
QString tokensPath = configManager_->get("stt.tokens_path").toString();
|
|
||||||
QString device = configManager_->get("stt.device").toString();
|
|
||||||
int numThreads = configManager_->get("stt.num_threads").toInt();
|
|
||||||
|
|
||||||
if (!modelPath.isEmpty()) {
|
|
||||||
LOG_INFO(kTag, QString("正在加载 STT 模型: %1").arg(modelPath));
|
|
||||||
bool modelLoaded = impl_->sttEngine->loadModelSync(modelPath, tokensPath, device, numThreads);
|
|
||||||
if (!modelLoaded) {
|
|
||||||
emit error(QString("STT 模型加载失败: %1").arg(modelPath));
|
|
||||||
LOG_ERROR(kTag, "STT 模型加载失败");
|
|
||||||
} else {
|
|
||||||
LOG_INFO(kTag, "STT 模型加载成功");
|
|
||||||
// 同步调试音频设置
|
|
||||||
bool debugSave = configManager_->get("stt.debug_save_audio").toBool();
|
|
||||||
impl_->sttEngine->setDebugSaveAudio(debugSave);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
LOG_WARNING(kTag, "模型路径为空,请先在配置中设置模型路径");
|
|
||||||
}
|
|
||||||
|
|
||||||
// 3. 初始化全局快捷键
|
|
||||||
impl_->hotkey = new CapsLockVoiceHotkey(this);
|
|
||||||
connect(impl_->hotkey, &CapsLockVoiceHotkey::recordingStarted,
|
|
||||||
this, &VoiceInputService::onHotkeyActivated);
|
|
||||||
connect(impl_->hotkey, &CapsLockVoiceHotkey::recordingStopped,
|
|
||||||
this, &VoiceInputService::onHotkeyDeactivated);
|
|
||||||
connect(impl_->hotkey, &CapsLockVoiceHotkey::ready,
|
|
||||||
this, [this]() {
|
|
||||||
emit statusChanged("语音输入就绪(快捷键已注册)");
|
|
||||||
});
|
|
||||||
connect(impl_->hotkey, &CapsLockVoiceHotkey::error,
|
|
||||||
this, &VoiceInputService::error);
|
|
||||||
|
|
||||||
// 4. 初始化文本注入器
|
|
||||||
impl_->injector = new WaylandTextInjector(this);
|
|
||||||
if (!impl_->injector->initialize()) {
|
|
||||||
emit error("文本注入器初始化失败,无法注入识别结果");
|
|
||||||
LOG_ERROR(kTag, "文本注入器初始化失败");
|
|
||||||
}
|
|
||||||
|
|
||||||
// 启动快捷键(首次会弹出授权对话框)
|
|
||||||
if (!impl_->hotkey->start()) {
|
|
||||||
emit error("全局快捷键启动失败");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
running_ = true;
|
|
||||||
emit statusChanged("语音输入已启动(等待授权...)");
|
|
||||||
LOG_INFO(kTag, "语音输入服务已启动");
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void VoiceInputService::stop() {
|
|
||||||
if (!running_) return;
|
|
||||||
|
|
||||||
longPressTimer_->stop();
|
|
||||||
|
|
||||||
if (impl_->audioCapture) {
|
|
||||||
impl_->audioCapture->stop();
|
|
||||||
}
|
|
||||||
if (impl_->sttEngine) {
|
|
||||||
impl_->sttEngine->unloadModel();
|
|
||||||
}
|
|
||||||
if (impl_->hotkey) {
|
|
||||||
impl_->hotkey->stop();
|
|
||||||
}
|
|
||||||
|
|
||||||
running_ = false;
|
|
||||||
recording_ = false;
|
|
||||||
longPressDetected_ = false;
|
|
||||||
audioBuffer_.clear();
|
|
||||||
|
|
||||||
LOG_INFO(kTag, "语音输入服务已停止");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * @brief Shortcut pressed: clears the buffer, arms the long-press timer and
 *        starts capturing audio right away.
 */
void VoiceInputService::onHotkeyActivated() {
    LOG_DEBUG(kTag, "快捷键激活(按下)");

    // Fresh recording state for this press.
    audioBuffer_.clear();
    longPressDetected_ = false;
    recording_ = true;

    // The timer decides later whether this press counts as a long press.
    longPressTimer_->start(longPressThreshold_);

    // Capture begins immediately, before the long press is confirmed.
    const int defaultDevice = -1;
    impl_->audioCapture->start(defaultDevice, 16000, 20);

    emit statusChanged("等待长按确认...");
}
|
|
||||||
|
|
||||||
void VoiceInputService::onHotkeyDeactivated() {
|
|
||||||
LOG_DEBUG(kTag, "快捷键停用(松开)");
|
|
||||||
recording_ = false;
|
|
||||||
longPressTimer_->stop();
|
|
||||||
|
|
||||||
// 停止音频采集
|
|
||||||
if (impl_->audioCapture && impl_->audioCapture->isRunning()) {
|
|
||||||
impl_->audioCapture->stop();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!longPressDetected_) {
|
|
||||||
// 短按 → 模拟 CapsLock 按键
|
|
||||||
LOG_DEBUG(kTag, "短按,模拟 CapsLock");
|
|
||||||
simulateCapsLock();
|
|
||||||
emit statusChanged("短按:切换 CapsLock");
|
|
||||||
} else {
|
|
||||||
// 长按 → 停止录音并转写
|
|
||||||
stopRecordingAndTranscribe();
|
|
||||||
}
|
|
||||||
|
|
||||||
longPressDetected_ = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void VoiceInputService::onAudioData(const std::vector<float>& samples, int sampleRate) {
|
|
||||||
if (!recording_) return;
|
|
||||||
|
|
||||||
audioSampleRate_ = sampleRate;
|
|
||||||
audioBuffer_.insert(audioBuffer_.end(), samples.begin(), samples.end());
|
|
||||||
}
|
|
||||||
|
|
||||||
void VoiceInputService::stopRecordingAndTranscribe() {
|
|
||||||
if (audioBuffer_.empty()) {
|
|
||||||
emit statusChanged("未检测到音频输入");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
emit statusChanged("正在识别...");
|
|
||||||
|
|
||||||
QString language = configManager_->get("stt.language").toString();
|
|
||||||
|
|
||||||
(void)QtConcurrent::run([this, buffer = audioBuffer_, lang = language]() {
|
|
||||||
QString text;
|
|
||||||
|
|
||||||
if (!impl_->sttEngine->isLoaded()) {
|
|
||||||
LOG_WARNING(kTag, "模型未加载,跳过推理");
|
|
||||||
text = "[错误] 模型未加载,请先在配置中设置模型路径";
|
|
||||||
} else {
|
|
||||||
auto result = impl_->sttEngine->infer(buffer, audioSampleRate_, lang);
|
|
||||||
text = result.text;
|
|
||||||
}
|
|
||||||
|
|
||||||
QMetaObject::invokeMethod(this, [this, text]() {
|
|
||||||
onRecognitionComplete(text);
|
|
||||||
}, Qt::QueuedConnection);
|
|
||||||
});
|
|
||||||
|
|
||||||
audioBuffer_.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
void VoiceInputService::onRecognitionComplete(const QString& text) {
|
|
||||||
if (text.isEmpty()) {
|
|
||||||
emit statusChanged("识别结果:无语音输入");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
emit recognitionResult(text);
|
|
||||||
emit statusChanged(QString("识别结果: %1").arg(text));
|
|
||||||
|
|
||||||
// 注入文本到光标位置
|
|
||||||
if (impl_->injector && impl_->injector->isInitialized()) {
|
|
||||||
impl_->injector->injectText(text);
|
|
||||||
LOG_INFO(kTag, QString("文本已注入: %1").arg(text));
|
|
||||||
} else {
|
|
||||||
LOG_WARNING(kTag, "文本注入器未就绪,无法注入");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void VoiceInputService::simulateCapsLock() {
|
|
||||||
if (impl_->injector && impl_->injector->isInitialized()) {
|
|
||||||
// CapsLock keysym = 0xffe5
|
|
||||||
unsigned int capslockKeysym = 0xffe5;
|
|
||||||
impl_->injector->simulateKeycode(capslockKeysym);
|
|
||||||
LOG_DEBUG(kTag, "模拟 CapsLock 按键已注入");
|
|
||||||
} else {
|
|
||||||
LOG_WARNING(kTag, "文本注入器未初始化,无法模拟 CapsLock");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace impress
|
|
||||||
@ -1,80 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <QObject>
|
|
||||||
#include <QString>
|
|
||||||
#include <QTimer>
|
|
||||||
#include <vector>
|
|
||||||
#include <memory>
|
|
||||||
|
|
||||||
namespace impress {
|
|
||||||
|
|
||||||
class AudioCapture;
|
|
||||||
class SenseVoiceEngine;
|
|
||||||
class CapsLockVoiceHotkey;
|
|
||||||
class WaylandTextInjector;
|
|
||||||
class ConfigManager;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief CapsLock 语音输入服务
|
|
||||||
*
|
|
||||||
* 协调全局快捷键、音频采集、STT 推理和文本注入。
|
|
||||||
* 状态机:
|
|
||||||
* 1. 空闲 → 按下快捷键 → 开始录音
|
|
||||||
* 2. 长按超过 1 秒 → 开始正式录音(清除之前的静音段)
|
|
||||||
* 3. 松开快捷键 → 停止录音 → 推理 → 注入文本
|
|
||||||
* 4. 短按(< 1 秒)→ 注入 CapsLock 按键(切换大小写)
|
|
||||||
*/
|
|
||||||
class VoiceInputService : public QObject {
|
|
||||||
Q_OBJECT
|
|
||||||
public:
|
|
||||||
explicit VoiceInputService(ConfigManager* configManager, QObject* parent = nullptr);
|
|
||||||
~VoiceInputService() override;
|
|
||||||
|
|
||||||
/** @brief 启动服务(初始化所有组件) */
|
|
||||||
bool start();
|
|
||||||
|
|
||||||
/** @brief 停止服务 */
|
|
||||||
void stop();
|
|
||||||
|
|
||||||
/** @brief 是否已启动 */
|
|
||||||
bool isRunning() const { return running_; }
|
|
||||||
|
|
||||||
/** @brief 是否正在录音 */
|
|
||||||
bool isRecording() const { return recording_; }
|
|
||||||
|
|
||||||
/** @brief 长按阈值(毫秒),默认 1000ms */
|
|
||||||
void setLongPressThreshold(int ms) { longPressThreshold_ = ms; }
|
|
||||||
int longPressThreshold() const { return longPressThreshold_; }
|
|
||||||
|
|
||||||
signals:
|
|
||||||
void statusChanged(const QString& status);
|
|
||||||
void recognitionResult(const QString& text);
|
|
||||||
void error(const QString& message);
|
|
||||||
|
|
||||||
private slots:
|
|
||||||
void onHotkeyActivated();
|
|
||||||
void onHotkeyDeactivated();
|
|
||||||
void onAudioData(const std::vector<float>& samples, int sampleRate);
|
|
||||||
void onRecognitionComplete(const QString& text);
|
|
||||||
|
|
||||||
private:
|
|
||||||
struct Impl;
|
|
||||||
ConfigManager* configManager_ = nullptr;
|
|
||||||
std::unique_ptr<Impl> impl_;
|
|
||||||
|
|
||||||
bool running_ = false;
|
|
||||||
bool recording_ = false;
|
|
||||||
bool longPressDetected_ = false;
|
|
||||||
int longPressThreshold_ = 1000;
|
|
||||||
|
|
||||||
std::vector<float> audioBuffer_;
|
|
||||||
int audioSampleRate_ = 16000;
|
|
||||||
|
|
||||||
QTimer* longPressTimer_ = nullptr;
|
|
||||||
|
|
||||||
void startRecording();
|
|
||||||
void stopRecordingAndTranscribe();
|
|
||||||
void simulateCapsLock();
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace impress
|
|
||||||
@ -1,181 +0,0 @@
|
|||||||
#include "wayland_text_injector.h"
|
|
||||||
#include "utils/logger.h"
|
|
||||||
|
|
||||||
#include <QLibrary>
|
|
||||||
#include <QThread>
|
|
||||||
|
|
||||||
static const char* const kTag = "WaylandTextInjector";
|
|
||||||
|
|
||||||
namespace impress {
|
|
||||||
|
|
||||||
// Function-pointer types for the Xlib / XTest entry points that are resolved
// at runtime. No X11 headers are included in this file, so Display* is carried
// as void*, Bool as int and KeySym as unsigned long.
using XTestFakeKeyEventFn = int (*)(void* display, unsigned int keycode,
                                    int is_press, unsigned long delay);
using XOpenDisplayFn = void* (*)(const char* display_name);
using XCloseDisplayFn = int (*)(void* display);
// Xlib declares XKeysymToKeycode as returning KeyCode, which is an unsigned
// char. The pointer type must use the same return type: calling through a
// pointer that claims `unsigned int` is undefined behaviour and may read
// garbage in the upper bits, which would break `keycode == 0` checks.
using XKeysymToKeycodeFn = unsigned char (*)(void* display, unsigned long keysym);
using XStringToKeysymFn = unsigned long (*)(const char* str);
using XSyncFn = int (*)(void* display, int discard);
|
|
||||||
|
|
||||||
/**
 * Private state of WaylandTextInjector: the dynamically loaded X11 / XTest
 * libraries, the resolved entry points and the open display connection.
 */
struct WaylandTextInjector::Impl {
    QLibrary x11Lib;
    QLibrary xtstLib;

    XOpenDisplayFn XOpenDisplay = nullptr;
    XCloseDisplayFn XCloseDisplay = nullptr;
    XKeysymToKeycodeFn XKeysymToKeycode = nullptr;
    XStringToKeysymFn XStringToKeysym = nullptr;
    XSyncFn XSyncFnPtr = nullptr;
    XTestFakeKeyEventFn XTestFakeKeyEvent = nullptr;

    // Opaque Display*; stays nullptr until loadLibraries() has fully succeeded.
    void* display = nullptr;

    /**
     * Loads libX11 and libXtst, resolves every entry point used by the
     * injector and opens the X display.
     *
     * @return true when both libraries, all symbols and the display are
     *         available; false (after logging the reason) otherwise.
     */
    bool loadLibraries() {
        // Load libX11.
        x11Lib.setFileName("libX11.so.6");
        if (!x11Lib.load()) {
            LOG_ERROR(kTag, QString("无法加载 libX11: %1").arg(x11Lib.errorString()));
            return false;
        }

        // Load libXtst.
        xtstLib.setFileName("libXtst.so.6");
        if (!xtstLib.load()) {
            LOG_ERROR(kTag, QString("无法加载 libXtst: %1").arg(xtstLib.errorString()));
            return false;
        }

        // Resolve the Xlib symbols.
        XOpenDisplay = reinterpret_cast<XOpenDisplayFn>(x11Lib.resolve("XOpenDisplay"));
        XCloseDisplay = reinterpret_cast<XCloseDisplayFn>(x11Lib.resolve("XCloseDisplay"));
        XKeysymToKeycode = reinterpret_cast<XKeysymToKeycodeFn>(x11Lib.resolve("XKeysymToKeycode"));
        XStringToKeysym = reinterpret_cast<XStringToKeysymFn>(x11Lib.resolve("XStringToKeysym"));
        XSyncFnPtr = reinterpret_cast<XSyncFn>(x11Lib.resolve("XSync"));

        // Resolve the XTest symbol.
        XTestFakeKeyEvent = reinterpret_cast<XTestFakeKeyEventFn>(
            xtstLib.resolve("XTestFakeKeyEvent"));

        // Every pointer that is later called must be checked here.
        // XStringToKeysym is included because injectChar() calls it for
        // characters it cannot map directly.
        if (!XOpenDisplay || !XCloseDisplay || !XKeysymToKeycode ||
            !XStringToKeysym || !XSyncFnPtr || !XTestFakeKeyEvent) {
            LOG_ERROR(kTag, "无法解析 X11/XTest 符号");
            return false;
        }

        // Open the X11 display (through XWayland): try the default display
        // first, then fall back to ":0".
        display = XOpenDisplay(nullptr);
        if (!display) {
            display = XOpenDisplay(":0");
        }
        if (!display) {
            LOG_ERROR(kTag, "无法连接 X11 显示(XWayland)");
            return false;
        }

        LOG_INFO(kTag, "XTest 文本注入器已初始化");
        return true;
    }
};
|
|
||||||
|
|
||||||
WaylandTextInjector::WaylandTextInjector(QObject* parent)
|
|
||||||
: QObject(parent)
|
|
||||||
, impl_(std::make_unique<Impl>())
|
|
||||||
{}
|
|
||||||
|
|
||||||
// Closes the X display connection if one was opened and the close function
// was resolved; otherwise there is nothing to release.
WaylandTextInjector::~WaylandTextInjector() {
    void* const openDisplay = impl_->display;
    const auto closeDisplay = impl_->XCloseDisplay;
    if (openDisplay == nullptr || closeDisplay == nullptr) {
        return;
    }
    closeDisplay(openDisplay);
}
|
|
||||||
|
|
||||||
// Loads the X11/XTest libraries on first use. Once a call has succeeded,
// later calls return true immediately; after a failure the next call retries.
bool WaylandTextInjector::initialize() {
    if (!initialized_) {
        initialized_ = impl_->loadLibraries();
    }
    return initialized_;
}
|
|
||||||
|
|
||||||
bool WaylandTextInjector::injectText(const QString& text) {
|
|
||||||
if (!initialized_) {
|
|
||||||
LOG_ERROR(kTag, "文本注入器未初始化");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (text.isEmpty()) return true;
|
|
||||||
|
|
||||||
LOG_DEBUG(kTag, QString("注入文本 (%1 字符): %2").arg(text.length()).arg(text));
|
|
||||||
|
|
||||||
for (int i = 0; i < text.length(); i++) {
|
|
||||||
if (!injectChar(text[i])) {
|
|
||||||
LOG_WARNING(kTag, QString("字符注入失败: '%1'").arg(text[i]));
|
|
||||||
}
|
|
||||||
// 字符间短暂延迟
|
|
||||||
QThread::usleep(10000); // 10ms
|
|
||||||
}
|
|
||||||
|
|
||||||
LOG_DEBUG(kTag, "文本注入完成");
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * @brief Injects one UTF-16 code unit as a fake key press + release via XTest.
 *
 * The character is mapped to an X11 keysym and then to a keycode of the
 * current keyboard mapping. Shift is held only for upper-case letters; other
 * characters that need a modifier on the active layout are sent unmodified.
 *
 * @param ch Character to type.
 * @return false when there is no display, the character has no keysym, or the
 *         keysym has no keycode in the current mapping; true once the events
 *         have been queued and flushed.
 */
bool WaylandTextInjector::injectChar(QChar ch) {
    if (!impl_->display) return false;

    // Map the character to a keysym.
    unsigned long keysym = 0;
    if (ch.isLetterOrNumber() || ch.isPunct() || ch.isSymbol()) {
        const unsigned long codePoint = ch.unicode();
        // Latin-1 keysyms are numerically equal to their code points. Anything
        // above 0xFF must use the Unicode keysym range (0x01000000 + code
        // point). Using the raw code point would collide with function
        // keysyms, e.g. U+FF1B would become XK_Escape (0xff1b) and U+FF08
        // would become XK_BackSpace (0xff08).
        keysym = (codePoint <= 0xff) ? codePoint : (0x01000000UL | codePoint);
    } else if (ch == '\n' || ch == '\r') {
        keysym = 0xff0d;  // XK_Return
    } else if (ch == '\t') {
        keysym = 0xff09;  // XK_Tab
    } else if (ch == ' ') {
        keysym = 0x020;   // XK_space
    } else {
        // Last resort: let Xlib look the character up as a keysym name.
        // The pointer is checked here because symbol resolution may have left
        // it unset.
        if (!impl_->XStringToKeysym) return false;
        const QByteArray name = QString(ch).toUtf8();
        keysym = impl_->XStringToKeysym(name.constData());
        if (keysym == 0) {
            return false;  // unsupported character
        }
    }

    const unsigned int keycode = impl_->XKeysymToKeycode(impl_->display, keysym);
    if (keycode == 0) return false;

    // Upper-case letters need Shift held down. Look the Shift keycode up once
    // and reuse it for both the press and the release.
    unsigned int shiftCode = 0;
    if (ch.isUpper() && ch.isLetter()) {
        shiftCode = impl_->XKeysymToKeycode(impl_->display, 0xffe1);  // XK_Shift_L
    }

    if (shiftCode) {
        impl_->XTestFakeKeyEvent(impl_->display, shiftCode, 1, 0);
    }

    // Key press followed by key release.
    impl_->XTestFakeKeyEvent(impl_->display, keycode, 1, 0);
    impl_->XTestFakeKeyEvent(impl_->display, keycode, 0, 0);

    if (shiftCode) {
        impl_->XTestFakeKeyEvent(impl_->display, shiftCode, 0, 0);
    }

    impl_->XSyncFnPtr(impl_->display, 0);
    return true;
}
|
|
||||||
|
|
||||||
bool WaylandTextInjector::simulateKeycode(unsigned int keycode) {
|
|
||||||
if (!impl_->display || !impl_->XTestFakeKeyEvent) return false;
|
|
||||||
|
|
||||||
LOG_DEBUG(kTag, QString("模拟 keycode: 0x%1").arg(keycode, 0, 16));
|
|
||||||
|
|
||||||
// 按下 + 释放
|
|
||||||
impl_->XTestFakeKeyEvent(impl_->display, keycode, 1, 0);
|
|
||||||
impl_->XTestFakeKeyEvent(impl_->display, keycode, 0, 0);
|
|
||||||
impl_->XSyncFnPtr(impl_->display, 0);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace impress
|
|
||||||
@ -1,44 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <QObject>
|
|
||||||
#include <QString>
|
|
||||||
#include <memory>
|
|
||||||
|
|
||||||
namespace impress {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief 文本注入器
|
|
||||||
*
|
|
||||||
* 通过 XTest (XWayland) 或 RemoteDesktop Portal 将文本注入到当前光标位置。
|
|
||||||
* 使用 dlopen 动态加载 libXtst,无需编译时依赖 XTest 头文件。
|
|
||||||
*/
|
|
||||||
class WaylandTextInjector : public QObject {
|
|
||||||
Q_OBJECT
|
|
||||||
public:
|
|
||||||
explicit WaylandTextInjector(QObject* parent = nullptr);
|
|
||||||
~WaylandTextInjector() override;
|
|
||||||
|
|
||||||
/** @brief 初始化(加载 XTest 库) */
|
|
||||||
bool initialize();
|
|
||||||
|
|
||||||
/** @brief 将文本注入到当前光标位置 */
|
|
||||||
bool injectText(const QString& text);
|
|
||||||
|
|
||||||
/** @brief 是否已初始化 */
|
|
||||||
bool isInitialized() const { return initialized_; }
|
|
||||||
|
|
||||||
/** @brief 模拟 X11 keycode 按下+释放(用于 CapsLock 等系统按键) */
|
|
||||||
bool simulateKeycode(unsigned int keycode);
|
|
||||||
|
|
||||||
signals:
|
|
||||||
void error(const QString& message);
|
|
||||||
|
|
||||||
private:
|
|
||||||
struct Impl;
|
|
||||||
std::unique_ptr<Impl> impl_;
|
|
||||||
bool initialized_ = false;
|
|
||||||
|
|
||||||
bool injectChar(QChar ch);
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace impress
|
|
||||||
@ -174,10 +174,6 @@ void FileTranscribePage::onStartTranscribe() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 从配置同步调试开关到引擎
|
|
||||||
sttEngine_->setDebugSaveAudio(
|
|
||||||
configManager_->get("stt.debug_save_audio").toBool());
|
|
||||||
|
|
||||||
isTranscribing_ = true;
|
isTranscribing_ = true;
|
||||||
currentTaskIndex_ = 0;
|
currentTaskIndex_ = 0;
|
||||||
progressBar_->setVisible(true);
|
progressBar_->setVisible(true);
|
||||||
|
|||||||
@ -2,7 +2,6 @@
|
|||||||
#include "stt_test_page.h"
|
#include "stt_test_page.h"
|
||||||
#include "file_transcribe_page.h"
|
#include "file_transcribe_page.h"
|
||||||
#include "settings_page.h"
|
#include "settings_page.h"
|
||||||
#include "core/voice_input_service.h"
|
|
||||||
#include "app/config_manager.h"
|
#include "app/config_manager.h"
|
||||||
#include "utils/logger.h"
|
#include "utils/logger.h"
|
||||||
|
|
||||||
@ -27,28 +26,6 @@ MainWindow::MainWindow(ConfigManager* configManager, QWidget* parent)
|
|||||||
setupMenuBar();
|
setupMenuBar();
|
||||||
loadStyleSheet();
|
loadStyleSheet();
|
||||||
|
|
||||||
// 初始化语音输入服务
|
|
||||||
voiceInputService_ = new VoiceInputService(configManager_, this);
|
|
||||||
connect(voiceInputService_, &VoiceInputService::statusChanged,
|
|
||||||
this, [this](const QString& status) {
|
|
||||||
LOG_DEBUG(kTag, QString("语音输入状态: %1").arg(status));
|
|
||||||
});
|
|
||||||
connect(voiceInputService_, &VoiceInputService::error,
|
|
||||||
this, [this](const QString& err) {
|
|
||||||
LOG_ERROR(kTag, err);
|
|
||||||
});
|
|
||||||
connect(voiceInputService_, &VoiceInputService::recognitionResult,
|
|
||||||
this, [this](const QString& text) {
|
|
||||||
LOG_INFO(kTag, QString("语音识别结果: %1").arg(text));
|
|
||||||
});
|
|
||||||
|
|
||||||
// 监听配置变化,动态启停语音输入服务
|
|
||||||
connect(configManager_, &ConfigManager::configChanged,
|
|
||||||
this, &MainWindow::onVoiceInputConfigChanged);
|
|
||||||
|
|
||||||
// 启动时检查配置
|
|
||||||
onVoiceInputConfigChanged();
|
|
||||||
|
|
||||||
LOG_INFO(kTag, "主窗口已创建");
|
LOG_INFO(kTag, "主窗口已创建");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -102,24 +79,8 @@ void MainWindow::loadStyleSheet() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void MainWindow::closeEvent(QCloseEvent* event) {
|
void MainWindow::closeEvent(QCloseEvent* event) {
|
||||||
if (voiceInputService_) {
|
|
||||||
voiceInputService_->stop();
|
|
||||||
}
|
|
||||||
LOG_INFO(kTag, "主窗口关闭");
|
LOG_INFO(kTag, "主窗口关闭");
|
||||||
QMainWindow::closeEvent(event);
|
QMainWindow::closeEvent(event);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MainWindow::onVoiceInputConfigChanged() {
|
|
||||||
if (!voiceInputService_) return;
|
|
||||||
|
|
||||||
bool enabled = configManager_->get("stt.capslock_voice_enabled").toBool();
|
|
||||||
if (enabled && !voiceInputService_->isRunning()) {
|
|
||||||
voiceInputService_->start();
|
|
||||||
LOG_INFO(kTag, "CapsLock 语音输入已启用");
|
|
||||||
} else if (!enabled && voiceInputService_->isRunning()) {
|
|
||||||
voiceInputService_->stop();
|
|
||||||
LOG_INFO(kTag, "CapsLock 语音输入已关闭");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace impress
|
} // namespace impress
|
||||||
|
|||||||
@ -10,7 +10,6 @@ class ConfigManager;
|
|||||||
class STTTestPage;
|
class STTTestPage;
|
||||||
class FileTranscribePage;
|
class FileTranscribePage;
|
||||||
class SettingsPage;
|
class SettingsPage;
|
||||||
class VoiceInputService;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief 主窗口
|
* @brief 主窗口
|
||||||
@ -30,10 +29,8 @@ private:
|
|||||||
void setupUI();
|
void setupUI();
|
||||||
void setupMenuBar();
|
void setupMenuBar();
|
||||||
void loadStyleSheet();
|
void loadStyleSheet();
|
||||||
void onVoiceInputConfigChanged();
|
|
||||||
|
|
||||||
ConfigManager* configManager_;
|
ConfigManager* configManager_;
|
||||||
VoiceInputService* voiceInputService_;
|
|
||||||
STTTestPage* sttPage_;
|
STTTestPage* sttPage_;
|
||||||
FileTranscribePage* transcribePage_;
|
FileTranscribePage* transcribePage_;
|
||||||
SettingsPage* settingsPage_;
|
SettingsPage* settingsPage_;
|
||||||
|
|||||||
@ -83,14 +83,6 @@ void SettingsPage::setupUI() {
|
|||||||
streamingCheck_->setChecked(true);
|
streamingCheck_->setChecked(true);
|
||||||
sttLayout->addRow("流式识别:", streamingCheck_);
|
sttLayout->addRow("流式识别:", streamingCheck_);
|
||||||
|
|
||||||
debugSaveAudioCheck_ = new QCheckBox("保存调试音频到 /tmp/impress_audio_debug/", this);
|
|
||||||
debugSaveAudioCheck_->setToolTip("开启后,每次识别会将原始音频保存为 WAV 文件,用于调试音频质量问题");
|
|
||||||
sttLayout->addRow("调试录音:", debugSaveAudioCheck_);
|
|
||||||
|
|
||||||
capslockVoiceCheck_ = new QCheckBox("启用 CapsLock 长按语音输入", this);
|
|
||||||
capslockVoiceCheck_->setToolTip("长按 CapsLock 键 1 秒后触发录音,松开后自动转写并输入到光标位置");
|
|
||||||
sttLayout->addRow("快捷语音:", capslockVoiceCheck_);
|
|
||||||
|
|
||||||
beamSizeSpin_ = new QSpinBox(this);
|
beamSizeSpin_ = new QSpinBox(this);
|
||||||
beamSizeSpin_->setRange(1, 20);
|
beamSizeSpin_->setRange(1, 20);
|
||||||
beamSizeSpin_->setValue(5);
|
beamSizeSpin_->setValue(5);
|
||||||
@ -182,8 +174,6 @@ void SettingsPage::loadFromConfig() {
|
|||||||
sampleRateSpin_->setValue(configManager_->get("stt.sample_rate").toInt());
|
sampleRateSpin_->setValue(configManager_->get("stt.sample_rate").toInt());
|
||||||
languageCombo_->setCurrentText(configManager_->get("stt.language").toString());
|
languageCombo_->setCurrentText(configManager_->get("stt.language").toString());
|
||||||
streamingCheck_->setChecked(configManager_->get("stt.streaming").toBool());
|
streamingCheck_->setChecked(configManager_->get("stt.streaming").toBool());
|
||||||
debugSaveAudioCheck_->setChecked(configManager_->get("stt.debug_save_audio").toBool());
|
|
||||||
capslockVoiceCheck_->setChecked(configManager_->get("stt.capslock_voice_enabled").toBool());
|
|
||||||
beamSizeSpin_->setValue(configManager_->get("stt.beam_size").toInt());
|
beamSizeSpin_->setValue(configManager_->get("stt.beam_size").toInt());
|
||||||
temperatureSpin_->setValue(configManager_->get("stt.temperature").toDouble());
|
temperatureSpin_->setValue(configManager_->get("stt.temperature").toDouble());
|
||||||
|
|
||||||
@ -206,8 +196,6 @@ void SettingsPage::saveToConfig() {
|
|||||||
configManager_->set("stt.sample_rate", sampleRateSpin_->value());
|
configManager_->set("stt.sample_rate", sampleRateSpin_->value());
|
||||||
configManager_->set("stt.language", languageCombo_->currentText());
|
configManager_->set("stt.language", languageCombo_->currentText());
|
||||||
configManager_->set("stt.streaming", streamingCheck_->isChecked());
|
configManager_->set("stt.streaming", streamingCheck_->isChecked());
|
||||||
configManager_->set("stt.debug_save_audio", debugSaveAudioCheck_->isChecked());
|
|
||||||
configManager_->set("stt.capslock_voice_enabled", capslockVoiceCheck_->isChecked());
|
|
||||||
configManager_->set("stt.beam_size", beamSizeSpin_->value());
|
configManager_->set("stt.beam_size", beamSizeSpin_->value());
|
||||||
configManager_->set("stt.temperature", temperatureSpin_->value());
|
configManager_->set("stt.temperature", temperatureSpin_->value());
|
||||||
|
|
||||||
|
|||||||
@ -51,8 +51,6 @@ private:
|
|||||||
QSpinBox* sampleRateSpin_;
|
QSpinBox* sampleRateSpin_;
|
||||||
QComboBox* languageCombo_;
|
QComboBox* languageCombo_;
|
||||||
QCheckBox* streamingCheck_;
|
QCheckBox* streamingCheck_;
|
||||||
QCheckBox* debugSaveAudioCheck_;
|
|
||||||
QCheckBox* capslockVoiceCheck_;
|
|
||||||
QSpinBox* beamSizeSpin_;
|
QSpinBox* beamSizeSpin_;
|
||||||
QDoubleSpinBox* temperatureSpin_;
|
QDoubleSpinBox* temperatureSpin_;
|
||||||
|
|
||||||
|
|||||||
@ -135,10 +135,6 @@ void STTTestPage::onToggleRecording() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 从配置同步调试开关到引擎
|
|
||||||
sttEngine_->setDebugSaveAudio(
|
|
||||||
configManager_->get("stt.debug_save_audio").toBool());
|
|
||||||
|
|
||||||
// 异步加载模型
|
// 异步加载模型
|
||||||
if (!sttEngine_->isLoaded() ||
|
if (!sttEngine_->isLoaded() ||
|
||||||
currentModelPath_ != modelPath) {
|
currentModelPath_ != modelPath) {
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user