From 32d3a8e986921d1a27dedf06aba83bcc657610ac Mon Sep 17 00:00:00 2001
From: impressionyang
Date: Tue, 12 May 2026 20:17:46 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20SenseVoice=E6=8E=A8=E7=90=86=E5=BC=A0?=
 =?UTF-8?q?=E9=87=8F=E7=B1=BB=E5=9E=8B=E9=94=99=E8=AF=AF=20int64=E2=86=92i?=
 =?UTF-8?q?nt32?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ONNX Runtime 报错 "Unexpected input data type. Actual: tensor(int64), expected: tensor(int32)"。
SenseVoice 模型的 x_length、language、text_norm 三个输入需要 int32 类型而非 int64。

Co-Authored-By: Claude Opus 4.6
---
 src/core/sense_voice_engine.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/core/sense_voice_engine.cpp b/src/core/sense_voice_engine.cpp
index 8319ec9..ec516f7 100644
--- a/src/core/sense_voice_engine.cpp
+++ b/src/core/sense_voice_engine.cpp
@@ -318,24 +318,24 @@ RecognitionResult SenseVoiceEngine::infer(const std::vector<float>& samples,
     int64_t xShape[] = {1, numFrames, kLFROutputDim};
     auto memInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
 
-    int64_t xLengthVal = numFrames;
+    int32_t xLengthVal = numFrames;
     int64_t xLengthShape[] = {1};
 
     int langCode = languageToInt(lang);
-    int64_t langVal = langCode;
+    int32_t langVal = langCode;
     int64_t langShape[] = {1};
 
-    int64_t textNormVal = kTextNormWithITN;
+    int32_t textNormVal = kTextNormWithITN;
     int64_t textNormShape[] = {1};
 
     std::vector<Ort::Value> inputTensors;
     inputTensors.push_back(Ort::Value::CreateTensor<float>(
         memInfo, lfrFeatures.data(), lfrFeatures.size(), xShape, 3));
-    inputTensors.push_back(Ort::Value::CreateTensor<int64_t>(
+    inputTensors.push_back(Ort::Value::CreateTensor<int32_t>(
         memInfo, &xLengthVal, 1, xLengthShape, 1));
-    inputTensors.push_back(Ort::Value::CreateTensor<int64_t>(
+    inputTensors.push_back(Ort::Value::CreateTensor<int32_t>(
         memInfo, &langVal, 1, langShape, 1));
-    inputTensors.push_back(Ort::Value::CreateTensor<int64_t>(
+    inputTensors.push_back(Ort::Value::CreateTensor<int32_t>(
         memInfo, &textNormVal, 1, textNormShape, 1));
 
     // 4. 运行推理