fix(embedding): decode base64 embedding responses before vector processing (#6807)

* fix(embedding): decode base64 embedding responses before vector processing

When a model's extra body config includes `encoding_format: "base64"`, the
embedding API returns a base64-encoded IEEE 754 little-endian float32 array
instead of a `number[]`. The previous code passed this raw string directly
to `formatVectors`, which called `.reduce()` on it and threw:
  TypeError: a.reduce is not a function

Add `decodeEmbedding()` that detects base64 strings and decodes them to
`number[]` via `Buffer → Float32Array → Array.from()`, then use it in
`getVectorsByText` before calling `formatVectors`.

Fixes #6769

* perf: test

---------

Co-authored-by: octo-patch <octo-patch@github.com>
Co-authored-by: archer <545436317@qq.com>
This commit is contained in:
Octopus
2026-04-23 16:49:04 +08:00
committed by archer
parent 036af0f1be
commit c0072fabbc
3 changed files with 663 additions and 210 deletions
+13 -1
View File
@@ -91,7 +91,9 @@ export async function getVectorsByText({ model, input, type, headers }: GetVecto
return tokens.reduce((sum, item) => sum + item, 0);
})(),
Promise.all(
res.data.map((item) => formatVectors(item.embedding, model.normalization))
res.data.map((item) =>
formatVectors(decodeEmbedding(item.embedding), model.normalization)
)
)
]);
@@ -121,6 +123,16 @@ export async function getVectorsByText({ model, input, type, headers }: GetVecto
}
}
/**
 * Normalizes an embedding returned by the embedding API into a `number[]`.
 *
 * The API may return either a plain numeric array or, when the request asked
 * for `encoding_format: "base64"`, a base64 string containing packed IEEE 754
 * little-endian float32 values.
 *
 * @param embedding - The raw embedding: a numeric array or a base64 string.
 * @returns The embedding as a numeric array. An array input is returned as-is
 *   (same reference); a string input is decoded into a new array.
 */
export function decodeEmbedding(embedding: number[] | string): number[] {
  if (typeof embedding !== 'string') {
    return embedding;
  }

  const BYTES_PER_FLOAT32 = 4;
  const bytes = Buffer.from(embedding, 'base64');

  // Trailing bytes that do not make up a whole float32 are ignored.
  const count = Math.floor(bytes.byteLength / BYTES_PER_FLOAT32);

  // Read each value explicitly as little-endian instead of viewing the
  // underlying ArrayBuffer through a Float32Array: a typed-array view uses the
  // host byte order and throws if the Buffer's byteOffset inside its (possibly
  // pooled) ArrayBuffer is not a multiple of 4.
  return Array.from({ length: count }, (_, index) =>
    bytes.readFloatLE(index * BYTES_PER_FLOAT32)
  );
}
export function formatVectors(vector: number[], normalization = false) {
// normalization processing
function normalizationVector(vector: number[]) {