FastGPT/test/mocks/core/ai/embedding.ts
Archer 2ccb5b50c6
V4.14.4 features (#6036)
* feat: add query optimize and bill (#6021)

* add query optimize and bill

* perf: query extension

* fix: embe model

* remove log

* remove log

* fix: test

---------

Co-authored-by: xxyyh <2289112474@qq>
Co-authored-by: archer <545436317@qq.com>

* feat: notice (#6013)

* feat: record user's language

* feat: notice points/dataset indexes; support count limit; update docker-compose.yml

* fix: ts error

* feat: send auth code i18n

* chore: dataset notice limit

* chore: adjust

* fix: ts

* fix: countLimit race condition; i18n en-prefix locale fallback to en

---------

Co-authored-by: archer <545436317@qq.com>

* perf: comment

* perf: send inform code

* fix: type error (#6029)

* feat: add ip region for chat logs (#6010)

* feat: add ip region for chat logs

* refactor: use Geolite2.mmdb

* fix: export chat logs

* fix: return location directly

* test: add unit test

* perf: log show ip data

* adjust commercial plans (#6008)

* plan frontend

* plan limit

* coupon

* discount coupon

* fix

* type

* fix audit

* type

* plan name

* legacy plan

* track

* feat: add discount coupon

* fix

* fix discount coupon

* openapi

* type

* type

* env

* api type

* fix

* fix: simple agent plugin input & agent dashboard card (#6034)

* refactor: remove gridfs (#6031)

* fix: replace gridfs multer operations with s3 compatible ops

* wip: s3 features

* refactor: remove gridfs

* fix

* perf: mock test

* doc

* doc

* doc

* fix: test

* fix: s3

* fix: mock s3

* remove invalid config

* fix: init query extension

* initv4144 (#6037)

* chore: initv4144

* fix

* version

* fix: new plans (#6039)

* fix: new plans

* qr modal tip

* fix: buffer raw text filename (#6040)

* fix: initv4144 (#6041)

* fix: pay refresh (#6042)

* fix: migration shell

* rename collection

* clear timerlock

* clear timerlock

* perf: faq

* perf: bill schema

* fix: openapi

* doc

* fix: share var render

* feat: delete dataset queue

* plan usage display (#6043)

* plan usage display

* text

* fix

* fix: ts

* perf: remove invalid code

* perf: init shell

* doc

* perf: rename field

* perf: avatar presign

* init

* custom plan text (#6045)

* fix plans

* fix

* fixed

* computed

---------

Co-authored-by: archer <545436317@qq.com>

* init shell

* plan text & price page back button (#6046)

* init

* index

* delete dataset

* delete dataset

* perf: delete dataset

* init

---------

Co-authored-by: YeYuheng <57035043+YYH211@users.noreply.github.com>
Co-authored-by: xxyyh <2289112474@qq>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
Co-authored-by: Roy <whoeverimf5@gmail.com>
Co-authored-by: heheer <heheer@sealos.io>
2025-12-08 01:44:15 +08:00

import { vi } from 'vitest';

/**
 * Mock embedding generation utilities for testing
 */

/**
 * Generate a deterministic normalized vector based on text content.
 * Uses a simple hash-based approach so the same text always produces the same vector.
 */
export const generateMockEmbedding = (text: string, dimension: number = 1536): number[] => {
  // Simple hash function to generate a seed from the text
  let hash = 0;
  for (let i = 0; i < text.length; i++) {
    const char = text.charCodeAt(i);
    hash = (hash << 5) - hash + char;
    hash = hash & hash; // Convert to 32-bit integer
  }

  // Generate the vector using a seeded pseudo-random sequence
  const vector: number[] = [];
  let seed = Math.abs(hash);
  for (let i = 0; i < dimension; i++) {
    // Linear congruential generator
    seed = (seed * 1103515245 + 12345) & 0x7fffffff;
    vector.push((seed / 0x7fffffff) * 2 - 1); // Range [-1, 1]
  }

  // Normalize the vector (L2 norm = 1)
  const norm = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
  return vector.map((val) => val / norm);
};
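
// Illustrative usage (comment only, not part of the module): determinism means
// repeated calls with the same text can be compared directly in assertions,
// and every vector comes back unit-length.
//
//   const a = generateMockEmbedding('hello world');
//   const b = generateMockEmbedding('hello world');
//   // a and b are element-wise identical
//   const norm = Math.sqrt(a.reduce((sum, v) => sum + v * v, 0));
//   // norm ≈ 1 (up to floating-point error)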

/**
 * Generate multiple mock embeddings for a list of texts
 */
export const generateMockEmbeddings = (texts: string[], dimension: number = 1536): number[][] => {
  return texts.map((text) => generateMockEmbedding(text, dimension));
};

/**
 * Create a mock response for getVectorsByText
 */
export const createMockVectorsResponse = (
  texts: string | string[],
  dimension: number = 1536
): { tokens: number; vectors: number[][] } => {
  const textArray = Array.isArray(texts) ? texts : [texts];
  const vectors = generateMockEmbeddings(textArray, dimension);

  // Estimate tokens (roughly 1 token per 4 characters)
  const tokens = textArray.reduce((sum, text) => sum + Math.ceil(text.length / 4), 0);

  return { tokens, vectors };
};
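
// Illustrative usage: a single string and a string array are both accepted, and
// the token count is a rough character-based estimate, not a real tokenizer count.
//
//   const single = createMockVectorsResponse('some text');    // one 1536-dim vector
//   const batch = createMockVectorsResponse(['a', 'b'], 768); // two 768-dim vectors
//   // batch.tokens === 2 (Math.ceil(1 / 4) per one-character text)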

/**
 * Generate a vector similar to another vector with controlled similarity
 * @param baseVector - The base vector to blend with noise
 * @param similarity - Interpolation weight in (0, 1]; higher keeps the result
 *   closer to baseVector. The resulting cosine similarity is only approximately
 *   this value, since the noise vector is not orthogonal to the base.
 */
export const generateSimilarVector = (baseVector: number[], similarity: number = 0.9): number[] => {
  const dimension = baseVector.length;
  const noise = generateMockEmbedding(`noise_${Date.now()}_${Math.random()}`, dimension);

  // Interpolate between the base vector and the noise vector
  const vector = baseVector.map((val, i) => val * similarity + noise[i] * (1 - similarity));

  // Normalize
  const norm = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
  return vector.map((val) => val / norm);
};
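
// Illustrative check: both vectors are unit-length, so their dot product is the
// cosine similarity directly.
//
//   const base = generateMockEmbedding('document');
//   const similar = generateSimilarVector(base, 0.9);
//   const cosine = base.reduce((sum, v, i) => sum + v * similar[i], 0);
//   // cosine is close to (but not exactly) 1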

/**
 * Generate a vector orthogonal (dissimilar) to the given vector.
 * Assumes baseVector is unit-length, as returned by generateMockEmbedding.
 */
export const generateOrthogonalVector = (baseVector: number[]): number[] => {
  const dimension = baseVector.length;
  const randomVector = generateMockEmbedding(`orthogonal_${Date.now()}`, dimension);

  // Gram-Schmidt: remove the component of randomVector along baseVector
  const dotProduct = baseVector.reduce((sum, val, i) => sum + val * randomVector[i], 0);
  const vector = randomVector.map((val, i) => val - dotProduct * baseVector[i]);

  // Normalize
  const norm = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
  return vector.map((val) => val / norm);
};
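
// Illustrative check: after the projection step, the dot product with the base
// vector collapses to ~0.
//
//   const base = generateMockEmbedding('document');
//   const ortho = generateOrthogonalVector(base);
//   const cosine = base.reduce((sum, v, i) => sum + v * ortho[i], 0);
//   // cosine ≈ 0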

/**
 * Mock implementation for getVectorsByText.
 * Automatically generates embeddings based on the input text.
 */
export const mockGetVectorsByText = vi.fn(
  async ({
    input
  }: {
    model: any;
    input: string[] | string;
    type?: string;
  }): Promise<{ tokens: number; vectors: number[][] }> => {
    const texts = Array.isArray(input) ? input : [input];
    return createMockVectorsResponse(texts);
  }
);
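
// Illustrative direct call: the mock is an async vi.fn, so tests can await its
// result and also assert on its call history.
//
//   const res = await mockGetVectorsByText({ model: {} as any, input: ['a', 'b'] });
//   // res.vectors.length === 2, each vector 1536-dimensional
//   expect(mockGetVectorsByText).toHaveBeenCalledTimes(1);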

/**
 * Set up the global mock for the embedding module
 */
vi.mock('@fastgpt/service/core/ai/embedding', async (importOriginal) => {
  const actual = (await importOriginal()) as any;
  return {
    ...actual,
    getVectorsByText: mockGetVectorsByText
  };
});

/**
 * Set up the global mock for the AI model module
 */
vi.mock('@fastgpt/service/core/ai/model', async (importOriginal) => {
  const actual = (await importOriginal()) as any;
  return {
    ...actual,
    getEmbeddingModel: vi.fn().mockReturnValue({
      model: 'text-embedding-ada-002',
      name: 'text-embedding-ada-002'
    })
  };
});
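
// Illustrative test usage (hypothetical test file and import path): importing
// this module registers both vi.mock factories, so code under test that calls
// getVectorsByText receives deterministic vectors instead of hitting a real
// embedding API.
//
//   import '@/test/mocks/core/ai/embedding';
//   import { getVectorsByText } from '@fastgpt/service/core/ai/embedding';
//   import { getEmbeddingModel } from '@fastgpt/service/core/ai/model';
//
//   const { tokens, vectors } = await getVectorsByText({
//     model: getEmbeddingModel('text-embedding-ada-002'),
//     input: 'hello'
//   });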