mirror of
https://github.com/labring/FastGPT.git
synced 2025-12-25 20:02:47 +00:00
Some checks are pending
Document deploy / sync-images (push) Waiting to run
Document deploy / generate-timestamp (push) Blocked by required conditions
Document deploy / build-images (map[domain:https://fastgpt.cn suffix:cn]) (push) Blocked by required conditions
Document deploy / build-images (map[domain:https://fastgpt.io suffix:io]) (push) Blocked by required conditions
Document deploy / update-images (map[deployment:fastgpt-docs domain:https://fastgpt.cn kube_config:KUBE_CONFIG_CN suffix:cn]) (push) Blocked by required conditions
Document deploy / update-images (map[deployment:fastgpt-docs domain:https://fastgpt.io kube_config:KUBE_CONFIG_IO suffix:io]) (push) Blocked by required conditions
Build FastGPT images in Personal warehouse / get-vars (push) Waiting to run
Build FastGPT images in Personal warehouse / build-fastgpt-images (map[arch:amd64 runs-on:ubuntu-24.04]) (push) Blocked by required conditions
Build FastGPT images in Personal warehouse / build-fastgpt-images (map[arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Blocked by required conditions
Build FastGPT images in Personal warehouse / release-fastgpt-images (push) Blocked by required conditions
* feat: add query optimize and bill (#6021) * add query optimize and bill * perf: query extension * fix: embe model * remove log * remove log * fix: test --------- Co-authored-by: xxyyh <2289112474@qq> Co-authored-by: archer <545436317@qq.com> * feat: notice (#6013) * feat: record user's language * feat: notice points/dataset indexes; support count limit; update docker-compose.yml * fix: ts error * feat: send auth code i18n * chore: dataset notice limit * chore: adjust * fix: ts * fix: countLimit race condition; i18n en-prefix locale fallback to en --------- Co-authored-by: archer <545436317@qq.com> * perf: comment * perf: send inform code * fix: type error (#6029) * feat: add ip region for chat logs (#6010) * feat: add ip region for chat logs * refactor: use Geolite2.mmdb * fix: export chat logs * fix: return location directly * test: add unit test * perf: log show ip data * adjust commercial plans (#6008) * plan frontend * plan limit * coupon * discount coupon * fix * type * fix audit * type * plan name * legacy plan * track * feat: add discount coupon * fix * fix discount coupon * openapi * type * type * env * api type * fix * fix: simple agent plugin input & agent dashboard card (#6034) * refactor: remove gridfs (#6031) * fix: replace gridfs multer operations with s3 compatible ops * wip: s3 features * refactor: remove gridfs * fix * perf: mock test * doc * doc * doc * fix: test * fix: s3 * fix: mock s3 * remove invalid config * fix: init query extension * initv4144 (#6037) * chore: initv4144 * fix * version * fix: new plans (#6039) * fix: new plans * qr modal tip * fix: buffer raw text filename (#6040) * fix: initv4144 (#6041) * fix: pay refresh (#6042) * fix: migration shell * rename collection * clear timerlock * clear timerlock * perf: faq * perf: bill schema * fix: openapi * doc * fix: share var render * feat: delete dataset queue * plan usage display (#6043) * plan usage display * text * fix * fix: ts * perf: remove invalid code * perf: init shell * doc * 
perf: rename field * perf: avatar presign * init * custom plan text (#6045) * fix plans * fix * fixed * computed --------- Co-authored-by: archer <545436317@qq.com> * init shell * plan text & price page back button (#6046) * init * index * delete dataset * delete dataset * perf: delete dataset * init --------- Co-authored-by: YeYuheng <57035043+YYH211@users.noreply.github.com> Co-authored-by: xxyyh <2289112474@qq> Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com> Co-authored-by: Roy <whoeverimf5@gmail.com> Co-authored-by: heheer <heheer@sealos.io>
132 lines
4.1 KiB
TypeScript
132 lines
4.1 KiB
TypeScript
import { vi } from 'vitest';
|
|
|
|
/**
 * Mock embedding generation utilities for testing
 */
|
|
|
|
/**
 * Build a deterministic, unit-length mock embedding for a piece of text.
 *
 * The text is folded into a signed 32-bit hash that seeds a linear
 * congruential generator (multiplier 1103515245, increment 12345, state masked
 * to 31 bits). Each generator state is mapped to the range [-1, 1] and the
 * resulting vector is scaled to L2 norm 1. The same `text` and `dimension`
 * always produce the same vector.
 *
 * @param text - Input text that determines the vector.
 * @param dimension - Number of components to generate (defaults to 1536).
 * @returns A vector with L2 norm 1, or an empty array when no components are
 *   generated (e.g. `dimension` is 0).
 */
export const generateMockEmbedding = (text: string, dimension: number = 1536): number[] => {
  // Fold the UTF-16 code units into a signed 32-bit hash (hash * 31 + code unit).
  let hash = 0;
  for (let i = 0; i < text.length; i++) {
    hash = ((hash << 5) - hash + text.charCodeAt(i)) | 0;
  }

  const vector: number[] = [];
  let sumOfSquares = 0;
  let seed = Math.abs(hash);
  for (let i = 0; i < dimension; i++) {
    // Math.imul multiplies in exact 32-bit integer arithmetic. A plain
    // `seed * 1103515245` exceeds Number.MAX_SAFE_INTEGER, which rounds away
    // the low bits of the state before the mask is applied.
    seed = (Math.imul(seed, 1103515245) + 12345) & 0x7fffffff;
    const component = (seed / 0x7fffffff) * 2 - 1; // Range [-1, 1]
    vector.push(component);
    sumOfSquares += component * component;
  }

  // Normalize the vector (L2 norm = 1); nothing to scale when it is empty.
  const norm = Math.sqrt(sumOfSquares);
  if (norm === 0) {
    return vector;
  }
  return vector.map((val) => val / norm);
};
|
|
|
|
/**
 * Produce one mock embedding per input text, in input order.
 *
 * @param texts - Texts to embed.
 * @param dimension - Length of every generated vector (defaults to 1536).
 * @returns An array holding one vector for each entry of `texts`.
 */
export const generateMockEmbeddings = (texts: string[], dimension: number = 1536): number[][] => {
  const embedOne = (text: string): number[] => generateMockEmbedding(text, dimension);
  return texts.map(embedOne);
};
|
|
|
|
/**
 * Build the `{ tokens, vectors }` payload used as a getVectorsByText result.
 *
 * @param texts - A single text or a list of texts; a lone string is treated
 *   as a one-element list.
 * @param dimension - Length of each generated vector (defaults to 1536).
 * @returns `vectors` holds one mock embedding per text. `tokens` is the sum
 *   over all texts of `ceil(length / 4)`, i.e. a rough estimate of one token
 *   per four characters.
 */
export const createMockVectorsResponse = (
  texts: string | string[],
  dimension: number = 1536
): { tokens: number; vectors: number[][] } => {
  let textArray: string[];
  if (Array.isArray(texts)) {
    textArray = texts;
  } else {
    textArray = [texts];
  }

  const vectors = generateMockEmbeddings(textArray, dimension);

  // Character-based token estimate, accumulated text by text.
  let tokens = 0;
  textArray.forEach((text) => {
    tokens += Math.ceil(text.length / 4);
  });

  return { tokens, vectors };
};
|
|
|
|
/**
 * Generate a unit vector whose cosine similarity to `baseVector` equals
 * `similarity`.
 *
 * The result is `s * b + sqrt(1 - s^2) * n`, where `b` is the normalized base
 * and `n` is a unit noise vector made orthogonal to `b`. Because `b` and `n`
 * are orthonormal, the result has unit length and its cosine with the base is
 * exactly `s` (up to floating-point error). A plain linear interpolation
 * between base and noise does not have this property: it overshoots the
 * requested similarity.
 *
 * The noise is seeded from the current time and `Math.random()`, so repeated
 * calls return different vectors.
 *
 * @param baseVector - The vector to stay close to; it does not need to be
 *   normalized.
 * @param similarity - Target cosine similarity (0-1), higher means more
 *   similar. Values outside [-1, 1] are clamped.
 * @returns A unit vector of the same length as `baseVector`. If the base has
 *   zero length the noise vector is returned as is; if the noise has no
 *   component orthogonal to the base (e.g. a 1-dimensional input) the
 *   normalized base is returned.
 */
export const generateSimilarVector = (baseVector: number[], similarity: number = 0.9): number[] => {
  const dimension = baseVector.length;
  const noise = generateMockEmbedding(`noise_${Date.now()}_${Math.random()}`, dimension);

  const dot = (a: number[], b: number[]): number =>
    a.reduce((sum, val, i) => sum + val * b[i], 0);

  // A zero (or empty) base has no direction to be similar to.
  const baseNorm = Math.sqrt(dot(baseVector, baseVector));
  if (baseNorm === 0) {
    return noise;
  }
  const unitBase = baseVector.map((val) => val / baseNorm);

  // Remove the part of the noise that points along the base.
  const alongBase = dot(noise, unitBase);
  const residual = noise.map((val, i) => val - alongBase * unitBase[i]);
  const residualNorm = Math.sqrt(dot(residual, residual));
  if (residualNorm === 0) {
    return unitBase;
  }

  // Cosine similarity is only defined on [-1, 1].
  const target = Math.min(1, Math.max(-1, similarity));
  const orthogonalWeight = Math.sqrt(1 - target * target) / residualNorm;

  return unitBase.map((val, i) => target * val + orthogonalWeight * residual[i]);
};
|
|
|
|
/**
 * Generate a unit vector orthogonal (dissimilar) to the given vector.
 *
 * A pseudo-random vector, seeded from the current time, has its projection
 * onto `baseVector` removed (one Gram-Schmidt step) and is then normalized.
 * The projection is divided by the squared length of the base, so the result
 * is orthogonal even when `baseVector` is not unit length.
 *
 * @param baseVector - The vector to be orthogonal to; it does not need to be
 *   normalized.
 * @returns A vector of the same length as `baseVector` whose dot product with
 *   it is zero up to floating-point error. It has unit length, except when no
 *   orthogonal direction exists (e.g. a 1-dimensional input), in which case
 *   the all-zero residual is returned instead of NaN values.
 */
export const generateOrthogonalVector = (baseVector: number[]): number[] => {
  const dimension = baseVector.length;
  const randomVector = generateMockEmbedding(`orthogonal_${Date.now()}`, dimension);

  // Every vector is orthogonal to a zero (or empty) base.
  const baseNormSquared = baseVector.reduce((sum, val) => sum + val * val, 0);
  if (baseNormSquared === 0) {
    return randomVector;
  }

  // Gram-Schmidt: subtract proj_base(random) = (random . base / base . base) * base
  const dotProduct = baseVector.reduce((sum, val, i) => sum + val * randomVector[i], 0);
  const projectionScale = dotProduct / baseNormSquared;
  const vector = randomVector.map((val, i) => val - projectionScale * baseVector[i]);

  // Normalize, unless the residual vanished and there is nothing to scale.
  const norm = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
  if (norm === 0) {
    return vector;
  }
  return vector.map((val) => val / norm);
};
|
|
|
|
/**
 * vitest mock function standing in for getVectorsByText.
 *
 * Resolves to the result of createMockVectorsResponse for the given `input`,
 * using that helper's default dimension. A lone string is wrapped into a
 * one-element list first. The `model` and `type` fields are accepted but not
 * read.
 */
export const mockGetVectorsByText = vi.fn(
  async (params: {
    model: any;
    input: string[] | string;
    type?: string;
  }): Promise<{ tokens: number; vectors: number[][] }> => {
    const { input } = params;
    if (Array.isArray(input)) {
      return createMockVectorsResponse(input);
    }
    return createMockVectorsResponse([input]);
  }
);
|
|
|
|
/**
 * Module mock for the embedding module: getVectorsByText is replaced by
 * mockGetVectorsByText, and every other export of the original module is
 * passed through unchanged.
 */
vi.mock('@fastgpt/service/core/ai/embedding', async (importOriginal) => {
  const original = (await importOriginal()) as Record<string, unknown>;
  const overrides = { getVectorsByText: mockGetVectorsByText };
  return { ...original, ...overrides };
});
|
|
|
|
/**
 * Module mock for the AI model module: getEmbeddingModel is replaced by a
 * vitest mock that always returns the same fixed 'text-embedding-ada-002'
 * descriptor. Every other export of the original module is passed through
 * unchanged.
 */
vi.mock('@fastgpt/service/core/ai/model', async (importOriginal) => {
  const original = (await importOriginal()) as Record<string, unknown>;
  const embeddingModelStub = {
    model: 'text-embedding-ada-002',
    name: 'text-embedding-ada-002'
  };
  return {
    ...original,
    getEmbeddingModel: vi.fn().mockReturnValue(embeddingModelStub)
  };
});
|