From fbbc32361bca8f430cd9c7f6126cf71feb36cce3 Mon Sep 17 00:00:00 2001 From: archer <545436317@qq.com> Date: Wed, 5 Apr 2023 20:37:37 +0800 Subject: [PATCH] =?UTF-8?q?perf:=20=E5=8A=A0=E5=BF=AB=E6=8B=86=E5=88=86QA?= =?UTF-8?q?=E5=92=8C=E7=94=9F=E6=88=90=E5=90=91=E9=87=8F=EF=BC=9B=E4=BD=99?= =?UTF-8?q?=E9=A2=9D=E4=B8=8D=E8=B6=B3=E6=8F=90=E9=86=92?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../timer/{initDataItemTime.ts => test.ts} | 26 ++-- src/service/errorCode.ts | 3 + src/service/events/generateQA.ts | 113 ++++++++++-------- src/service/events/generateVector.ts | 6 +- src/service/mongo.ts | 1 - src/service/preChatStore.ts | 21 ---- src/service/response.ts | 10 +- src/types/index.d.ts | 2 +- 8 files changed, 89 insertions(+), 93 deletions(-) rename src/pages/api/timer/{initDataItemTime.ts => test.ts} (52%) delete mode 100644 src/service/preChatStore.ts diff --git a/src/pages/api/timer/initDataItemTime.ts b/src/pages/api/timer/test.ts similarity index 52% rename from src/pages/api/timer/initDataItemTime.ts rename to src/pages/api/timer/test.ts index 3a7342683..1611628e6 100644 --- a/src/pages/api/timer/initDataItemTime.ts +++ b/src/pages/api/timer/test.ts @@ -1,7 +1,7 @@ // Next.js API route support: https://nextjs.org/docs/api-routes/introduction import type { NextApiRequest, NextApiResponse } from 'next'; import { jsonRes } from '@/service/response'; -import { connectToDatabase, DataItem, Data } from '@/service/mongo'; +import { connectToDatabase, SplitData } from '@/service/mongo'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { @@ -10,20 +10,18 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) } await connectToDatabase(); - // await DataItem.updateMany( - // {}, - // { - // type: 'QA' - // // times: 2 - // } - // ); + const data = await SplitData.aggregate([ + { $match: { textList: { $exists: true, $ne: [] } } }, + { $sample: { size: 1 } } + ]); - await Data.updateMany( - {}, - { - type: 'QA' - } - ); + const dataItem: any = data[0]; + const textList: string[] = dataItem.textList.slice(-5); + console.log(textList); + console.log(dataItem.textList.slice(0, -5)); + await SplitData.findByIdAndUpdate(dataItem._id, { + textList: dataItem.textList.slice(0, -5) + }); jsonRes(res, { data: {} diff --git a/src/service/errorCode.ts b/src/service/errorCode.ts index 81ac1dc58..aba2cb5d8 100644 --- a/src/service/errorCode.ts +++ b/src/service/errorCode.ts @@ -6,6 +6,9 @@ export const openaiError: Record = { 'Too Many Requests': '请求次数太多了,请慢点~', 'Bad Gateway': '网关异常,请重试' }; +export const openaiError2: Record = { + insufficient_quota: 'API 余额不足' +}; export const proxyError: Record = { ECONNABORTED: true, ECONNRESET: true diff --git a/src/service/events/generateQA.ts b/src/service/events/generateQA.ts index 8460ad963..84162bf14 100644 --- a/src/service/events/generateQA.ts +++ b/src/service/events/generateQA.ts @@ -9,39 +9,35 @@ import { generateVector } from './generateVector'; import { connectRedis } from '../redis'; import { VecModelDataPrefix } from '@/constants/redis'; import { customAlphabet } from 'nanoid'; +import { ModelSplitDataSchema } from '@/types/mongoSchema'; const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12); -export async function generateQA(): Promise { - // 最多 5 个进程 - if (global.generatingQA >= 5) { - console.log('QA 最多5个进程'); - return; - } - global.generatingQA++; +export async function generateQA(next = false): Promise { + if (global.generatingQA === true && !next) return; + global.generatingQA = true; let dataId = null; try { const redis = await connectRedis(); // 找出一个需要生成的 dataItem - const dataItem = await SplitData.findOne({ - textList: { $exists: true, $ne: [] } - }); + const data = await SplitData.aggregate([ + { $match: { textList: { $exists: true, $ne: [] } } }, + { $sample: { size: 1 } } + ]); + + const dataItem: ModelSplitDataSchema = data[0]; if (!dataItem) { console.log('没有需要生成 QA 的数据'); - global.generatingQA = 0; + global.generatingQA = false; return; } dataId = dataItem._id; - // 源文本 - const text = dataItem.textList[dataItem.textList.length - 1]; - if (!text) { - await SplitData.findByIdAndUpdate(dataItem._id, { $pop: { textList: 1 } }); // 弹出无效文本 - throw new Error('无文本'); - } + // 获取 5 个源文本 + const textList: string[] = dataItem.textList.slice(-5); // 获取 openapi Key let userApiKey, systemKey; @@ -62,7 +58,7 @@ export async function generateQA(): Promise { throw new Error('获取 openai key 失败'); } - console.log('正在生成一组QA, ID:', dataItem._id); + console.log(`正在生成一组QA, 包含 ${textList.length} 组文本。ID: ${dataItem._id}`); const startTime = Date.now(); @@ -76,33 +72,50 @@ export async function generateQA(): Promise { }; // 请求 chatgpt 获取回答 - const response = await chatAPI - .createChatCompletion( - { - model: ChatModelNameEnum.GPT35, - temperature: 0.8, - n: 1, - messages: [ - systemPrompt, + const response = await Promise.allSettled( + textList.map((text) => + chatAPI + .createChatCompletion( { - role: 'user', - content: text + model: ChatModelNameEnum.GPT35, + temperature: 0.8, + n: 1, + messages: [ + systemPrompt, + { + role: 'user', + content: text + } + ] + }, + { + timeout: 180000, + httpsAgent } - ] - }, - { - timeout: 180000, - httpsAgent - } + ) + .then((res) => ({ + rawContent: res?.data.choices[0].message?.content || '', // chatgpt原本的回复 + result: splitText(res?.data.choices[0].message?.content || '') // 格式化后的QA对 + })) ) - .then((res) => ({ - rawContent: res?.data.choices[0].message?.content || '', // chatgpt原本的回复 - result: splitText(res?.data.choices[0].message?.content || '') // 格式化后的QA对 - })); + ); + + // 获取成功的回答 + const successResponse: { + rawContent: string; + result: { + q: string; + a: string; + }[]; + }[] = response.filter((item) => item.status === 'fulfilled').map((item: any) => item.value); + + const resultList = successResponse.map((item) => item.result).flat(); await Promise.allSettled([ - SplitData.findByIdAndUpdate(dataItem._id, { $pop: { textList: 1 } }), // 弹出已经拆分的文本 - ...response.result.map((item) => { + SplitData.findByIdAndUpdate(dataItem._id, { + textList: dataItem.textList.slice(0, -5) + }), // 删掉后5个数据 + ...resultList.map((item) => { // 插入 redis return redis.sendCommand([ 'HMSET', @@ -125,20 +138,21 @@ export async function generateQA(): Promise { '生成QA成功,time:', `${(Date.now() - startTime) / 1000}s`, 'QA数量:', - response.result.length + resultList.length ); // 计费 pushSplitDataBill({ - isPay: !userApiKey && response.result.length > 0, + isPay: !userApiKey && resultList.length > 0, userId: dataItem.userId, type: 'QA', - text: systemPrompt.content + text + response.rawContent + text: + systemPrompt.content + + textList.join('') + + successResponse.map((item) => item.rawContent).join('') }); - global.generatingQA--; - - generateQA(); + generateQA(true); generateVector(); } catch (error: any) { // log @@ -157,14 +171,13 @@ export async function generateQA(): Promise { errorText: 'api 余额不足' }); - generateQA(); + generateQA(true); return; } setTimeout(() => { - global.generatingQA--; - generateQA(); - }, 5000); + generateQA(true); + }, 4000); } } diff --git a/src/service/events/generateVector.ts b/src/service/events/generateVector.ts index 5de819d75..90b6550dd 100644 --- a/src/service/events/generateVector.ts +++ b/src/service/events/generateVector.ts @@ -75,9 +75,7 @@ export async function generateVector(next = false): Promise { console.log(`生成向量成功: ${dataItem.id}`); - setTimeout(() => { - generateVector(true); - }, 4000); + generateVector(true); } catch (error: any) { // log if (error?.response) { @@ -88,7 +86,7 @@ export async function generateVector(next = false): Promise { } if (dataId && error?.response?.data?.error?.type === 'insufficient_quota') { - console.log('api 余额不足'); + console.log('api 余额不足,删除 redis 模型数据'); const redis = await connectRedis(); redis.del(dataId); generateVector(true); diff --git a/src/service/mongo.ts b/src/service/mongo.ts index f9a675db8..bf08865bb 100644 --- a/src/service/mongo.ts +++ b/src/service/mongo.ts @@ -27,7 +27,6 @@ export async function connectToDatabase(): Promise { global.mongodb = null; } - global.generatingQA = 0; generateQA(); // generateAbstract(); generateVector(true); diff --git a/src/service/preChatStore.ts b/src/service/preChatStore.ts deleted file mode 100644 index 6e0c04093..000000000 --- a/src/service/preChatStore.ts +++ /dev/null @@ -1,21 +0,0 @@ -import { ChatItemType } from '../types/chat'; - -export const chatWindows = new Map(); - -/** - * 获取聊天窗口信息 - */ -export const getWindowMessages = (id: string) => { - return chatWindows.get(id) || []; -}; - -export const pushWindowMessage = (id: string, prompt: ChatItemType) => { - const messages = chatWindows.get(id) || []; - messages.push(prompt); - chatWindows.set(id, messages); - return messages; -}; - -export const deleteWindow = (id: string) => { - chatWindows.delete(id); -}; diff --git a/src/service/response.ts b/src/service/response.ts index 46d2fe9a1..844132587 100644 --- a/src/service/response.ts +++ b/src/service/response.ts @@ -1,5 +1,5 @@ import { NextApiResponse } from 'next'; -import { openaiError, proxyError } from './errorCode'; +import { openaiError, openaiError2, proxyError } from './errorCode'; export interface ResponseType { code: number; @@ -25,13 +25,19 @@ export const jsonRes = ( msg = error; } else if (proxyError[error?.code]) { msg = '服务器代理出错'; + } else if (openaiError2[error?.response?.data?.error?.type]) { + msg = openaiError2[error?.response?.data?.error?.type]; } else if (openaiError[error?.response?.statusText]) { msg = openaiError[error.response.statusText]; } console.log('error->'); console.log('code:', error.code); - console.log('statusText:', error?.response?.statusText); console.log('msg:', msg); + // request 时候报错 + if (error?.response) { + console.log('statusText:', error?.response?.statusText); + console.log('type:', error?.response?.data?.error?.type); + } } res.json({ diff --git a/src/types/index.d.ts b/src/types/index.d.ts index 9590356d1..75fef8231 100644 --- a/src/types/index.d.ts +++ b/src/types/index.d.ts @@ -4,7 +4,7 @@ import type { RedisClientType } from 'redis'; declare global { var mongodb: Mongoose | string | null; var redisClient: RedisClientType | null; - var generatingQA: number; + var generatingQA: boolean; var generatingAbstract: boolean; var generatingVector: boolean; var QRCode: any;