mirror of
https://github.com/labring/FastGPT.git
synced 2025-12-26 04:32:50 +00:00
Some checks are pending
Document deploy / sync-images (push) Waiting to run
Document deploy / generate-timestamp (push) Blocked by required conditions
Document deploy / build-images (map[domain:https://fastgpt.cn suffix:cn]) (push) Blocked by required conditions
Document deploy / build-images (map[domain:https://fastgpt.io suffix:io]) (push) Blocked by required conditions
Document deploy / update-images (map[deployment:fastgpt-docs domain:https://fastgpt.cn kube_config:KUBE_CONFIG_CN suffix:cn]) (push) Blocked by required conditions
Document deploy / update-images (map[deployment:fastgpt-docs domain:https://fastgpt.io kube_config:KUBE_CONFIG_IO suffix:io]) (push) Blocked by required conditions
Build FastGPT images in Personal warehouse / get-vars (push) Waiting to run
Build FastGPT images in Personal warehouse / build-fastgpt-images (map[arch:amd64 runs-on:ubuntu-24.04]) (push) Blocked by required conditions
Build FastGPT images in Personal warehouse / build-fastgpt-images (map[arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Blocked by required conditions
Build FastGPT images in Personal warehouse / release-fastgpt-images (push) Blocked by required conditions
* feat: migrate chat files to s3 (#5802) * feat: migrate chat files to s3 * feat: add delete jobs for deleting s3 files * chore: improvements * fix: lockfile * fix: imports * feat: add ttl for those uploaded files but not send yet * feat: init bullmq worker * fix: s3 key * perf: s3 internal url * remove env * fix: re-sign a new url * fix: re-sign a new url * perf: s3 code --------- Co-authored-by: archer <545436317@qq.com> * update pacakge * feat: add more file type for uploading (#5807) * fix: re-sign a new url * wip: file selector * feat: add more file type for uploading * feat: migrate chat files to s3 (#5802) * feat: migrate chat files to s3 * feat: add delete jobs for deleting s3 files * chore: improvements * fix: lockfile * fix: imports * feat: add ttl for those uploaded files but not send yet * feat: init bullmq worker * fix: s3 key * perf: s3 internal url * remove env * fix: re-sign a new url * fix: re-sign a new url * perf: s3 code --------- Co-authored-by: archer <545436317@qq.com> * fix: limit minmax available file upload number * perf: file select modal code * fix: fileselect refresh * fix: ts --------- Co-authored-by: archer <545436317@qq.com> * bugfix: chat page (#5809) * fix: upload avatar * fix: chat page username display issue and setting button visibility * doc * Markdown match base64 performance * feat: improve global variables(time, file, dataset) (#5804) * feat: improve global variables(time, file, dataset) * feat: optimize code * perf: time variables code * fix: model, file * fix: hide file upload * fix: ts * hide dataset select --------- Co-authored-by: archer <545436317@qq.com> * perf: insert training queue * perf: s3 upload error i18n * fix: share page s3 * fix: timeselector ui error * var update node * Timepicker ui * feat: plugin support password * fix: password disabled UX * fix: button size * fix: no model cache for chat page (#5820) * rename function * fix: workflow bug * fix: interactive loop * fix test * perf: common textare no 
richtext * move system plugin config (#5803) (#5813) * move system plugin config (#5803) * move system plugin config * extract tag bar * filter * tool detail temp * marketplace * params * fix * type * search * tags render * status * ui * code * connect to backend (#5815) * feat: marketplace apis & type definitions (#5817) * chore: marketplace init * chore: marketplace list api type * chore: detail api * marketplace & import * feat: marketplace ui (#5826) * temp * marketplace * import * feat: detail return readme * chore: cache data expire 10 mins * chore: update docs * feat: marketplace ui --------- Co-authored-by: heheer <zhiyu44@qq.com> * feat: marketplace (#5830) * temp * marketplace * chore: tool list tag filter * chore: adjust --------- Co-authored-by: heheer <zhiyu44@qq.com> * tool detail drawer * remove tag filter * fix * fix * fix build * update pnpm-lock * fix type * perf code * marketplace router * fix build * navbar icon * fix ui * fix init * docs: marketplace/plugin (#5832) * temp * marketplace * docs(plugin): system tool docs --------- Co-authored-by: heheer <zhiyu44@qq.com> * default url * feat: i18n/ docker build (#5833) * chore: docker build * feat: i18n selector * fix * fix * fix: i18n parse * fix: i18n parse --------- Co-authored-by: heheer <heheer@sealos.io> Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com> Co-authored-by: heheer <zhiyu44@qq.com> * marketplace url * update action * market place code * market place code * title * fix: nextconfig * fix: copilot review * Remove bypassable regex-based XSS sanitization from marketplace search (#5835) * Initial plan * Remove problematic regex-based XSS sanitization from search inputs Co-authored-by: c121914yu <50446880+c121914yu@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: c121914yu <50446880+c121914yu@users.noreply.github.com> * feat: tool tag openapi * api check * fix: tsc * fix: ts * fix: 
lock * sdk version * ts * sdk version * remove invalid tip * perf: export data add timezone * perf: admin plugin api move * perf: tool code * move tag code * perf: marketplace and team plugin code * remove workflow invalid request * rename global tool code * rename global tool code * rename api * fix some bugs (#5841) * fix some bugs * fix * perf: Tag filter * fix: ts * fix: ts --------- Co-authored-by: archer <545436317@qq.com> * perf: Concat function * fix: workflow snapshot push * fix: ts type * fix: login to config/* * fix: ts * fix: model avatar (#5848) * fix: model avatar * fix: ts * fix: avatar migration to s3 * update lock * fix: avatar redirect --------- Co-authored-by: archer <545436317@qq.com> * fix tool detail (#5847) * fix tool detail * init script * fix build * perf: plugin detail modal * change tooltags to tags * fix icon --------- Co-authored-by: archer <545436317@qq.com> * fix tag filter scroll (#5852) * fix create app plugin & import info (#5853) * tag size * rename toolkit * download url * import plugin status (#5854) * init doc * fix: init shell --------- Co-authored-by: 伍闲犬 <whoeverimf5@gmail.com> Co-authored-by: Zeng Qingwen <143274079+fishwww-ww@users.noreply.github.com> Co-authored-by: heheer <heheer@sealos.io> Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com> Co-authored-by: heheer <zhiyu44@qq.com> Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com>
92 lines
2.6 KiB
TypeScript
92 lines
2.6 KiB
TypeScript
import TurndownService from 'turndown';
|
|
import { type ImageType } from '../readFile/type';
|
|
import { getNanoid } from '@fastgpt/global/common/string/tools';
|
|
// @ts-ignore
|
|
const turndownPluginGfm = require('joplin-turndown-plugin-gfm');
|
|
|
|
// Maximum length, in characters, of HTML that html2md will convert to Markdown.
// The limit is checked after inline base64 images have been swapped for short
// placeholders; anything longer is returned unconverted.
const MAX_HTML_SIZE = 100 * 1000; // 100k characters limit
|
|
|
|
/**
 * Replace inline base64 images (`src="data:<mime>;base64,<data>"`) with short
 * placeholder ids so the large payloads do not travel through the
 * HTML -> Markdown conversion.
 *
 * @param htmlContent Raw HTML that may contain `data:` URI image sources.
 * @returns `processedHtml` — the HTML with every matched `src` rewritten to
 *   `src="IMAGE_<id>_IMAGE"`; `images` — one entry per replaced source, in
 *   document order, carrying the placeholder id, the mime type and the raw
 *   base64 payload.
 */
const processBase64Images = (htmlContent: string) => {
  // Notes on the pattern:
  // 1. The payload uses the exact base64 alphabet [A-Za-z0-9+/=]+ so the engine
  //    does not backtrack through arbitrary text.
  // 2. The mime type may not contain a quote, so a match can never run past the
  //    end of the src attribute into a neighbouring attribute.
  // 3. Either quote style is accepted; the backreference \1 requires the closing
  //    quote to be the same character as the opening one.
  const base64Regex = /src=(["'])data:([^;"']+);base64,([A-Za-z0-9+/=]+)\1/g;
  const images: ImageType[] = [];

  const processedHtml = htmlContent.replace(
    base64Regex,
    (_match: string, _quote: string, mime: string, base64Data: string) => {
      const uuid = `IMAGE_${getNanoid(12)}_IMAGE`;
      images.push({
        uuid,
        base64: base64Data,
        mime
      });
      // Always emit a double-quoted attribute, regardless of the input quote style.
      return `src="${uuid}"`;
    }
  );

  return { processedHtml, images };
};
|
|
|
|
/**
 * Convert an HTML string to Markdown.
 *
 * Inline base64 images are first replaced by `IMAGE_<id>_IMAGE` placeholders
 * (see `processBase64Images`) and returned separately in `imageList`, so the
 * Markdown text stays small.
 *
 * @param html HTML source to convert.
 * @returns `rawText` — the Markdown output, or the placeholder-substituted HTML
 *   when it is longer than `MAX_HTML_SIZE`, or an empty string when conversion
 *   throws; `imageList` — the extracted base64 images referenced by the
 *   placeholders in `rawText` (empty on error).
 */
export const html2md = (
  html: string
): {
  rawText: string;
  imageList: ImageType[];
} => {
  const turndownService = new TurndownService({
    headingStyle: 'atx',
    bulletListMarker: '-',
    codeBlockStyle: 'fenced',
    fence: '```',
    emDelimiter: '_',
    strongDelimiter: '**',
    linkStyle: 'inlined',
    linkReferenceStyle: 'full'
  });

  try {
    // Drop these elements from the output entirely.
    turndownService.remove(['i', 'script', 'iframe', 'style']);
    turndownService.use(turndownPluginGfm.gfm);

    // Custom handling for media tags: render them as a plain link to their source.
    turndownService.addRule('media', {
      filter: ['video', 'source', 'audio'],
      replacement: function (content, node) {
        const mediaNode = node as HTMLVideoElement | HTMLAudioElement | HTMLSourceElement;
        // Prefer the element's own src; otherwise use its first nested <source>.
        const src = mediaNode.getAttribute('src');
        const sources = mediaNode.getElementsByTagName('source');
        const firstSourceSrc = sources.length > 0 ? sources[0].getAttribute('src') : null;
        const mediaSrc = src || firstSourceSrc;

        if (mediaSrc) {
          return `[${mediaSrc}](${mediaSrc}) `;
        }

        return content;
      }
    });

    // Swap base64 images for ids first, otherwise they occupy memory during conversion.
    const { processedHtml, images } = processBase64Images(html);

    // Too large to convert: hand back the HTML as-is. It already contains the
    // image placeholders, so the extracted images must be returned with it —
    // otherwise the placeholders would reference data that has been discarded.
    if (processedHtml.length > MAX_HTML_SIZE) {
      return { rawText: processedHtml, imageList: images };
    }

    const md = turndownService.turndown(processedHtml);

    return {
      rawText: md,
      imageList: images
    };
  } catch (error) {
    // Best effort: log the failure and return an empty result instead of throwing.
    console.log('html 2 markdown error', error);
    return {
      rawText: '',
      imageList: []
    };
  }
};
|