FastGPT/packages/global/common/string/markdown.ts
Archer a499d05a02
Some checks are pending
Document deploy / sync-images (push) Waiting to run
Document deploy / generate-timestamp (push) Blocked by required conditions
Document deploy / build-images (map[domain:https://fastgpt.cn suffix:cn]) (push) Blocked by required conditions
Document deploy / build-images (map[domain:https://fastgpt.io suffix:io]) (push) Blocked by required conditions
Document deploy / update-images (map[deployment:fastgpt-docs domain:https://fastgpt.cn kube_config:KUBE_CONFIG_CN suffix:cn]) (push) Blocked by required conditions
Document deploy / update-images (map[deployment:fastgpt-docs domain:https://fastgpt.io kube_config:KUBE_CONFIG_IO suffix:io]) (push) Blocked by required conditions
Build FastGPT images in Personal warehouse / get-vars (push) Waiting to run
Build FastGPT images in Personal warehouse / build-fastgpt-images (map[arch:amd64 runs-on:ubuntu-24.04]) (push) Blocked by required conditions
Build FastGPT images in Personal warehouse / build-fastgpt-images (map[arch:arm64 runs-on:ubuntu-24.04-arm]) (push) Blocked by required conditions
Build FastGPT images in Personal warehouse / release-fastgpt-images (push) Blocked by required conditions
V4.14.0 features (#5850)
* feat: migrate chat files to s3 (#5802)

* feat: migrate chat files to s3

* feat: add delete jobs for deleting s3 files

* chore: improvements

* fix: lockfile

* fix: imports

* feat: add ttl for those uploaded files but not send yet

* feat: init bullmq worker

* fix: s3 key

* perf: s3 internal url

* remove env

* fix: re-sign a new url

* fix: re-sign a new url

* perf: s3 code

---------

Co-authored-by: archer <545436317@qq.com>

* update pacakge

* feat: add more file type for uploading (#5807)

* fix: re-sign a new url

* wip: file selector

* feat: add more file type for uploading

* feat: migrate chat files to s3 (#5802)

* feat: migrate chat files to s3

* feat: add delete jobs for deleting s3 files

* chore: improvements

* fix: lockfile

* fix: imports

* feat: add ttl for those uploaded files but not send yet

* feat: init bullmq worker

* fix: s3 key

* perf: s3 internal url

* remove env

* fix: re-sign a new url

* fix: re-sign a new url

* perf: s3 code

---------

Co-authored-by: archer <545436317@qq.com>

* fix: limit minmax available file upload number

* perf: file select modal code

* fix: fileselect refresh

* fix: ts

---------

Co-authored-by: archer <545436317@qq.com>

* bugfix: chat page (#5809)

* fix: upload avatar

* fix: chat page username display issue and setting button visibility

* doc

* Markdown match base64 performance

* feat: improve global variables(time, file, dataset) (#5804)

* feat: improve global variables(time, file, dataset)

* feat: optimize code

* perf: time variables code

* fix: model, file

* fix: hide file upload

* fix: ts

* hide dataset select

---------

Co-authored-by: archer <545436317@qq.com>

* perf: insert training queue

* perf: s3 upload error i18n

* fix: share page s3

* fix: timeselector ui error

* var update node

* Timepicker ui

* feat: plugin support password

* fix: password disabled UX

* fix: button size

* fix: no model cache for chat page (#5820)

* rename function

* fix: workflow bug

* fix: interactive loop

* fix test

* perf: common textare no richtext

* move system plugin config (#5803) (#5813)

* move system plugin config (#5803)

* move system plugin config

* extract tag bar

* filter

* tool detail temp

* marketplace

* params

* fix

* type

* search

* tags render

* status

* ui

* code

* connect to backend (#5815)

* feat: marketplace apis & type definitions (#5817)

* chore: marketplace init

* chore: marketplace list api type

* chore: detail api

* marketplace & import

* feat: marketplace ui (#5826)

* temp

* marketplace

* import

* feat: detail return readme

* chore: cache data expire 10 mins

* chore: update docs

* feat: marketplace ui

---------

Co-authored-by: heheer <zhiyu44@qq.com>

* feat: marketplace (#5830)

* temp

* marketplace

* chore: tool list tag filter

* chore: adjust

---------

Co-authored-by: heheer <zhiyu44@qq.com>

* tool detail drawer

* remove tag filter

* fix

* fix

* fix build

* update pnpm-lock

* fix type

* perf code

* marketplace router

* fix build

* navbar icon

* fix ui

* fix init

* docs: marketplace/plugin (#5832)

* temp

* marketplace

* docs(plugin): system tool docs

---------

Co-authored-by: heheer <zhiyu44@qq.com>

* default url

* feat: i18n/ docker build (#5833)

* chore: docker build

* feat: i18n selector

* fix

* fix

* fix: i18n parse

* fix: i18n parse

---------

Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
Co-authored-by: heheer <zhiyu44@qq.com>

* marketplace url

* update action

* market place code

* market place code

* title

* fix: nextconfig

* fix: copilot review

* Remove bypassable regex-based XSS sanitization from marketplace search (#5835)

* Initial plan

* Remove problematic regex-based XSS sanitization from search inputs

Co-authored-by: c121914yu <50446880+c121914yu@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: c121914yu <50446880+c121914yu@users.noreply.github.com>

* feat: tool tag openapi

* api check

* fix: tsc

* fix: ts

* fix: lock

* sdk version

* ts

* sdk version

* remove invalid tip

* perf: export data add timezone

* perf: admin plugin api move

* perf: tool code

* move tag code

* perf: marketplace and team plugin code

* remove workflow invalid request

* rename global tool code

* rename global tool code

* rename api

* fix some bugs (#5841)

* fix some bugs

* fix

* perf: Tag filter

* fix: ts

* fix: ts

---------

Co-authored-by: archer <545436317@qq.com>

* perf: Concat function

* fix: workflow snapshot push

* fix: ts type

* fix: login to config/*

* fix: ts

* fix: model avatar (#5848)

* fix: model avatar

* fix: ts

* fix: avatar migration to s3

* update lock

* fix: avatar redirect

---------

Co-authored-by: archer <545436317@qq.com>

* fix tool detail (#5847)

* fix tool detail

* init script

* fix build

* perf: plugin detail modal

* change tooltags to tags

* fix icon

---------

Co-authored-by: archer <545436317@qq.com>

* fix tag filter scroll (#5852)

* fix create app plugin & import info (#5853)

* tag size

* rename toolkit

* download url

* import plugin status (#5854)

* init doc

* fix: init shell

---------

Co-authored-by: 伍闲犬 <whoeverimf5@gmail.com>
Co-authored-by: Zeng Qingwen <143274079+fishwww-ww@users.noreply.github.com>
Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
Co-authored-by: heheer <zhiyu44@qq.com>
Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com>
2025-11-04 16:58:12 +08:00

201 lines
5.6 KiB
TypeScript

import { batchRun } from '../system/utils';
import { getNanoid, simpleText } from './tools';
import type { ImageType } from '../../../service/worker/readFile/type';
/* Delete redundant text in markdown */
export const simpleMarkdownText = (rawText: string) => {
rawText = simpleText(rawText);
// Remove a line feed from a hyperlink or picture
rawText = rawText.replace(/\[([^\]]+)\]\((.+?)\)/g, (match, linkText, url) => {
const cleanedLinkText = linkText.replace(/\n/g, ' ').trim();
if (!url) {
return '';
}
return `[${cleanedLinkText}](${url})`;
});
// replace special #\.* ……
const reg1 = /\\([#`!*()+-_\[\]{}\\.])/g;
if (reg1.test(rawText)) {
rawText = rawText.replace(reg1, '$1');
}
// replace \\n
rawText = rawText.replace(/\\\\n/g, '\\n');
// Remove headings and code blocks front spaces
['####', '###', '##', '#', '```', '~~~'].forEach((item, i) => {
const reg = new RegExp(`\\n\\s*${item}`, 'g');
if (reg.test(rawText)) {
rawText = rawText.replace(new RegExp(`(\\n)( *)(${item})`, 'g'), '$1$3');
}
});
return rawText.trim();
};
export const htmlTable2Md = (content: string): string => {
return content.replace(/<table>[\s\S]*?<\/table>/g, (htmlTable) => {
try {
// Clean up whitespace and newlines
const cleanHtml = htmlTable.replace(/\n\s*/g, '');
const rows = cleanHtml.match(/<tr>(.*?)<\/tr>/g);
if (!rows) return htmlTable;
// Parse table data
let tableData: string[][] = [];
let maxColumns = 0;
// Try to convert to markdown table
rows.forEach((row, rowIndex) => {
if (!tableData[rowIndex]) {
tableData[rowIndex] = [];
}
let colIndex = 0;
const cells = row.match(/<td[^>]*\/>|<td[^>]*>.*?<\/td>/g) || [];
cells.forEach((cell) => {
while (tableData[rowIndex][colIndex]) {
colIndex++;
}
const colspan = parseInt(cell.match(/colspan="(\d+)"/)?.[1] || '1');
const rowspan = parseInt(cell.match(/rowspan="(\d+)"/)?.[1] || '1');
let content = '';
if (cell.endsWith('/>')) {
content = '';
} else {
content = cell.replace(/<td[^>]*>|<\/td>/g, '').trim();
}
for (let i = 0; i < rowspan; i++) {
for (let j = 0; j < colspan; j++) {
if (!tableData[rowIndex + i]) {
tableData[rowIndex + i] = [];
}
tableData[rowIndex + i][colIndex + j] = i === 0 && j === 0 ? content : '^^';
}
}
colIndex += colspan;
maxColumns = Math.max(maxColumns, colIndex);
});
for (let i = 0; i < maxColumns; i++) {
if (!tableData[rowIndex][i]) {
tableData[rowIndex][i] = ' ';
}
}
});
const chunks: string[] = [];
const headerCells = tableData[0]
.slice(0, maxColumns)
.map((cell) => (cell === '^^' ? ' ' : cell || ' '));
const headerRow = '| ' + headerCells.join(' | ') + ' |';
chunks.push(headerRow);
const separator = '| ' + Array(headerCells.length).fill('---').join(' | ') + ' |';
chunks.push(separator);
tableData.slice(1).forEach((row) => {
const paddedRow = row
.slice(0, maxColumns)
.map((cell) => (cell === '^^' ? ' ' : cell || ' '));
while (paddedRow.length < maxColumns) {
paddedRow.push(' ');
}
chunks.push('| ' + paddedRow.join(' | ') + ' |');
});
return chunks.join('\n');
} catch (error) {
return htmlTable;
}
});
};
/**
* format markdown
* 1. upload base64
* 2. replace \
*/
export const uploadMarkdownBase64 = async ({
rawText,
uploadImgController
}: {
rawText: string;
uploadImgController?: (base64: string) => Promise<string>;
}) => {
if (uploadImgController) {
// match base64, upload and replace it
const base64Regex = /data:image\/.*;base64,([^\)]+)/g;
const base64Arr = rawText.match(base64Regex) || [];
// upload base64 and replace it
await batchRun(
base64Arr,
async (base64Img) => {
try {
const str = await uploadImgController(base64Img);
rawText = rawText.replace(base64Img, str);
} catch (error) {
rawText = rawText.replace(base64Img, '');
rawText = rawText.replace(/!\[.*\]\(\)/g, '');
}
},
20
);
}
// Remove white space on both sides of the picture
// const trimReg = /(!\[.*\]\(.*\))\s*/g;
// if (trimReg.test(rawText)) {
// rawText = rawText.replace(trimReg, '$1');
// }
return rawText;
};
export const markdownProcess = async ({
rawText,
uploadImgController
}: {
rawText: string;
uploadImgController?: (base64: string) => Promise<string>;
}) => {
const imageProcess = await uploadMarkdownBase64({
rawText,
uploadImgController
});
return simpleMarkdownText(imageProcess);
};
export const matchMdImg = (text: string) => {
// 优化后的正则:
// 1. 使用 [^\]]* 匹配 alt 文本(更精确)
// 2. 使用 [A-Za-z0-9+/=]+ 匹配 base64 数据(避免回溯)
// 3. 明确匹配 data:image/ 前缀
const base64Regex = /!\[([^\]]*)\]\((data:image\/([^;]+);base64,([A-Za-z0-9+/=]+))\)/g;
const imageList: ImageType[] = [];
text = text.replace(base64Regex, (_match, altText, _fullDataUrl, mime, base64Data) => {
const uuid = `IMAGE_${getNanoid(12)}_IMAGE`;
imageList.push({
uuid,
base64: base64Data,
mime: `image/${mime}`
});
// 保持原有的 alt 文本,只替换 base64 部分
return `![${altText}](${uuid})`;
});
return {
text,
imageList
};
};