80 lines
2.2 KiB
JavaScript
80 lines
2.2 KiB
JavaScript
// eslint-disable-next-line vue/prefer-import-from-vue
|
|
import { isHTMLTag } from '@vue/shared';
|
|
import { load } from 'cheerio';
|
|
import {} from 'vuepress/shared';
|
|
import { isArray } from '../../shared/index.js';
|
|
// Tags for which text extraction returns the `alt` attribute instead of
// descending into child nodes.
const MEDIA_WITH_ALT = ['img'];
|
|
// Tags whose whole subtree is skipped during text extraction.
// Each tag is listed once; membership is checked with `.includes()`.
const REMOVED_TAGS = [
    // non content
    'title',
    'base',
    'meta',
    'template',
    'script',
    'style',
    'canvas',
    'slot',
    // not main content
    'nav',
    'aside',
    'footer',
    // deleted
    'del',
    's',
    // rich media
    'audio',
    'video',
    'iframe',
    'map',
    'area',
    'track',
    'object',
    // input
    'input',
    'textarea',
    'select',
    'option',
    'optgroup',
    'datalist',
];
|
|
// Class names / ids that mark a table-of-contents container.
const TOC_MARKERS = ['table-of-contents', 'toc'];

/**
 * Extract the plain text carried by a single parsed HTML node.
 *
 * - text nodes yield their `data`;
 * - tags marked as a table of contents (by id or by any class name) yield '';
 * - tags listed in `MEDIA_WITH_ALT` yield their `alt` attribute (or '');
 * - other tags yield the text of their children, unless the tag is listed in
 *   `REMOVED_TAGS` / `removedTags` or is not a known HTML tag;
 * - every other node type yields ''.
 *
 * @param {object} node - Parsed node; reads `type`, and for tags `attribs`,
 *   `tagName` and `children`.
 * @param {object} options
 * @param {string} options.base - Not used here; forwarded to child handling.
 * @param {string[]} options.removedTags - Extra tag names to drop.
 * @returns {string} Extracted text, possibly empty.
 */
const handleNode = (node, { base, removedTags }) => {
    if (node.type === 'text')
        return node.data;
    if (node.type !== 'tag')
        return '';

    const { class: className = '', id } = node.attribs;
    // `class` may hold several whitespace-separated names, so compare each
    // token rather than the whole attribute string.
    const isToc = TOC_MARKERS.includes(id) ||
        className.split(/\s+/).some((name) => TOC_MARKERS.includes(name));

    // toc should be dropped
    if (isToc)
        return '';

    // return alt text
    if (MEDIA_WITH_ALT.includes(node.tagName))
        return node.attribs.alt || '';

    // only known html tags that are not blacklisted contribute their content
    const isKept = !REMOVED_TAGS.includes(node.tagName) &&
        !removedTags.includes(node.tagName) &&
        isHTMLTag(node.tagName);

    return isKept ? handleNodes(node.children, { base, removedTags }) : '';
};
|
|
/**
 * Concatenate the extracted text of a list of nodes.
 *
 * @param {unknown} nodes - Child nodes; anything that is not an array yields ''.
 * @param {object} options
 * @param {string} options.base - Forwarded unchanged to `handleNode`.
 * @param {string[]} options.removedTags - Forwarded unchanged to `handleNode`.
 * @returns {string} Text of all nodes joined without a separator.
 */
const handleNodes = (nodes, { base, removedTags }) => {
    if (!isArray(nodes))
        return '';

    const context = { base, removedTags };

    return nodes.map((child) => handleNode(child, context)).join('');
};
|
|
// Cheerio instance loaded from an empty document; `getText` calls its
// `parseHTML` method to parse HTML strings.
const $ = load('');
|
|
/**
 * Extract plain text from an HTML string.
 *
 * Root nodes are processed in order and their text is appended until the
 * accumulated text reaches `length`; the node that crosses the limit is kept
 * whole, so the result may be longer than `length`.
 *
 * @param {string} content - HTML to extract text from.
 * @param {string} base - Forwarded to node handling.
 * @param {object} [options]
 * @param {number} [options.length=300] - Target length of the accumulated text.
 * @param {boolean} [options.singleLine] - Collapse every whitespace run
 *   (including newlines) into a single space.
 * @param {string[]} [options.removedTags=['table', 'pre']] - Extra tags to drop.
 * @returns {string} Trimmed text.
 */
export const getText = (content, base, { length = 300, singleLine, removedTags = ['table', 'pre'], } = {}) => {
    let result = '';
    const rootNodes = $.parseHTML(content) ?? [];

    for (const node of rootNodes) {
        const text = handleNode(node, { base, removedTags });

        if (text) {
            result += text;
            // Compare the accumulated text, not just this node's text, so many
            // short nodes together still hit the limit.
            if (result.length >= length)
                break;
        }
    }

    // `\s+` already matches newlines, so one pass is enough.
    const normalized = singleLine ? result.replace(/\s+/g, ' ') : result;

    return normalized.trim();
};
|
|
/**
 * Extract plain text from a page's rendered content.
 *
 * @param {object} app - Reads `app.options.base`.
 * @param {object} page - Reads `page.contentRendered`.
 * @param {object} [options={}] - Passed through to `getText`.
 * @returns {string} Text returned by `getText`.
 */
export const getPageText = (app, page, options = {}) => {
    const { base } = app.options;
    const { contentRendered } = page;

    return getText(contentRendered, base, options);
};
|