From ddfe99638ad38fe1cd8d78450730c518f28e1c8e Mon Sep 17 00:00:00 2001 From: Joshua Chen Date: Wed, 8 Feb 2023 00:48:30 -0500 Subject: [PATCH] fix(mdx-loader): handle Markdown within heading IDs --- .../remark/headings/__tests__/index.test.ts | 51 ++++++++++------ .../src/remark/headings/index.ts | 59 ++++++++++++------- 2 files changed, 70 insertions(+), 40 deletions(-) diff --git a/packages/docusaurus-mdx-loader/src/remark/headings/__tests__/index.test.ts b/packages/docusaurus-mdx-loader/src/remark/headings/__tests__/index.test.ts index 47a31c836f..4cfd331d93 100644 --- a/packages/docusaurus-mdx-loader/src/remark/headings/__tests__/index.test.ts +++ b/packages/docusaurus-mdx-loader/src/remark/headings/__tests__/index.test.ts @@ -156,40 +156,23 @@ describe('headings remark plugin', () => { const result = process( [ '## I ♥ unicode', - '', '## Dash-dash', - '', '## en–dash', - '', '## em–dash', - '', '## 😄 unicode emoji', - '', '## 😄-😄 unicode emoji', - '', '## 😄_😄 unicode emoji', - '', '##', - '', '## ', - '', '## Initial spaces', - '', '## Final spaces ', - '', '## Duplicate', - '', '## Duplicate', - '', '## :ok: No underscore', - '', '## :ok_hand: Single', - '', '## :ok_hand::hatched_chick: Two in a row with no spaces', - '', '## :ok_hand: :hatched_chick: Two in a row', - '', - ].join('\n'), + ].join('\n\n'), ); const expected = u('root', [ heading('I ♥ unicode', 'i--unicode'), @@ -308,4 +291,36 @@ describe('headings remark plugin', () => { }, ]); }); + + it('handles Markdown in headings', () => { + const result = process('## Bar {#\\_\\_bar__}'); + + const headers: {text: string; id: string}[] = []; + visit(result, 'heading', (node) => { + headers.push({text: toString(node), id: node.data!.id as string}); + }); + + const result2 = process('## Ba\\_r {#bar}'); + + const headers2: {text: string; id: string}[] = []; + visit(result2, 'heading', (node) => { + headers2.push({text: toString(node), id: node.data!.id as string}); + }); + + 
expect(headers2).toEqual([ + { + id: 'bar', + text: 'Ba_r', + }, + ]); + + expect(() => + process('## Bar {#__bar__}'), + ).toThrowErrorMatchingInlineSnapshot( + `"The heading ID must not contain Markdown markup. Heading: Bar {#bar}"`, + ); + expect(() => process('## Bar {#`bar`}')).toThrowErrorMatchingInlineSnapshot( + `"The heading ID must not contain Markdown markup. Heading: Bar {#bar}"`, + ); + }); }); diff --git a/packages/docusaurus-mdx-loader/src/remark/headings/index.ts b/packages/docusaurus-mdx-loader/src/remark/headings/index.ts index 98aaa37f75..7d1b2a9fb7 100644 --- a/packages/docusaurus-mdx-loader/src/remark/headings/index.ts +++ b/packages/docusaurus-mdx-loader/src/remark/headings/index.ts @@ -11,7 +11,7 @@ import {parseMarkdownHeadingId, createSlugger} from '@docusaurus/utils'; import visit from 'unist-util-visit'; import mdastToString from 'mdast-util-to-string'; import type {Transformer} from 'unified'; -import type {Heading, Text} from 'mdast'; +import type {Heading} from 'mdast'; export default function plugin(): Transformer { return (root) => { @@ -36,30 +36,45 @@ export default function plugin(): Transformer { // Support explicit heading IDs const parsedHeading = parseMarkdownHeadingId(heading); - id = parsedHeading.id ?? 
slugs.slug(heading); - if (parsedHeading.id) { - // When there's an id, it is always in the last child node - // Sometimes heading is in multiple "parts" (** syntax creates a child - // node): - // ## part1 *part2* part3 {#id} - const lastNode = headingNode.children[ - headingNode.children.length - 1 - ] as Text; + id = parsedHeading.id; - if (headingNode.children.length > 1) { - const lastNodeText = parseMarkdownHeadingId(lastNode.value).text; - // When last part contains test+id, remove the id - if (lastNodeText) { - lastNode.value = lastNodeText; - } - // When last part contains only the id: completely remove that node - else { - headingNode.children.pop(); - } - } else { - lastNode.value = parsedHeading.text; + let trailingTextContainingId = ''; + let node = headingNode.children.pop(); + // Keep going back until the span of text nodes forms the heading ID + while ( + node?.type === 'text' && + !parseMarkdownHeadingId(trailingTextContainingId).id + ) { + trailingTextContainingId = node.value + trailingTextContainingId; + node = headingNode.children.pop(); } + // Last node popped was excess lookahead, so push it back + if (node) { + headingNode.children.push(node); + } + const {text: trailingText, id: contentId} = parseMarkdownHeadingId( + trailingTextContainingId, + ); + if (!contentId) { + // If the trailing text does not contain an ID, this means the + // ID extraction logic removed some Markdown markup from the "ID" + // (e.g. ## Heading {#**id**}). The behavior here is undefined, so + // we throw an error. + throw new Error( + `The heading ID must not contain Markdown markup. Heading: ${heading}`, + ); + } + if (trailingText) { + // If the trailing text contains an ID, but also contains other + // text, we add the trailing text as a new text node + headingNode.children.push({ + type: 'text', + value: trailingText, + }); + } + } else { + id = slugs.slug(heading); } }