From ddfe99638ad38fe1cd8d78450730c518f28e1c8e Mon Sep 17 00:00:00 2001
From: Joshua Chen <sidachen2003@gmail.com>
Date: Wed, 8 Feb 2023 00:48:30 -0500
Subject: [PATCH] fix(mdx-loader): handle Markdown within heading IDs

---
 .../remark/headings/__tests__/index.test.ts   | 51 ++++++++++------
 .../src/remark/headings/index.ts              | 59 ++++++++++++-------
 2 files changed, 70 insertions(+), 40 deletions(-)

diff --git a/packages/docusaurus-mdx-loader/src/remark/headings/__tests__/index.test.ts b/packages/docusaurus-mdx-loader/src/remark/headings/__tests__/index.test.ts
index 47a31c836f..4cfd331d93 100644
--- a/packages/docusaurus-mdx-loader/src/remark/headings/__tests__/index.test.ts
+++ b/packages/docusaurus-mdx-loader/src/remark/headings/__tests__/index.test.ts
@@ -156,40 +156,23 @@ describe('headings remark plugin', () => {
     const result = process(
       [
         '## I ♥ unicode',
-        '',
         '## Dash-dash',
-        '',
         '## en–dash',
-        '',
         '## em–dash',
-        '',
         '## 😄 unicode emoji',
-        '',
         '## 😄-😄 unicode emoji',
-        '',
         '## 😄_😄 unicode emoji',
-        '',
         '##',
-        '',
         '## ',
-        '',
         '##     Initial spaces',
-        '',
         '## Final spaces   ',
-        '',
         '## Duplicate',
-        '',
         '## Duplicate',
-        '',
         '## :ok: No underscore',
-        '',
         '## :ok_hand: Single',
-        '',
         '## :ok_hand::hatched_chick: Two in a row with no spaces',
-        '',
         '## :ok_hand: :hatched_chick: Two in a row',
-        '',
-      ].join('\n'),
+      ].join('\n\n'),
     );
     const expected = u('root', [
       heading('I ♥ unicode', 'i--unicode'),
@@ -308,4 +291,36 @@ describe('headings remark plugin', () => {
       },
     ]);
   });
+
+  it('handles Markdown in headings', () => {
+    // Collects every heading's rendered text and assigned ID from a tree.
+    const collectHeadings = (tree: ReturnType<typeof process>) => {
+      const found: {text: string; id: string}[] = [];
+      visit(tree, 'heading', (node) => {
+        found.push({text: toString(node), id: node.data!.id as string});
+      });
+      return found;
+    };
+
+    // Escaped underscores inside the ID are literal text, not emphasis, so
+    // the ID is still recognized once the trailing text nodes are joined.
+    expect(collectHeadings(process('## Bar {#\\_\\_bar__}'))).toEqual([
+      {id: '__bar__', text: 'Bar'},
+    ]);
+
+    // Escapes in the heading text must survive the removal of the ID.
+    expect(collectHeadings(process('## Ba\\_r {#bar}'))).toEqual([
+      {id: 'bar', text: 'Ba_r'},
+    ]);
+
+    // Real markup inside the ID is rejected rather than silently stripped.
+    expect(() =>
+      process('## Bar {#__bar__}'),
+    ).toThrowErrorMatchingInlineSnapshot(
+      `"The heading ID must not contain Markdown markup. Heading: Bar {#bar}"`,
+    );
+    expect(() => process('## Bar {#`bar`}')).toThrowErrorMatchingInlineSnapshot(
+      `"The heading ID must not contain Markdown markup. Heading: Bar {#bar}"`,
+    );
+  });
 });
diff --git a/packages/docusaurus-mdx-loader/src/remark/headings/index.ts b/packages/docusaurus-mdx-loader/src/remark/headings/index.ts
index 98aaa37f75..7d1b2a9fb7 100644
--- a/packages/docusaurus-mdx-loader/src/remark/headings/index.ts
+++ b/packages/docusaurus-mdx-loader/src/remark/headings/index.ts
@@ -11,7 +11,7 @@ import {parseMarkdownHeadingId, createSlugger} from '@docusaurus/utils';
 import visit from 'unist-util-visit';
 import mdastToString from 'mdast-util-to-string';
 import type {Transformer} from 'unified';
-import type {Heading, Text} from 'mdast';
+import type {Heading} from 'mdast';
 
 export default function plugin(): Transformer {
   return (root) => {
@@ -36,30 +36,45 @@ export default function plugin(): Transformer {
         // Support explicit heading IDs
         const parsedHeading = parseMarkdownHeadingId(heading);
 
-        id = parsedHeading.id ?? slugs.slug(heading);
-
         if (parsedHeading.id) {
-          // When there's an id, it is always in the last child node
-          // Sometimes heading is in multiple "parts" (** syntax creates a child
-          // node):
-          // ## part1 *part2* part3 {#id}
-          const lastNode = headingNode.children[
-            headingNode.children.length - 1
-          ] as Text;
+          id = parsedHeading.id;
 
-          if (headingNode.children.length > 1) {
-            const lastNodeText = parseMarkdownHeadingId(lastNode.value).text;
-            // When last part contains test+id, remove the id
-            if (lastNodeText) {
-              lastNode.value = lastNodeText;
-            }
-            // When last part contains only the id: completely remove that node
-            else {
-              headingNode.children.pop();
-            }
-          } else {
-            lastNode.value = parsedHeading.text;
+          let trailingTextContainingId = '';
+          let node = headingNode.children.pop();
+          // Keep popping trailing text nodes until, joined, they contain the ID
+          while (
+            node?.type === 'text' &&
+            !parseMarkdownHeadingId(trailingTextContainingId).id
+          ) {
+            trailingTextContainingId = node.value + trailingTextContainingId;
+            node = headingNode.children.pop();
           }
+          // Last node popped was excess lookahead, so push it back
+          if (node) {
+            headingNode.children.push(node);
+          }
+          const {text: trailingText, id: contentId} = parseMarkdownHeadingId(
+            trailingTextContainingId,
+          );
+          if (!contentId) {
+            // If the trailing text does not contain an ID, this means the
+            // ID extraction logic removed some Markdown markup from the "ID"
+            // (e.g. ## Heading {#**id**}). The behavior here is undefined, so
+            // we throw an error.
+            throw new Error(
+              `The heading ID must not contain Markdown markup. Heading: ${heading}`,
+            );
+          }
+          if (trailingText) {
+            // If the trailing text contains other text besides the ID, we
+            // add that remaining text (ID stripped) back as a new text node
+            headingNode.children.push({
+              type: 'text',
+              value: trailingText,
+            });
+          }
+        } else {
+          id = slugs.slug(heading);
         }
       }