From c74352046b09b7992955d0423dc138de6c5417b3 Mon Sep 17 00:00:00 2001 From: shaohuzhang1 <80892890+shaohuzhang1@users.noreply.github.com> Date: Wed, 20 Aug 2025 11:36:38 +0800 Subject: [PATCH] fix: The website knowledge base lacks comprehensive methods for determining files and folders, resulting in ineffective access to links within the final document #2935 (#3893) --- apps/common/util/fork.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/apps/common/util/fork.py b/apps/common/util/fork.py index 90527db9c..a8df9f7b2 100644 --- a/apps/common/util/fork.py +++ b/apps/common/util/fork.py @@ -3,6 +3,7 @@ import logging import re import traceback from functools import reduce +from pathlib import Path from typing import List, Set from urllib.parse import urljoin, urlparse, ParseResult, urlsplit, urlunparse @@ -70,6 +71,8 @@ class Fork: def __init__(self, base_fork_url: str, selector_list: List[str]): base_fork_url = remove_fragment(base_fork_url) + if any([True for end_str in ['index.html', '.htm', '.html'] if base_fork_url.endswith(end_str)]): + self.base_fork_url = str(Path(base_fork_url).parent) self.base_fork_url = urljoin(base_fork_url if base_fork_url.endswith("/") else base_fork_url + '/', '.') parsed = urlsplit(base_fork_url) query = parsed.query