fix: The website knowledge base does not reliably distinguish file URLs from folder URLs, so links within the final document cannot be resolved and accessed correctly #2935 (#3893)

This commit is contained in:
shaohuzhang1 2025-08-20 11:36:38 +08:00 committed by GitHub
parent e910217850
commit c74352046b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -3,6 +3,7 @@ import logging
import re
import traceback
from functools import reduce
from pathlib import Path
from typing import List, Set
from urllib.parse import urljoin, urlparse, ParseResult, urlsplit, urlunparse
@ -70,6 +71,8 @@ class Fork:
def __init__(self, base_fork_url: str, selector_list: List[str]):
base_fork_url = remove_fragment(base_fork_url)
if any([True for end_str in ['index.html', '.htm', '.html'] if base_fork_url.endswith(end_str)]):
self.base_fork_url = str(Path(base_fork_url).parent)
self.base_fork_url = urljoin(base_fork_url if base_fork_url.endswith("/") else base_fork_url + '/', '.')
parsed = urlsplit(base_fork_url)
query = parsed.query