mirror of
https://github.com/1Panel-dev/MaxKB.git
synced 2025-12-26 01:33:05 +00:00
fix: 同步web站点内容编码错误,导致乱码
This commit is contained in:
parent
a01d5beb59
commit
cf003aa2d2
|
|
@ -6,6 +6,7 @@ from functools import reduce
|
|||
from typing import List, Set
|
||||
from urllib.parse import urljoin, urlparse, ParseResult, urlsplit
|
||||
|
||||
import chardet
|
||||
import html2text as ht
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
|
@ -121,7 +122,7 @@ class Fork:
|
|||
|
||||
@staticmethod
def get_beautiful_soup(response):
    """Decode an HTTP response body and parse it with BeautifulSoup.

    The text encoding is chosen in this order:

    1. ``response.encoding`` when it is set and is not ``'ISO-8859-1'``.
       NOTE(review): ``requests`` reports ``ISO-8859-1`` as a default for
       ``text/*`` responses that carry no charset, so that value is
       treated as "unknown" here rather than trusted.
    2. ``response.apparent_encoding`` (detected from the body bytes).
    3. ``'utf-8'`` when detection yields nothing (e.g. an empty body),
       so that ``decode`` is never called with ``None``.

    :param response: object exposing ``encoding``, ``apparent_encoding``
        and ``content`` (bytes) -- presumably a ``requests.Response``.
    :return: ``BeautifulSoup`` tree built with the ``html.parser`` backend.
    :raises UnicodeDecodeError: if the body is not valid in the chosen
        encoding.
    :raises LookupError: if the chosen encoding name is unknown to Python.
    """
    declared = response.encoding
    if declared and declared != 'ISO-8859-1':
        encoding = declared
    else:
        # Only run content sniffing when the declared charset is unusable;
        # fall back to UTF-8 if the detector cannot decide.
        encoding = response.apparent_encoding or 'utf-8'
    html_content = response.content.decode(encoding)
    return BeautifulSoup(html_content, "html.parser")
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue