From cf003aa2d27d56a1394ab6c450638bbd1c70d2ae Mon Sep 17 00:00:00 2001
From: shaohuzhang1
Date: Mon, 25 Mar 2024 18:46:25 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20=E5=90=8C=E6=AD=A5web=E7=AB=99=E7=82=B9?=
 =?UTF-8?q?=E5=86=85=E5=AE=B9=E7=BC=96=E7=A0=81=E9=94=99=E8=AF=AF,?=
 =?UTF-8?q?=E5=AF=BC=E8=87=B4=E4=B9=B1=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 apps/common/util/fork.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/apps/common/util/fork.py b/apps/common/util/fork.py
index 027ff118b..eba10bbb1 100644
--- a/apps/common/util/fork.py
+++ b/apps/common/util/fork.py
@@ -6,6 +6,7 @@ from functools import reduce
 from typing import List, Set
 from urllib.parse import urljoin, urlparse, ParseResult, urlsplit
 
+import chardet
 import html2text as ht
 import requests
 from bs4 import BeautifulSoup
@@ -121,7 +122,14 @@ class Fork:
 
     @staticmethod
     def get_beautiful_soup(response):
-        encoding = response.apparent_encoding if response.apparent_encoding is not None else 'utf-8'
+        # requests falls back to 'ISO-8859-1' when a text response declares no
+        # charset, so that value is treated as "unknown" and the encoding is
+        # sniffed from the body instead; 'utf-8' covers a failed detection.
+        declared = response.encoding
+        if declared and declared != 'ISO-8859-1':
+            encoding = declared
+        else:
+            encoding = response.apparent_encoding or 'utf-8'
         html_content = response.content.decode(encoding)
         return BeautifulSoup(html_content, "html.parser")
 