fix: 修改已知bug(#30)

* fix: 刷新公共访问链接后,客户端统计重置
* fix: 导出未提交的sql文件
* fix: 创建 MaxKB 在线文档的知识库，只能获取根地址数据，子地址数据无法获取
2025-12-25 17:22:55 +00:00 · 2024-04-02 19:32:04 +08:00 · 2024-04-02 19:32:04 +08:00 · 11d8c6f174
parent 8e018a1ee8
commit 11d8c6f174
8 changed files with 96 additions and 27 deletions
--- a/apps/application/serializers/application_serializers.py
+++ b/apps/application/serializers/application_serializers.py
@ -209,15 +209,16 @@ class ApplicationSerializer(serializers.Serializer):
            access_token = self.data.get("access_token")
            application_access_token = QuerySet(ApplicationAccessToken).filter(access_token=access_token).first()
            if application_access_token is not None and application_access_token.is_active:
-                if token is None or (token_details is not None and 'client_id' not in token_details) or (
-                        token_details is not None and token_details.get(
-                    'access_token') != application_access_token.access_token):
+                if token_details is not None and 'client_id' in token_details and token_details.get(
+                        'client_id') is not None:
+                    client_id = token_details.get('client_id')
+                else:
                    client_id = str(uuid.uuid1())
-                    token = signing.dumps({'application_id': str(application_access_token.application_id),
-                                           'user_id': str(application_access_token.application.user.id),
-                                           'access_token': application_access_token.access_token,
-                                           'type': AuthenticationType.APPLICATION_ACCESS_TOKEN.value,
-                                           'client_id': client_id})
+                token = signing.dumps({'application_id': str(application_access_token.application_id),
+                                       'user_id': str(application_access_token.application.user.id),
+                                       'access_token': application_access_token.access_token,
+                                       'type': AuthenticationType.APPLICATION_ACCESS_TOKEN.value,
+                                       'client_id': client_id})
                return token
            else:
                raise NotFound404(404, "无效的access_token")
--- a/apps/application/sql/export_application_chat.sql
+++ b/apps/application/sql/export_application_chat.sql
@ -0,0 +1,37 @@
+/*
+ * Chat export query: joins every chat to its chat records and returns one
+ * output row per (chat, record) pair with the record's text, token counts,
+ * run time, details, vote status and improved-paragraph list.
+ * The statement has no WHERE clause of its own; presumably the caller appends
+ * its filters after the last join (TODO confirm against the export code).
+ */
+SELECT
+	application_chat."id" as chat_id,
+    application_chat.abstract as abstract,
+    application_chat_record_temp.problem_text as problem_text,
+    application_chat_record_temp.answer_text as answer_text,
+    application_chat_record_temp.message_tokens as message_tokens,
+    application_chat_record_temp.answer_tokens as answer_tokens,
+    application_chat_record_temp.run_time as run_time,
+    application_chat_record_temp.details::JSON as details,
+    application_chat_record_temp."index" as "index",
+    application_chat_record_temp.improve_paragraph_list  as improve_paragraph_list,
+    application_chat_record_temp.vote_status as vote_status,
+    application_chat_record_temp.create_time as create_time
+FROM
+	application_chat application_chat
+	/*
+	 * Per-chat aggregates: record count, number of records with vote_status '0'
+	 * (star_num) and '1' (trample_num), and the total length of all
+	 * improve_paragraph_id_list arrays (mark_sum).
+	 * NOTE(review): none of these columns is selected above; presumably they are
+	 * kept so that filters added by the caller can reference chat_record_temp. Verify
+	 * before removing this join.
+	 */
+	LEFT JOIN (
+	SELECT COUNT
+		( "id" ) AS chat_record_count,
+		SUM ( CASE WHEN "vote_status" = '0' THEN 1 ELSE 0 END ) AS star_num,
+		SUM ( CASE WHEN "vote_status" = '1' THEN 1 ELSE 0 END ) AS trample_num,
+		SUM ( CASE WHEN array_length( application_chat_record.improve_paragraph_id_list, 1 ) IS NULL THEN 0 ELSE array_length( application_chat_record.improve_paragraph_id_list, 1 ) END ) AS mark_sum,
+		chat_id
+	FROM
+		application_chat_record
+	GROUP BY
+		application_chat_record.chat_id
+	) chat_record_temp ON application_chat."id" = chat_record_temp.chat_id
+	/*
+	 * Every chat record with an extra improve_paragraph_list column: the paragraph
+	 * rows whose id is listed in improve_paragraph_id_list, each converted with
+	 * row_to_json and aggregated into an array, or the empty array literal when
+	 * array_length reports NULL for that id list.
+	 */
+	LEFT JOIN (
+	SELECT
+		*,
+	CASE
+			WHEN array_length( application_chat_record.improve_paragraph_id_list, 1 ) IS NULL THEN
+			'{}' ELSE ( SELECT ARRAY_AGG ( row_to_json ( paragraph ) ) FROM paragraph WHERE "id" = ANY ( application_chat_record.improve_paragraph_id_list ) )
+		END as improve_paragraph_list
+		FROM
+		application_chat_record application_chat_record
+	) application_chat_record_temp ON application_chat_record_temp.chat_id = application_chat."id"
--- a/apps/common/handle/impl/text_split_handle.py
+++ b/apps/common/handle/impl/text_split_handle.py
@ -9,7 +9,7 @@
 import re
 from typing import List

-import chardet
+from charset_normalizer import detect

 from common.handle.base_split_handle import BaseSplitHandle
 from common.util.split_model import SplitModel
@ -26,7 +26,7 @@ class TextSplitHandle(BaseSplitHandle):
        file_name: str = file.name.lower()
        if file_name.endswith(".md") or file_name.endswith('.txt'):
            return True
-        result = chardet.detect(buffer)
+        result = detect(buffer)
        if result['encoding'] != 'ascii' and result['confidence'] > 0.5:
            return True
        return False
@ -38,7 +38,7 @@ class TextSplitHandle(BaseSplitHandle):
        else:
            split_model = SplitModel(default_pattern_list, with_filter=with_filter, limit=limit)
        try:
-            content = buffer.decode(chardet.detect(buffer)['encoding'])
+            content = buffer.decode(detect(buffer)['encoding'])
        except BaseException as e:
            return {'name': file.name,
                    'content': []}
--- a/apps/common/util/fork.py
+++ b/apps/common/util/fork.py
@ -4,9 +4,8 @@ import re
 import traceback
 from functools import reduce
 from typing import List, Set
-from urllib.parse import urljoin, urlparse, ParseResult, urlsplit
+from urllib.parse import urljoin, urlparse, ParseResult, urlsplit, urlunparse

-import chardet
 import html2text as ht
 import requests
 from bs4 import BeautifulSoup
@ -44,6 +43,13 @@ class ForkManage:
                ForkManage.fork_child(child_link, selector_list, level - 1, exclude_link_url, fork_handler)


+def remove_fragment(url: str) -> str:
+    """Return ``url`` without its ``#fragment`` part.
+
+    The URL is parsed and rebuilt from its scheme, netloc, path, params and
+    query; only the fragment component is blanked.
+    """
+    # urlparse returns a namedtuple, so _replace gives a copy with just the fragment cleared.
+    without_fragment = urlparse(url)._replace(fragment='')
+    return urlunparse(without_fragment)
+
+
 class Fork:
    class Response:
        def __init__(self, content: str, child_link_list: List[ChildLink], status, message: str):
@ -61,6 +67,7 @@ class Fork:
            return Fork.Response('', [], 500, message)

    def __init__(self, base_fork_url: str, selector_list: List[str]):
+        base_fork_url = remove_fragment(base_fork_url)
        self.base_fork_url = urljoin(base_fork_url if base_fork_url.endswith("/") else base_fork_url + '/', '.')
        parsed = urlsplit(base_fork_url)
        query = parsed.query
@ -74,9 +81,11 @@ class Fork:
                                    fragment='').geturl()

    def get_child_link_list(self, bf: BeautifulSoup):
+        """Collect the ``<a>`` links of a parsed page that live under ``self.base_fork_url``.
+
+        :param bf: parsed page whose anchors are scanned
+        :return: list of ``ChildLink`` whose ``url`` is absolute and starts with ``self.base_fork_url``
+        """
-        pattern = "^((?!(http:|https:|tel:/|#|mailto:|javascript:))|" + self.base_fork_url + ").*"
+        # base_fork_url is literal text, not a regex: escape it so '.', '?' and similar match only themselves.
+        pattern = "^((?!(http:|https:|tel:/|#|mailto:|javascript:))|" + re.escape(self.base_fork_url) + "|/).*"
        link_list = bf.find_all(name='a', href=re.compile(pattern))
-        result = [ChildLink(link.get('href'), link) for link in link_list]
+        # urljoin resolves every href form against the crawl root: absolute URLs are kept,
+        # '/path' is resolved against the host, 'a/b.html' against base_fork_url and
+        # '//host/path' against the scheme. Plain string concatenation produced
+        # 'https://hosta/b.html' for relative hrefs, so those child pages were lost.
+        result = [ChildLink(urljoin(self.base_fork_url, link.get('href')), link) for link in link_list]
+        # Keep only links that stay inside the crawled subtree.
+        result = [row for row in result if row.url.startswith(self.base_fork_url)]
        return result

    def get_content_html(self, bf: BeautifulSoup):
@ -122,9 +131,18 @@ class Fork:

    @staticmethod
    def get_beautiful_soup(response):
+        """Decode ``response.content`` and parse it with ``html.parser``.
+
+        The body is first decoded with ``response.encoding``, or with
+        ``response.apparent_encoding`` when the former is missing or is
+        ``ISO-8859-1``. If the parsed page declares a different
+        ``<meta charset>``, the body is decoded and parsed again with it.
+
+        :param response: object exposing ``encoding``, ``apparent_encoding`` and ``content`` (bytes)
+        :return: the parsed ``BeautifulSoup`` document
+        """
-        encoding = response.encoding if response.encoding and response.encoding != 'ISO-8859-1' is not None else response.apparent_encoding
+        encoding = response.encoding if response.encoding is not None and response.encoding != 'ISO-8859-1' else response.apparent_encoding
        html_content = response.content.decode(encoding)
-        return BeautifulSoup(html_content, "html.parser")
+        beautiful_soup = BeautifulSoup(html_content, "html.parser")
+        # First charset the page declares through <meta charset="...">, if any.
+        charset = next((meta.attrs.get('charset') for meta in beautiful_soup.find_all('meta')
+                        if meta.attrs is not None and 'charset' in meta.attrs), None)
+        # Codec names are case-insensitive, so 'UTF-8' and 'utf-8' must not trigger a second parse.
+        if charset and charset.lower() != encoding.lower():
+            try:
+                html_content = response.content.decode(charset)
+            except (LookupError, UnicodeDecodeError):
+                # The declared charset is unknown or does not fit the bytes; the first
+                # decode already succeeded, so keep that document instead of failing the page.
+                return beautiful_soup
+            return BeautifulSoup(html_content, "html.parser")
+        return beautiful_soup

    def fork(self):
        try:
--- a/apps/dataset/serializers/document_serializers.py
+++ b/apps/dataset/serializers/document_serializers.py
@ -30,12 +30,11 @@ from common.util.common import post
 from common.util.field_message import ErrMessage
 from common.util.file_util import get_file_content
 from common.util.fork import Fork
-from common.util.split_model import SplitModel, get_split_model
+from common.util.split_model import get_split_model
 from dataset.models.data_set import DataSet, Document, Paragraph, Problem, Type, Status, ProblemParagraphMapping
 from dataset.serializers.common_serializers import BatchSerializer, MetaSerializer
 from dataset.serializers.paragraph_serializers import ParagraphSerializers, ParagraphInstanceSerializer
 from smartdoc.conf import PROJECT_DIR
-import chardet


 class DocumentEditInstanceSerializer(ApiMixin, serializers.Serializer):
--- a/pyproject.toml
+++ b/pyproject.toml
@ -17,7 +17,6 @@ jieba = "^0.42.1"
 diskcache = "^5.6.3"
 pillow = "^10.2.0"
 filetype = "^1.2.0"
-chardet = "^5.2.0"
 torch = "^2.2.1"
 sentence-transformers = "^2.2.2"
 blinker = "^1.6.3"
@ -30,7 +29,6 @@ html2text = "^2024.2.26"
 langchain-openai = "^0.0.8"
 django-ipware = "^6.0.4"
 django-apscheduler = "^0.6.2"
-chardet2 = "^2.0.3"
 pymupdf = "^1.24.0"
 python-docx = "^1.1.0"
 xlwt = "^1.3.0"
--- a/ui/src/api/log.ts
+++ b/ui/src/api/log.ts
@ -38,6 +38,15 @@ const exportChatLog: (
  exportExcel(applicantion_name, `${prefix}/${applicaiton_id}/chat/export`, param, loading)
 }

+/**
+ * Export the chat log of an application through exportExcel.
+ * @param applicaiton_id application id, used to build the `${prefix}/${applicaiton_id}/chat/export` URL
+ * @param applicantion_name application name, passed to exportExcel as its first argument
+ * @param param request parameters forwarded to exportExcel
+ * @param loading optional loading flag forwarded to exportExcel
+ *
+ * NOTE(review): the hunk header and the context lines just above this block suggest that a
+ * `const exportChatLog` with the same body is already declared directly before it. If so, this
+ * added copy redeclares a block-scoped constant, which TypeScript rejects. Confirm and keep one.
+ */
+const exportChatLog: (
+  applicaiton_id: string,
+  applicantion_name: string,
+  param: any,
+  loading?: Ref<boolean>
+) => void = (applicaiton_id, applicantion_name, param, loading) => {
+  exportExcel(applicantion_name, `${prefix}/${applicaiton_id}/chat/export`, param, loading)
+}
+
 /**
 * 删除日志
 * @param 参数 applicaiton_id, chat_id,
--- a/ui/src/components/app-charts/components/LineCharts.vue
+++ b/ui/src/components/app-charts/components/LineCharts.vue
@ -4,6 +4,7 @@
 <script lang="ts" setup>
 import { onMounted, nextTick, watch, onBeforeUnmount } from 'vue'
 import * as echarts from 'echarts'
+import { numberFormat } from '@/utils/utils'
 const props = defineProps({
  id: {
    type: String,
@ -57,12 +58,13 @@ function initChart() {
    },
    tooltip: {
      trigger: 'axis',
-      axisPointer: {
-        type: 'cross',
-        label: {
-          backgroundColor: '#6a7985'
-        }
-      }
+      valueFormatter: (value: any) => numberFormat(value)
+      // axisPointer: {
+      //   type: 'cross',
+      //   label: {
+      //     backgroundColor: '#6a7985'
+      //   }
+      // }
    },
    legend: {
      right: 0,
@ -89,6 +91,11 @@ function initChart() {
        lineStyle: {
          color: '#EFF0F1'
        }
+      },
+      axisLabel: {
+        formatter: (value: any) => {
+          return numberFormat(value)
+        }
      }
    },
    series: series