fix: 修改已知bug(#30)

* fix: 刷新公共访问链接后,客户端统计重置

* fix: 导出未提交的sql文件

* fix: 创建 MaxKB 在线文档的知识库,只能获取根地址数据,子地址数据无法获取
This commit is contained in:
shaohuzhang1 2024-04-02 19:32:04 +08:00 committed by GitHub
parent 8e018a1ee8
commit 11d8c6f174
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 96 additions and 27 deletions

View File

@ -209,15 +209,16 @@ class ApplicationSerializer(serializers.Serializer):
access_token = self.data.get("access_token")
application_access_token = QuerySet(ApplicationAccessToken).filter(access_token=access_token).first()
if application_access_token is not None and application_access_token.is_active:
if token is None or (token_details is not None and 'client_id' not in token_details) or (
token_details is not None and token_details.get(
'access_token') != application_access_token.access_token):
if token_details is not None and 'client_id' in token_details and token_details.get(
'client_id') is not None:
client_id = token_details.get('client_id')
else:
client_id = str(uuid.uuid1())
token = signing.dumps({'application_id': str(application_access_token.application_id),
'user_id': str(application_access_token.application.user.id),
'access_token': application_access_token.access_token,
'type': AuthenticationType.APPLICATION_ACCESS_TOKEN.value,
'client_id': client_id})
token = signing.dumps({'application_id': str(application_access_token.application_id),
'user_id': str(application_access_token.application.user.id),
'access_token': application_access_token.access_token,
'type': AuthenticationType.APPLICATION_ACCESS_TOKEN.value,
'client_id': client_id})
return token
else:
raise NotFound404(404, "无效的access_token")

View File

@ -0,0 +1,37 @@
-- Chat export query: lists every chat in application_chat together with its
-- chat records (question, answer, token counts, run time, details, vote
-- status, creation time) and the paragraphs referenced by each record.
-- Both joins are LEFT JOINs, so a chat without records still yields one row
-- with NULL record columns.
-- This file contains no WHERE / ORDER BY clause.
-- NOTE(review): presumably the caller appends filtering to this text — confirm.
SELECT
application_chat."id" as chat_id,
application_chat.abstract as abstract,
application_chat_record_temp.problem_text as problem_text,
application_chat_record_temp.answer_text as answer_text,
application_chat_record_temp.message_tokens as message_tokens,
application_chat_record_temp.answer_tokens as answer_tokens,
application_chat_record_temp.run_time as run_time,
application_chat_record_temp.details::JSON as details,
application_chat_record_temp."index" as "index",
application_chat_record_temp.improve_paragraph_list as improve_paragraph_list,
application_chat_record_temp.vote_status as vote_status,
application_chat_record_temp.create_time as create_time
FROM
application_chat application_chat
-- Per-chat aggregates: record count, number of records with vote_status '0'
-- (star_num) and '1' (trample_num), and the total number of ids stored in
-- improve_paragraph_id_list across the chat's records (mark_sum).
-- NOTE(review): none of these aggregate columns appear in the SELECT list
-- above; presumably they exist so an externally appended filter can reference
-- chat_record_temp — verify before removing this join.
LEFT JOIN (
SELECT COUNT
( "id" ) AS chat_record_count,
SUM ( CASE WHEN "vote_status" = '0' THEN 1 ELSE 0 END ) AS star_num,
SUM ( CASE WHEN "vote_status" = '1' THEN 1 ELSE 0 END ) AS trample_num,
SUM ( CASE WHEN array_length( application_chat_record.improve_paragraph_id_list, 1 ) IS NULL THEN 0 ELSE array_length( application_chat_record.improve_paragraph_id_list, 1 ) END ) AS mark_sum,
chat_id
FROM
application_chat_record
GROUP BY
application_chat_record.chat_id
) chat_record_temp ON application_chat."id" = chat_record_temp.chat_id
-- Every chat record with one extra column, improve_paragraph_list:
-- '{}' when array_length reports NULL for improve_paragraph_id_list,
-- otherwise the paragraph rows whose id is in that list, each converted
-- with row_to_json and collected with ARRAY_AGG.
LEFT JOIN (
SELECT
*,
CASE
WHEN array_length( application_chat_record.improve_paragraph_id_list, 1 ) IS NULL THEN
'{}' ELSE ( SELECT ARRAY_AGG ( row_to_json ( paragraph ) ) FROM paragraph WHERE "id" = ANY ( application_chat_record.improve_paragraph_id_list ) )
END as improve_paragraph_list
FROM
application_chat_record application_chat_record
) application_chat_record_temp ON application_chat_record_temp.chat_id = application_chat."id"

View File

@ -9,7 +9,7 @@
import re
from typing import List
import chardet
from charset_normalizer import detect
from common.handle.base_split_handle import BaseSplitHandle
from common.util.split_model import SplitModel
@ -26,7 +26,7 @@ class TextSplitHandle(BaseSplitHandle):
file_name: str = file.name.lower()
if file_name.endswith(".md") or file_name.endswith('.txt'):
return True
result = chardet.detect(buffer)
result = detect(buffer)
if result['encoding'] != 'ascii' and result['confidence'] > 0.5:
return True
return False
@ -38,7 +38,7 @@ class TextSplitHandle(BaseSplitHandle):
else:
split_model = SplitModel(default_pattern_list, with_filter=with_filter, limit=limit)
try:
content = buffer.decode(chardet.detect(buffer)['encoding'])
content = buffer.decode(detect(buffer)['encoding'])
except BaseException as e:
return {'name': file.name,
'content': []}

View File

@ -4,9 +4,8 @@ import re
import traceback
from functools import reduce
from typing import List, Set
from urllib.parse import urljoin, urlparse, ParseResult, urlsplit
from urllib.parse import urljoin, urlparse, ParseResult, urlsplit, urlunparse
import chardet
import html2text as ht
import requests
from bs4 import BeautifulSoup
@ -44,6 +43,13 @@ class ForkManage:
ForkManage.fork_child(child_link, selector_list, level - 1, exclude_link_url, fork_handler)
def remove_fragment(url: str) -> str:
    """Return ``url`` with its ``#fragment`` part removed.

    The URL is split with ``urlparse`` and reassembled with ``urlunparse``;
    scheme, netloc, path, params and query are carried over unchanged.

    :param url: URL that may end in a ``#fragment``.
    :return: the reassembled URL without a fragment.
    """
    # Use '' rather than None: the components of a parse result are strings,
    # and the empty string is how urllib.parse represents an absent part.
    return urlunparse(urlparse(url)._replace(fragment=''))
class Fork:
class Response:
def __init__(self, content: str, child_link_list: List[ChildLink], status, message: str):
@ -61,6 +67,7 @@ class Fork:
return Fork.Response('', [], 500, message)
def __init__(self, base_fork_url: str, selector_list: List[str]):
base_fork_url = remove_fragment(base_fork_url)
self.base_fork_url = urljoin(base_fork_url if base_fork_url.endswith("/") else base_fork_url + '/', '.')
parsed = urlsplit(base_fork_url)
query = parsed.query
@ -74,9 +81,11 @@ class Fork:
fragment='').geturl()
def get_child_link_list(self, bf: BeautifulSoup):
pattern = "^((?!(http:|https:|tel:/|#|mailto:|javascript:))|" + self.base_fork_url + ").*"
pattern = "^((?!(http:|https:|tel:/|#|mailto:|javascript:))|" + self.base_fork_url + "|/).*"
link_list = bf.find_all(name='a', href=re.compile(pattern))
result = [ChildLink(link.get('href'), link) for link in link_list]
result = [ChildLink(link.get('href'), link) if link.get('href').startswith(self.base_url) else ChildLink(
self.base_url + link.get('href'), link) for link in link_list]
result = [row for row in result if row.url.startswith(self.base_fork_url)]
return result
def get_content_html(self, bf: BeautifulSoup):
@ -122,9 +131,18 @@ class Fork:
@staticmethod
def get_beautiful_soup(response):
encoding = response.encoding if response.encoding and response.encoding != 'ISO-8859-1' is not None else response.apparent_encoding
encoding = response.encoding if response.encoding is not None and response.encoding != 'ISO-8859-1' else response.apparent_encoding
html_content = response.content.decode(encoding)
return BeautifulSoup(html_content, "html.parser")
beautiful_soup = BeautifulSoup(html_content, "html.parser")
meta_list = beautiful_soup.find_all('meta')
charset_list = [meta.attrs.get('charset') for meta in meta_list if
meta.attrs is not None and 'charset' in meta.attrs]
if len(charset_list) > 0:
charset = charset_list[0]
if charset != encoding:
html_content = response.content.decode(charset)
return BeautifulSoup(html_content, "html.parser")
return beautiful_soup
def fork(self):
try:

View File

@ -30,12 +30,11 @@ from common.util.common import post
from common.util.field_message import ErrMessage
from common.util.file_util import get_file_content
from common.util.fork import Fork
from common.util.split_model import SplitModel, get_split_model
from common.util.split_model import get_split_model
from dataset.models.data_set import DataSet, Document, Paragraph, Problem, Type, Status, ProblemParagraphMapping
from dataset.serializers.common_serializers import BatchSerializer, MetaSerializer
from dataset.serializers.paragraph_serializers import ParagraphSerializers, ParagraphInstanceSerializer
from smartdoc.conf import PROJECT_DIR
import chardet
class DocumentEditInstanceSerializer(ApiMixin, serializers.Serializer):

View File

@ -17,7 +17,6 @@ jieba = "^0.42.1"
diskcache = "^5.6.3"
pillow = "^10.2.0"
filetype = "^1.2.0"
chardet = "^5.2.0"
torch = "^2.2.1"
sentence-transformers = "^2.2.2"
blinker = "^1.6.3"
@ -30,7 +29,6 @@ html2text = "^2024.2.26"
langchain-openai = "^0.0.8"
django-ipware = "^6.0.4"
django-apscheduler = "^0.6.2"
chardet2 = "^2.0.3"
pymupdf = "^1.24.0"
python-docx = "^1.1.0"
xlwt = "^1.3.0"

View File

@ -38,6 +38,15 @@ const exportChatLog: (
exportExcel(applicantion_name, `${prefix}/${applicaiton_id}/chat/export`, param, loading)
}
/**
 * Request the chat-log export for one application.
 *
 * Delegates to `exportExcel`, passing the application name, the endpoint
 * path `${prefix}/${applicaiton_id}/chat/export`, the query parameters and
 * the optional loading ref.
 *
 * NOTE(review): the hunk context directly above this declaration also shows
 * `const exportChatLog` ending in the same `exportExcel(...)` call; two
 * `const` declarations with the same name in one scope would not compile —
 * confirm that only one of them is kept.
 *
 * @param applicaiton_id id of the application whose chat log is exported
 * @param applicantion_name application name, forwarded as the first argument
 *        of `exportExcel` (presumably the download file name — TODO confirm)
 * @param param query parameters forwarded to the export request
 * @param loading optional ref forwarded to `exportExcel`
 */
const exportChatLog: (
applicaiton_id: string,
applicantion_name: string,
param: any,
loading?: Ref<boolean>
) => void = (applicaiton_id, applicantion_name, param, loading) => {
exportExcel(applicantion_name, `${prefix}/${applicaiton_id}/chat/export`, param, loading)
}
/**
*
* @param applicaiton_id, chat_id,

View File

@ -4,6 +4,7 @@
<script lang="ts" setup>
import { onMounted, nextTick, watch, onBeforeUnmount } from 'vue'
import * as echarts from 'echarts'
import { numberFormat } from '@/utils/utils'
const props = defineProps({
id: {
type: String,
@ -57,12 +58,13 @@ function initChart() {
},
tooltip: {
trigger: 'axis',
axisPointer: {
type: 'cross',
label: {
backgroundColor: '#6a7985'
}
}
valueFormatter: (value: any) => numberFormat(value)
// axisPointer: {
// type: 'cross',
// label: {
// backgroundColor: '#6a7985'
// }
// }
},
legend: {
right: 0,
@ -89,6 +91,11 @@ function initChart() {
lineStyle: {
color: '#EFF0F1'
}
},
axisLabel: {
formatter: (value: any) => {
return numberFormat(value)
}
}
},
series: series