From aafc855435f0986b4318f9b035e8dbc7f0b667c1 Mon Sep 17 00:00:00 2001
From: wxg0103 <727495428@qq.com>
Date: Tue, 26 Nov 2024 14:47:07 +0800
Subject: [PATCH 01/11] =?UTF-8?q?refactor:=20=E4=BC=98=E5=8C=96=E6=89=A7?=
=?UTF-8?q?=E8=A1=8C=E8=AF=A6=E6=83=85=E5=B1=95=E7=A4=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
ui/src/components/ai-chat/ExecutionDetailDialog.vue | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/ui/src/components/ai-chat/ExecutionDetailDialog.vue b/ui/src/components/ai-chat/ExecutionDetailDialog.vue
index 46a8f3946..77cf14d79 100644
--- a/ui/src/components/ai-chat/ExecutionDetailDialog.vue
+++ b/ui/src/components/ai-chat/ExecutionDetailDialog.vue
@@ -63,11 +63,10 @@
{{ f.label }}: {{ f.value }}
-
上传的文档:
+
文档:
- {{ f.name }}
-
上传的图片:
+
图片:
From 52575360ed0af000e1f65610e73b24bf4b89291f Mon Sep 17 00:00:00 2001
From: wxg0103 <727495428@qq.com>
Date: Tue, 26 Nov 2024 15:55:58 +0800
Subject: [PATCH 02/11] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E5=9C=A8?=
=?UTF-8?q?=E5=AF=B9=E8=AF=9D=E6=97=A5=E5=BF=97=E4=B8=AD=E5=88=A0=E9=99=A4?=
=?UTF-8?q?=E4=BF=9D=E5=AD=98=E7=9A=84=E6=96=87=E6=A1=A3=EF=BC=8C=E5=85=B3?=
=?UTF-8?q?=E8=81=94=E7=9A=84=E9=97=AE=E9=A2=98=E6=9C=AA=E5=88=A0=E9=99=A4?=
=?UTF-8?q?=E7=9A=84=E7=BC=BA=E9=99=B7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
--bug=1049555 --user=王孝刚 【应用】将对话日志保存到文档后,在对话日志中删除保存的文档,关联的问题未删除 https://www.tapd.cn/57709429/s/1617726
---
.../serializers/chat_message_serializers.py | 1 +
apps/application/serializers/chat_serializers.py | 4 +---
apps/dataset/serializers/paragraph_serializers.py | 12 ++++++++++--
3 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/apps/application/serializers/chat_message_serializers.py b/apps/application/serializers/chat_message_serializers.py
index 919cb71cf..22fc18bae 100644
--- a/apps/application/serializers/chat_message_serializers.py
+++ b/apps/application/serializers/chat_message_serializers.py
@@ -154,6 +154,7 @@ def get_post_handler(chat_info: ChatInfo):
details=manage.get_details(),
message_tokens=manage.context['message_tokens'],
answer_tokens=manage.context['answer_tokens'],
+ answer_text_list=[answer_text],
run_time=manage.context['run_time'],
index=len(chat_info.chat_record_list) + 1)
chat_info.append_chat_record(chat_record, client_id)
diff --git a/apps/application/serializers/chat_serializers.py b/apps/application/serializers/chat_serializers.py
index f7bb11f24..468c70316 100644
--- a/apps/application/serializers/chat_serializers.py
+++ b/apps/application/serializers/chat_serializers.py
@@ -608,13 +608,11 @@ class ChatRecordSerializer(serializers.Serializer):
title=instance.get("title") if 'title' in instance else '')
problem_text = instance.get('problem_text') if instance.get(
'problem_text') is not None else chat_record.problem_text
- problem = Problem(id=uuid.uuid1(), content=problem_text, dataset_id=dataset_id)
+ problem, _ = Problem.objects.get_or_create(content=problem_text, dataset_id=dataset_id)
problem_paragraph_mapping = ProblemParagraphMapping(id=uuid.uuid1(), dataset_id=dataset_id,
document_id=document_id,
problem_id=problem.id,
paragraph_id=paragraph.id)
- # 插入问题
- problem.save()
# 插入段落
paragraph.save()
# 插入关联问题
diff --git a/apps/dataset/serializers/paragraph_serializers.py b/apps/dataset/serializers/paragraph_serializers.py
index 82aacc79d..a115e544b 100644
--- a/apps/dataset/serializers/paragraph_serializers.py
+++ b/apps/dataset/serializers/paragraph_serializers.py
@@ -540,8 +540,16 @@ class ParagraphSerializers(ApiMixin, serializers.Serializer):
if with_valid:
self.is_valid(raise_exception=True)
paragraph_id = self.data.get('paragraph_id')
- QuerySet(Paragraph).filter(id=paragraph_id).delete()
- QuerySet(ProblemParagraphMapping).filter(paragraph_id=paragraph_id).delete()
+ Paragraph.objects.filter(id=paragraph_id).delete()
+
+ problem_id = ProblemParagraphMapping.objects.filter(paragraph_id=paragraph_id).values_list('problem_id',
+ flat=True).first()
+
+ if problem_id is not None:
+ if ProblemParagraphMapping.objects.filter(problem_id=problem_id).count() == 1:
+ Problem.objects.filter(id=problem_id).delete()
+ ProblemParagraphMapping.objects.filter(paragraph_id=paragraph_id).delete()
+
update_document_char_length(self.data.get('document_id'))
delete_embedding_by_paragraph(paragraph_id)
From 93a5c6eb2dd4fb1c38b7b58d882d336a5fbc65fe Mon Sep 17 00:00:00 2001
From: shaohuzhang1 <80892890+shaohuzhang1@users.noreply.github.com>
Date: Tue, 26 Nov 2024 16:24:03 +0800
Subject: [PATCH 03/11] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E6=96=87?=
=?UTF-8?q?=E6=A1=A3=E7=8A=B6=E6=80=81=E6=95=B0=E6=8D=AE=E6=95=B0=E6=8D=AE?=
=?UTF-8?q?=E9=94=99=E8=AF=AF=E9=97=AE=E9=A2=98=20(#1697)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
apps/common/db/sql_execute.py | 3 +-
apps/common/event/listener_manage.py | 4 +-
apps/common/util/page_utils.py | 3 +-
.../serializers/document_serializers.py | 11 +-
ui/src/views/document/component/Status.vue | 115 ++----------------
.../views/document/component/StatusTable.vue | 104 ++++++++++++++++
6 files changed, 130 insertions(+), 110 deletions(-)
create mode 100644 ui/src/views/document/component/StatusTable.vue
diff --git a/apps/common/db/sql_execute.py b/apps/common/db/sql_execute.py
index 79e7de46a..b12297e1f 100644
--- a/apps/common/db/sql_execute.py
+++ b/apps/common/db/sql_execute.py
@@ -36,8 +36,9 @@ def update_execute(sql: str, params):
"""
with connection.cursor() as cursor:
cursor.execute(sql, params)
+ affected_rows = cursor.rowcount
cursor.close()
- return None
+ return affected_rows
def select_list(sql: str, params: List):
diff --git a/apps/common/event/listener_manage.py b/apps/common/event/listener_manage.py
index a98b29bf1..9c16ad5c2 100644
--- a/apps/common/event/listener_manage.py
+++ b/apps/common/event/listener_manage.py
@@ -10,11 +10,12 @@ import datetime
import logging
import os
import threading
+import time
import traceback
from typing import List
import django.db.models
-from django.db import models
+from django.db import models, transaction
from django.db.models import QuerySet
from django.db.models.functions import Substr, Reverse
from langchain_core.embeddings import Embeddings
@@ -168,6 +169,7 @@ class ListenerManagement:
@staticmethod
def get_aggregation_document_status(document_id):
def aggregation_document_status():
+ pass
sql = get_file_content(
os.path.join(PROJECT_DIR, "apps", "dataset", 'sql', 'update_document_status_meta.sql'))
native_update({'document_custom_sql': QuerySet(Document).filter(id=document_id)}, sql, with_table_name=True)
diff --git a/apps/common/util/page_utils.py b/apps/common/util/page_utils.py
index 7fc176b68..92f21849b 100644
--- a/apps/common/util/page_utils.py
+++ b/apps/common/util/page_utils.py
@@ -18,10 +18,11 @@ def page(query_set, page_size, handler, is_the_task_interrupted=lambda: False):
@param is_the_task_interrupted: 任务是否被中断
@return:
"""
+ query = query_set.order_by("id")
count = query_set.count()
for i in range(0, ceil(count / page_size)):
if is_the_task_interrupted():
return
offset = i * page_size
- paragraph_list = query_set[offset: offset + page_size]
+ paragraph_list = query.all()[offset: offset + page_size]
handler(paragraph_list)
diff --git a/apps/dataset/serializers/document_serializers.py b/apps/dataset/serializers/document_serializers.py
index 1ab74ead2..70facd8db 100644
--- a/apps/dataset/serializers/document_serializers.py
+++ b/apps/dataset/serializers/document_serializers.py
@@ -613,7 +613,8 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
document_id = self.data.get("document_id")
ListenerManagement.update_status(QuerySet(Document).filter(id=document_id), TaskType.EMBEDDING,
State.PENDING)
- ListenerManagement.update_status(QuerySet(Paragraph).filter(document_id=document_id), TaskType.EMBEDDING,
+ ListenerManagement.update_status(QuerySet(Paragraph).filter(document_id=document_id),
+ TaskType.EMBEDDING,
State.PENDING)
ListenerManagement.get_aggregation_document_status(document_id)()
embedding_model_id = get_embedding_model_id_by_dataset_id(dataset_id=self.data.get('dataset_id'))
@@ -708,8 +709,8 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
@staticmethod
def post_embedding(result, document_id, dataset_id):
- model_id = get_embedding_model_id_by_dataset_id(dataset_id)
- embedding_by_document.delay(document_id, model_id)
+ DocumentSerializers.Operate(
+ data={'dataset_id': dataset_id, 'document_id': document_id}).refresh()
return result
@staticmethod
@@ -907,8 +908,8 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
@staticmethod
def post_embedding(document_list, dataset_id):
for document_dict in document_list:
- model_id = get_embedding_model_id_by_dataset_id(dataset_id)
- embedding_by_document.delay(document_dict.get('id'), model_id)
+ DocumentSerializers.Operate(
+ data={'dataset_id': dataset_id, 'document_id': document_dict.get('id')}).refresh()
return document_list
@post(post_function=post_embedding)
diff --git a/ui/src/views/document/component/Status.vue b/ui/src/views/document/component/Status.vue
index 8bbab6784..139365371 100644
--- a/ui/src/views/document/component/Status.vue
+++ b/ui/src/views/document/component/Status.vue
@@ -1,51 +1,13 @@
-
-
-
- {{ taskTypeMap[status.type] }}
-
-
-
- {{ stateMap[status.state](status.type) }}
-
-
-
- {{ stateMap[status.state](status.type) }}
-
-
-
- {{ stateMap[status.state](status.type) }}
-
-
-
- {{ stateMap[status.state](status.type) }}
-
-
-
- {{ stateMap[aggStatus.value](aggStatus.key) }}
-
-
-
- 完成
- {{
- Object.keys(status.aggs ? status.aggs : {})
- .filter((k) => k == State.SUCCESS)
- .map((k) => status.aggs[k])
- .reduce((x: any, y: any) => x + y, 0)
- }}/{{
- Object.values(status.aggs ? status.aggs : {}).reduce((x: any, y: any) => x + y, 0)
- }}
-
-
- {{
- status.time
- ? status.time[
- status.state == State.REVOKED ? State.REVOKED : State.PENDING
- ]?.substring(0, 19)
- : undefined
- }}
-
-
+
+
@@ -72,11 +34,11 @@
diff --git a/ui/src/views/document/component/StatusTable.vue b/ui/src/views/document/component/StatusTable.vue
new file mode 100644
index 000000000..506f1bf98
--- /dev/null
+++ b/ui/src/views/document/component/StatusTable.vue
@@ -0,0 +1,104 @@
+
+
+ {{ taskTypeMap[status.type] }}
+
+
+
+ {{ stateMap[status.state](status.type) }}
+
+
+
+ {{ stateMap[status.state](status.type) }}
+
+
+
+ {{ stateMap[status.state](status.type) }}
+
+
+
+ {{ stateMap[status.state](status.type) }}
+
+
+
+ {{ stateMap[status.state](status.type) }}
+
+
+
+ 完成
+ {{
+ Object.keys(status.aggs ? status.aggs : {})
+ .filter((k) => k == State.SUCCESS)
+ .map((k) => status.aggs[k])
+ .reduce((x: any, y: any) => x + y, 0)
+ }}/{{ Object.values(status.aggs ? status.aggs : {}).reduce((x: any, y: any) => x + y, 0) }}
+
+
+ {{
+ status.time
+ ? status.time[status.state == State.REVOKED ? State.REVOKED : State.PENDING]?.substring(
+ 0,
+ 19
+ )
+ : undefined
+ }}
+
+
+
+
+
From f1083d973e9cbc9e12d29c3049990b53620ad934 Mon Sep 17 00:00:00 2001
From: CaptainB
Date: Tue, 26 Nov 2024 15:44:20 +0800
Subject: [PATCH 04/11] =?UTF-8?q?refactor:=20=E6=96=87=E6=A1=A3=E6=8F=90?=
=?UTF-8?q?=E5=8F=96details=E4=BF=9D=E5=AD=98content=E9=95=BF=E5=BA=A6?=
=?UTF-8?q?=E9=99=90=E5=88=B6=E4=B8=BA500?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../impl/base_document_extract_node.py | 2 +-
ui/src/components/ai-chat/ExecutionDetailDialog.vue | 11 ++++++++++-
2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py b/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py
index 3cb1d601c..2b30a9320 100644
--- a/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py
+++ b/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py
@@ -40,7 +40,7 @@ class BaseDocumentExtractNode(IDocumentExtractNode):
"index": index,
'run_time': self.context.get('run_time'),
'type': self.node.type,
- # 'content': self.context.get('content'), # 不保存content内容,因为content内容可能会很大
+ 'content': self.context.get('content')[:500] + '...', # 不保存content全部内容,因为content内容可能会很大
'status': self.status,
'err_message': self.err_message,
'document_list': self.context.get('document_list')
diff --git a/ui/src/components/ai-chat/ExecutionDetailDialog.vue b/ui/src/components/ai-chat/ExecutionDetailDialog.vue
index 77cf14d79..673de74d7 100644
--- a/ui/src/components/ai-chat/ExecutionDetailDialog.vue
+++ b/ui/src/components/ai-chat/ExecutionDetailDialog.vue
@@ -218,7 +218,16 @@
-
参数输出
+
+ 参数输出
+
+
+
+
Date: Tue, 26 Nov 2024 17:37:10 +0800
Subject: [PATCH 05/11] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E5=88=A0?=
=?UTF-8?q?=E9=99=A4=E7=9F=A5=E8=AF=86=E5=BA=93=E7=9A=84=E6=AE=B5=E8=90=BD?=
=?UTF-8?q?=E5=90=8E=EF=BC=8C=E6=97=A5=E5=BF=97=E5=86=85=E5=AE=B9=E8=BF=98?=
=?UTF-8?q?=E6=98=AF=E6=98=BE=E7=A4=BA=E6=A0=87=E6=B3=A8=E7=9A=84=E7=BC=BA?=
=?UTF-8?q?=E9=99=B7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
apps/application/serializers/chat_serializers.py | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/apps/application/serializers/chat_serializers.py b/apps/application/serializers/chat_serializers.py
index 468c70316..d07b5676e 100644
--- a/apps/application/serializers/chat_serializers.py
+++ b/apps/application/serializers/chat_serializers.py
@@ -395,7 +395,8 @@ class ChatRecordSerializerModel(serializers.ModelSerializer):
class Meta:
model = ChatRecord
fields = ['id', 'chat_id', 'vote_status', 'problem_text', 'answer_text',
- 'message_tokens', 'answer_tokens', 'const', 'improve_paragraph_id_list', 'run_time', 'index','answer_text_list',
+ 'message_tokens', 'answer_tokens', 'const', 'improve_paragraph_id_list', 'run_time', 'index',
+ 'answer_text_list',
'create_time', 'update_time']
@@ -457,6 +458,7 @@ class ChatRecordSerializer(serializers.Serializer):
def reset_chat_record(chat_record):
dataset_list = []
paragraph_list = []
+
if 'search_step' in chat_record.details and chat_record.details.get('search_step').get(
'paragraph_list') is not None:
paragraph_list = chat_record.details.get('search_step').get(
@@ -468,6 +470,14 @@ class ChatRecordSerializer(serializers.Serializer):
row in
paragraph_list],
{}).items()]
+ if len(chat_record.improve_paragraph_id_list) > 0:
+ paragraph_model_list = QuerySet(Paragraph).filter(id__in=chat_record.improve_paragraph_id_list)
+ if len(paragraph_model_list) < len(chat_record.improve_paragraph_id_list):
+ paragraph_model_id_list = [str(p.id) for p in paragraph_model_list]
+ chat_record.improve_paragraph_id_list = list(
+ filter(lambda p_id: paragraph_model_id_list.__contains__(p_id),
+ chat_record.improve_paragraph_id_list))
+ chat_record.save()
return {
**ChatRecordSerializerModel(chat_record).data,
From a37a6184b45532af60e252e54f2b8ef83d584e23 Mon Sep 17 00:00:00 2001
From: wangdan-fit2cloud
Date: Tue, 26 Nov 2024 19:10:03 +0800
Subject: [PATCH 06/11] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E9=83=A8?=
=?UTF-8?q?=E5=88=86=E6=A0=B7=E5=BC=8F=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../component/chat-input-operate/index.vue | 17 +++++++++--------
.../ai-chat/component/user-form/index.vue | 4 ++--
ui/src/views/application/ApplicationSetting.vue | 2 +-
ui/src/workflow/common/data.ts | 2 +-
4 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/ui/src/components/ai-chat/component/chat-input-operate/index.vue b/ui/src/components/ai-chat/component/chat-input-operate/index.vue
index 60b474a30..b6dd4bf1e 100644
--- a/ui/src/components/ai-chat/component/chat-input-operate/index.vue
+++ b/ui/src/components/ai-chat/component/chat-input-operate/index.vue
@@ -84,14 +84,15 @@
:on-change="(file: any, fileList: any) => uploadFile(file, fileList)"
>
- 上传文件:最多{{
- props.applicationDetails.file_upload_setting.maxFiles
- }}个,每个文件限制
- {{ props.applicationDetails.file_upload_setting.fileLimit }}MB
文件类型:{{
- getAcceptList().replace(/\./g, '').replace(/,/g, '、').toUpperCase()
- }}
+
+ 上传文件:最多{{
+ props.applicationDetails.file_upload_setting.maxFiles
+ }}个,每个文件限制
+ {{ props.applicationDetails.file_upload_setting.fileLimit }}MB
文件类型:{{
+ getAcceptList().replace(/\./g, '').replace(/,/g, '、').toUpperCase()
+ }}
+
+
diff --git a/ui/src/components/ai-chat/component/user-form/index.vue b/ui/src/components/ai-chat/component/user-form/index.vue
index b54fbfea8..cec3b222f 100644
--- a/ui/src/components/ai-chat/component/user-form/index.vue
+++ b/ui/src/components/ai-chat/component/user-form/index.vue
@@ -20,7 +20,7 @@
:key="dynamicsFormRefresh"
v-model="form_data_context"
:model="form_data_context"
- label-position="left"
+ label-position="top"
require-asterisk-position="right"
:render_data="inputFieldList"
ref="dynamicsFormRef"
@@ -29,7 +29,7 @@
v-if="type === 'debug-ai-chat'"
v-model="api_form_data_context"
:model="api_form_data_context"
- label-position="left"
+ label-position="top"
require-asterisk-position="right"
:render_data="apiInputFieldList"
ref="dynamicsFormRef2"
diff --git a/ui/src/views/application/ApplicationSetting.vue b/ui/src/views/application/ApplicationSetting.vue
index e80d6b5a1..a7bc0281a 100644
--- a/ui/src/views/application/ApplicationSetting.vue
+++ b/ui/src/views/application/ApplicationSetting.vue
@@ -538,7 +538,7 @@
diff --git a/ui/src/workflow/common/data.ts b/ui/src/workflow/common/data.ts
index c509520da..ebfe43c58 100644
--- a/ui/src/workflow/common/data.ts
+++ b/ui/src/workflow/common/data.ts
@@ -203,7 +203,7 @@ export const documentExtractNode = {
config: {
fields: [
{
- label: '文件内容',
+ label: '文档内容',
value: 'content'
}
]
From da7e9b146017e6c6b85ee6810cc430ebc5025831 Mon Sep 17 00:00:00 2001
From: shaohuzhang1 <80892890+shaohuzhang1@users.noreply.github.com>
Date: Tue, 26 Nov 2024 19:40:26 +0800
Subject: [PATCH 07/11] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E6=96=87?=
=?UTF-8?q?=E6=A1=A3=E7=8A=B6=E6=80=81=E9=83=A8=E5=88=86=E9=97=AE=E9=A2=98?=
=?UTF-8?q?=20(#1699)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
apps/common/event/listener_manage.py | 21 +++++----
...tus_meta_paragraph_status_meta_and_more.py | 6 +++
.../serializers/document_serializers.py | 3 ++
apps/dataset/task/generate.py | 25 ++++++++---
apps/embedding/task/embedding.py | 1 +
apps/smartdoc/settings/lib.py | 4 +-
.../views/document/component/StatusTable.vue | 20 ++++++---
ui/src/views/document/index.vue | 45 ++++++++++++++++---
8 files changed, 98 insertions(+), 27 deletions(-)
diff --git a/apps/common/event/listener_manage.py b/apps/common/event/listener_manage.py
index 9c16ad5c2..dc57bfaaf 100644
--- a/apps/common/event/listener_manage.py
+++ b/apps/common/event/listener_manage.py
@@ -181,7 +181,8 @@ class ListenerManagement:
def aggregation_document_status():
sql = get_file_content(
os.path.join(PROJECT_DIR, "apps", "dataset", 'sql', 'update_document_status_meta.sql'))
- native_update({'document_custom_sql': QuerySet(Document).filter(dataset_id=dataset_id)}, sql)
+ native_update({'document_custom_sql': QuerySet(Document).filter(dataset_id=dataset_id)}, sql,
+ with_table_name=True)
return aggregation_document_status
@@ -190,7 +191,7 @@ class ListenerManagement:
def aggregation_document_status():
sql = get_file_content(
os.path.join(PROJECT_DIR, "apps", "dataset", 'sql', 'update_document_status_meta.sql'))
- native_update({'document_custom_sql': queryset}, sql)
+ native_update({'document_custom_sql': queryset}, sql, with_table_name=True)
return aggregation_document_status
@@ -249,19 +250,23 @@ class ListenerManagement:
"""
if not try_lock('embedding' + str(document_id)):
return
- max_kb.info(f"开始--->向量化文档:{document_id}")
- # 批量修改状态为PADDING
- ListenerManagement.update_status(QuerySet(Document).filter(id=document_id), TaskType.EMBEDDING, State.STARTED)
try:
- # 删除文档向量数据
- VectorStore.get_embedding_vector().delete_by_document_id(document_id)
-
def is_the_task_interrupted():
document = QuerySet(Document).filter(id=document_id).first()
if document is None or Status(document.status)[TaskType.EMBEDDING] == State.REVOKE:
return True
return False
+ if is_the_task_interrupted():
+ return
+ max_kb.info(f"开始--->向量化文档:{document_id}")
+ # 批量修改状态为PADDING
+ ListenerManagement.update_status(QuerySet(Document).filter(id=document_id), TaskType.EMBEDDING,
+ State.STARTED)
+
+ # 删除文档向量数据
+ VectorStore.get_embedding_vector().delete_by_document_id(document_id)
+
# 根据段落进行向量化处理
page(QuerySet(Paragraph).filter(document_id=document_id).values('id'), 5,
ListenerManagement.get_embedding_paragraph_apply(embedding_model, is_the_task_interrupted,
diff --git a/apps/dataset/migrations/0011_document_status_meta_paragraph_status_meta_and_more.py b/apps/dataset/migrations/0011_document_status_meta_paragraph_status_meta_and_more.py
index c64a4db20..e47bfd60c 100644
--- a/apps/dataset/migrations/0011_document_status_meta_paragraph_status_meta_and_more.py
+++ b/apps/dataset/migrations/0011_document_status_meta_paragraph_status_meta_and_more.py
@@ -7,6 +7,11 @@ import dataset
from common.event import ListenerManagement
from dataset.models import State, TaskType
+sql = """
+UPDATE "document"
+SET status ="replace"(status, '1', '3')
+"""
+
def updateDocumentStatus(apps, schema_editor):
ParagraphModel = apps.get_model('dataset', 'Paragraph')
@@ -43,5 +48,6 @@ class Migration(migrations.Migration):
name='status',
field=models.CharField(default=dataset.models.data_set.Status.__str__, max_length=20, verbose_name='状态'),
),
+ migrations.RunSQL(sql),
migrations.RunPython(updateDocumentStatus)
]
diff --git a/apps/dataset/serializers/document_serializers.py b/apps/dataset/serializers/document_serializers.py
index 70facd8db..45057d9bc 100644
--- a/apps/dataset/serializers/document_serializers.py
+++ b/apps/dataset/serializers/document_serializers.py
@@ -297,6 +297,9 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
ListenerManagement.update_status(QuerySet(Document).filter(id__in=document_id_list),
TaskType.EMBEDDING,
State.PENDING)
+ ListenerManagement.update_status(QuerySet(Paragraph).filter(document_id__in=document_id_list),
+ TaskType.EMBEDDING,
+ State.PENDING)
embedding_by_document_list.delay(document_id_list, model_id)
else:
update_embedding_dataset_id(pid_list, target_dataset_id)
diff --git a/apps/dataset/task/generate.py b/apps/dataset/task/generate.py
index e81039744..6a085c448 100644
--- a/apps/dataset/task/generate.py
+++ b/apps/dataset/task/generate.py
@@ -51,21 +51,28 @@ def get_generate_problem(llm_model, prompt, post_apply=lambda: None, is_the_task
return generate_problem
+def get_is_the_task_interrupted(document_id):
+ def is_the_task_interrupted():
+ document = QuerySet(Document).filter(id=document_id).first()
+ if document is None or Status(document.status)[TaskType.GENERATE_PROBLEM] == State.REVOKE:
+ return True
+ return False
+
+ return is_the_task_interrupted
+
+
@celery_app.task(base=QueueOnce, once={'keys': ['document_id']},
name='celery:generate_related_by_document')
def generate_related_by_document_id(document_id, model_id, prompt):
try:
+ is_the_task_interrupted = get_is_the_task_interrupted(document_id)
+ if is_the_task_interrupted():
+ return
ListenerManagement.update_status(QuerySet(Document).filter(id=document_id),
TaskType.GENERATE_PROBLEM,
State.STARTED)
llm_model = get_llm_model(model_id)
- def is_the_task_interrupted():
- document = QuerySet(Document).filter(id=document_id).first()
- if document is None or Status(document.status)[TaskType.GENERATE_PROBLEM] == State.REVOKE:
- return True
- return False
-
# 生成问题函数
generate_problem = get_generate_problem(llm_model, prompt,
ListenerManagement.get_aggregation_document_status(
@@ -82,6 +89,12 @@ def generate_related_by_document_id(document_id, model_id, prompt):
name='celery:generate_related_by_paragraph_list')
def generate_related_by_paragraph_id_list(document_id, paragraph_id_list, model_id, prompt):
try:
+ is_the_task_interrupted = get_is_the_task_interrupted(document_id)
+ if is_the_task_interrupted():
+ ListenerManagement.update_status(QuerySet(Document).filter(id=document_id),
+ TaskType.GENERATE_PROBLEM,
+ State.REVOKED)
+ return
ListenerManagement.update_status(QuerySet(Document).filter(id=document_id),
TaskType.GENERATE_PROBLEM,
State.STARTED)
diff --git a/apps/embedding/task/embedding.py b/apps/embedding/task/embedding.py
index b6d5dfb75..3e63c26b2 100644
--- a/apps/embedding/task/embedding.py
+++ b/apps/embedding/task/embedding.py
@@ -102,6 +102,7 @@ def embedding_by_dataset(dataset_id, model_id):
max_kb.info(f"数据集文档:{[d.name for d in document_list]}")
for document in document_list:
try:
+ print(document.id, model_id)
embedding_by_document.delay(document.id, model_id)
except Exception as e:
pass
diff --git a/apps/smartdoc/settings/lib.py b/apps/smartdoc/settings/lib.py
index e7b6d39dd..a4c1aaabb 100644
--- a/apps/smartdoc/settings/lib.py
+++ b/apps/smartdoc/settings/lib.py
@@ -32,9 +32,11 @@ CELERY_WORKER_REDIRECT_STDOUTS = True
CELERY_WORKER_REDIRECT_STDOUTS_LEVEL = "INFO"
CELERY_TASK_SOFT_TIME_LIMIT = 3600
CELERY_WORKER_CANCEL_LONG_RUNNING_TASKS_ON_CONNECTION_LOSS = True
+CELERY_ACKS_LATE = True
+celery_once_path = os.path.join(celery_data_dir, "celery_once")
CELERY_ONCE = {
'backend': 'celery_once.backends.File',
- 'settings': {'location': os.path.join(celery_data_dir, "celery_once")}
+ 'settings': {'location': celery_once_path}
}
CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP = True
CELERY_LOG_DIR = os.path.join(PROJECT_DIR, 'logs', 'celery')
diff --git a/ui/src/views/document/component/StatusTable.vue b/ui/src/views/document/component/StatusTable.vue
index 506f1bf98..a96e935bc 100644
--- a/ui/src/views/document/component/StatusTable.vue
+++ b/ui/src/views/document/component/StatusTable.vue
@@ -24,13 +24,19 @@
- 完成
- {{
- Object.keys(status.aggs ? status.aggs : {})
- .filter((k) => k == State.SUCCESS)
- .map((k) => status.aggs[k])
- .reduce((x: any, y: any) => x + y, 0)
- }}/{{ Object.values(status.aggs ? status.aggs : {}).reduce((x: any, y: any) => x + y, 0) }}
+
+ 完成
+ {{
+ Object.keys(status.aggs ? status.aggs : {})
+ .filter((k) => k == State.SUCCESS)
+ .map((k) => status.aggs[k])
+ .reduce((x: any, y: any) => x + y, 0)
+ }}/{{
+ Object.values(status.aggs ? status.aggs : {}).reduce((x: any, y: any) => x + y, 0)
+ }}
{{
diff --git a/ui/src/views/document/index.vue b/ui/src/views/document/index.vue
index 4777dd106..d0c342d91 100644
--- a/ui/src/views/document/index.vue
+++ b/ui/src/views/document/index.vue
@@ -235,7 +235,25 @@
-
+
+
+
+
+
+
@@ -255,9 +273,20 @@
-
+
- 生成关联问题
+ 取消生成问题
+
+
+
+ 生成问题
@@ -286,7 +315,11 @@
@@ -318,7 +351,9 @@
>
From 5df4925dbefeefad7ce34aa3c8e040f00dd062ca Mon Sep 17 00:00:00 2001
From: wxg0103 <727495428@qq.com>
Date: Wed, 27 Nov 2024 10:34:01 +0800
Subject: [PATCH 08/11] =?UTF-8?q?refactor:=20=E4=BC=98=E5=8C=96oauth2=20?=
=?UTF-8?q?=E8=AE=A4=E8=AF=81=E7=9A=84=E9=A1=B5=E9=9D=A2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
--bug=1049381 --user=王孝刚 【系统管理】认证设置- OAUTH 2 认证设置样式优化 https://www.tapd.cn/57709429/s/1618325
---
ui/src/locales/lang/zh_CN/index.ts | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/ui/src/locales/lang/zh_CN/index.ts b/ui/src/locales/lang/zh_CN/index.ts
index 62ba593c1..817db2633 100644
--- a/ui/src/locales/lang/zh_CN/index.ts
+++ b/ui/src/locales/lang/zh_CN/index.ts
@@ -40,7 +40,7 @@ export default {
validateUrlPlaceholder: '请输入验证地址',
redirectUrl: '回调地址',
redirectUrlPlaceholder: '请输入回调地址',
- enableAuthentication: '启用CAS认证',
+ enableAuthentication: '启用 CAS 认证',
saveSuccess: '保存成功',
save: '保存'
},
@@ -49,18 +49,18 @@ export default {
authEndpoint: '授权端地址',
authEndpointPlaceholder: '请输入授权端地址',
tokenEndpoint: 'Token端地址',
- tokenEndpointPlaceholder: '请输入Token端地址',
+ tokenEndpointPlaceholder: '请输入 Token 端地址',
userInfoEndpoint: '用户信息端地址',
userInfoEndpointPlaceholder: '请输入用户信息端地址',
- clientId: '客户端ID',
- clientIdPlaceholder: '请输入客户端ID',
+ clientId: '客户端 ID',
+ clientIdPlaceholder: '请输入客户端 ID',
clientSecret: '客户端密钥',
clientSecretPlaceholder: '请输入客户端密钥',
logoutEndpoint: '注销端地址',
logoutEndpointPlaceholder: '请输入注销端地址',
redirectUrl: '回调地址',
redirectUrlPlaceholder: '请输入回调地址',
- enableAuthentication: '启用OIDC认证'
+ enableAuthentication: '启用 OIDC 认证'
},
jump_tip: '即将跳转至认证源页面进行认证',
jump: '跳转',
@@ -68,21 +68,21 @@ export default {
title: 'OAUTH2 设置',
authEndpoint: '授权端地址',
authEndpointPlaceholder: '请输入授权端地址',
- tokenEndpoint: 'Token端地址',
- tokenEndpointPlaceholder: '请输入Token端地址',
+ tokenEndpoint: 'Token 端地址',
+ tokenEndpointPlaceholder: '请输入 Token 端地址',
userInfoEndpoint: '用户信息端地址',
userInfoEndpointPlaceholder: '请输入用户信息端地址',
scope: '连接范围',
scopePlaceholder: '请输入连接范围',
- clientId: '客户端ID',
- clientIdPlaceholder: '请输入客户端ID',
+ clientId: '客户端 ID',
+ clientIdPlaceholder: '请输入客户端 ID',
clientSecret: '客户端密钥',
clientSecretPlaceholder: '请输入客户端密钥',
redirectUrl: '回调地址',
redirectUrlPlaceholder: '请输入回调地址',
filedMapping: '字段映射',
filedMappingPlaceholder: '请输入字段映射',
- enableAuthentication: '启用OAUTH2认证',
+ enableAuthentication: '启用 OAUTH2 认证',
save: '保存',
saveSuccess: '保存成功'
}
From 33ed8aa4aeab060e8f6cc160fc79d9a94dab1507 Mon Sep 17 00:00:00 2001
From: wangdan-fit2cloud
Date: Wed, 27 Nov 2024 11:29:05 +0800
Subject: [PATCH 09/11] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E5=8E=86?=
=?UTF-8?q?=E5=8F=B2=E8=81=8A=E5=A4=A9=E8=AE=B0=E5=BD=95=E5=AD=97=E6=AE=B5?=
=?UTF-8?q?=E6=A0=BC=E5=BC=8F=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../workflow/nodes/image-understand/index.vue | 29 +++++++++----------
1 file changed, 13 insertions(+), 16 deletions(-)
diff --git a/ui/src/workflow/nodes/image-understand/index.vue b/ui/src/workflow/nodes/image-understand/index.vue
index c3f1184ad..ecde49434 100644
--- a/ui/src/workflow/nodes/image-understand/index.vue
+++ b/ui/src/workflow/nodes/image-understand/index.vue
@@ -56,7 +56,7 @@
>
{{ item.name }}
公用
+ >公用
@@ -113,7 +113,7 @@
通过调整提示词内容,可以引导大模型聊天方向,该提示词会被固定在上下文的开头,可以使用变量。
+ >通过调整提示词内容,可以引导大模型聊天方向,该提示词会被固定在上下文的开头,可以使用变量。
@@ -131,9 +131,9 @@
@@ -143,9 +143,13 @@
:value-on-clear="0"
controls-position="right"
class="w-full"
+ :step="1"
+ :step-strictly="true"
/>
- {
@@ -268,9 +271,7 @@ function getProvider() {
})
}
-const model_change = (model_id?: string) => {
-
-}
+const model_change = (model_id?: string) => {}
function submitSystemDialog(val: string) {
set(props.nodeModel.properties.node_data, 'system', val)
@@ -286,10 +287,6 @@ onMounted(() => {
set(props.nodeModel, 'validate', validate)
})
-
-
-
\ No newline at end of file
+
From 59f5c8ac769bedbd39dcc56047fdc06f3f2ebed5 Mon Sep 17 00:00:00 2001
From: CaptainB
Date: Wed, 27 Nov 2024 12:17:08 +0800
Subject: [PATCH 10/11] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E6=96=87?=
=?UTF-8?q?=E6=A1=A3=E6=8F=90=E5=8F=96=E6=8A=A5=E9=94=99=E6=B2=A1=E6=9C=89?=
=?UTF-8?q?=E6=98=BE=E7=A4=BA=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../impl/base_document_extract_node.py | 6 ++++--
apps/common/handle/impl/doc_split_handle.py | 2 +-
apps/common/handle/impl/html_split_handle.py | 2 +-
apps/common/handle/impl/pdf_split_handle.py | 2 +-
apps/common/handle/impl/text_split_handle.py | 2 +-
5 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py b/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py
index 2b30a9320..4e7b2f660 100644
--- a/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py
+++ b/apps/application/flow/step_node/document_extract_node/impl/base_document_extract_node.py
@@ -29,18 +29,20 @@ class BaseDocumentExtractNode(IDocumentExtractNode):
# 回到文件头
buffer.seek(0)
file_content = split_handle.get_content(buffer)
- content.append( '## ' + doc['name'] + '\n' + file_content)
+ content.append('## ' + doc['name'] + '\n' + file_content)
break
return NodeResult({'content': splitter.join(content)}, {})
def get_details(self, index: int, **kwargs):
+ # 不保存content全部内容,因为content内容可能会很大
+ content = (self.context.get('content')[:500] + '...') if len(self.context.get('content')) > 0 else ''
return {
'name': self.node.properties.get('stepName'),
"index": index,
'run_time': self.context.get('run_time'),
'type': self.node.type,
- 'content': self.context.get('content')[:500] + '...', # 不保存content全部内容,因为content内容可能会很大
+ 'content': content,
'status': self.status,
'err_message': self.err_message,
'document_list': self.context.get('document_list')
diff --git a/apps/common/handle/impl/doc_split_handle.py b/apps/common/handle/impl/doc_split_handle.py
index 350a3921a..6ac6f43f9 100644
--- a/apps/common/handle/impl/doc_split_handle.py
+++ b/apps/common/handle/impl/doc_split_handle.py
@@ -198,4 +198,4 @@ class DocSplitHandle(BaseSplitHandle):
return self.to_md(doc, image_list, get_image_id_func())
except BaseException as e:
traceback.print_exception(e)
- return ''
\ No newline at end of file
+ return f'{e}'
\ No newline at end of file
diff --git a/apps/common/handle/impl/html_split_handle.py b/apps/common/handle/impl/html_split_handle.py
index 688904567..bb69e0af0 100644
--- a/apps/common/handle/impl/html_split_handle.py
+++ b/apps/common/handle/impl/html_split_handle.py
@@ -70,4 +70,4 @@ class HTMLSplitHandle(BaseSplitHandle):
return html2text(content)
except BaseException as e:
traceback.print_exception(e)
- return ''
\ No newline at end of file
+ return f'{e}'
\ No newline at end of file
diff --git a/apps/common/handle/impl/pdf_split_handle.py b/apps/common/handle/impl/pdf_split_handle.py
index b759c6d6a..21d243058 100644
--- a/apps/common/handle/impl/pdf_split_handle.py
+++ b/apps/common/handle/impl/pdf_split_handle.py
@@ -321,4 +321,4 @@ class PdfSplitHandle(BaseSplitHandle):
return self.handle_pdf_content(file, pdf_document)
except BaseException as e:
traceback.print_exception(e)
- return ''
\ No newline at end of file
+ return f'{e}'
\ No newline at end of file
diff --git a/apps/common/handle/impl/text_split_handle.py b/apps/common/handle/impl/text_split_handle.py
index 984c4e1e9..1ae22f95f 100644
--- a/apps/common/handle/impl/text_split_handle.py
+++ b/apps/common/handle/impl/text_split_handle.py
@@ -57,4 +57,4 @@ class TextSplitHandle(BaseSplitHandle):
return buffer.decode(detect(buffer)['encoding'])
except BaseException as e:
traceback.print_exception(e)
- return ''
\ No newline at end of file
+ return f'{e}'
\ No newline at end of file
From b7905198a2d9771d7c725c67dbb6ee9089d07f21 Mon Sep 17 00:00:00 2001
From: CaptainB
Date: Wed, 27 Nov 2024 12:27:27 +0800
Subject: [PATCH 11/11] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E6=96=87?=
=?UTF-8?q?=E4=BB=B6=E9=85=8D=E7=BD=AE=E5=85=A8=E6=B2=A1=E5=8B=BE=E9=80=89?=
=?UTF-8?q?=E7=9A=84=E6=83=85=E5=86=B5=E4=B8=8B=E8=BF=98=E8=83=BD=E4=B8=8A?=
=?UTF-8?q?=E4=BC=A0=E6=96=87=E4=BB=B6=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
--bug=1049620 --user=刘瑞斌 【应用编排】文件上传设置没勾选文档也没勾选图片,发布后对话还是能上传文档和图片 https://www.tapd.cn/57709429/s/1618519
---
.../components/ai-chat/component/chat-input-operate/index.vue | 3 +++
1 file changed, 3 insertions(+)
diff --git a/ui/src/components/ai-chat/component/chat-input-operate/index.vue b/ui/src/components/ai-chat/component/chat-input-operate/index.vue
index b6dd4bf1e..7ea236c04 100644
--- a/ui/src/components/ai-chat/component/chat-input-operate/index.vue
+++ b/ui/src/components/ai-chat/component/chat-input-operate/index.vue
@@ -217,6 +217,9 @@ const getAcceptList = () => {
accepts = [...accepts, ...videoExtensions]
}
// console.log(accepts)
+ if (accepts.length === 0) {
+ return '.请在文件上传配置中选择文件类型'
+ }
return accepts.map((ext: any) => '.' + ext).join(',')
}