diff --git a/apps/application/flow/step_node/form_node/impl/base_form_node.py b/apps/application/flow/step_node/form_node/impl/base_form_node.py
index db179f588..69bb69372 100644
--- a/apps/application/flow/step_node/form_node/impl/base_form_node.py
+++ b/apps/application/flow/step_node/form_node/impl/base_form_node.py
@@ -46,6 +46,8 @@ class BaseFormNode(IFormNode):
if form_data is not None:
self.context['is_submit'] = True
self.context['form_data'] = form_data
+ for key in form_data:
+ self.context[key] = form_data.get(key)
else:
self.context['is_submit'] = False
form_setting = {"form_field_list": form_field_list, "runtime_node_id": self.runtime_node_id,
diff --git a/apps/common/handle/impl/table/xlsx_parse_table_handle.py b/apps/common/handle/impl/table/xlsx_parse_table_handle.py
index a98c6ae75..b92bbd345 100644
--- a/apps/common/handle/impl/table/xlsx_parse_table_handle.py
+++ b/apps/common/handle/impl/table/xlsx_parse_table_handle.py
@@ -90,7 +90,8 @@ class XlsxSplitHandle(BaseParseTableHandle):
for sheetname in workbook.sheetnames:
sheet = workbook[sheetname] if sheetname else workbook.active
rows = self.fill_merged_cells(sheet, image_dict)
-
+ if len(rows) == 0:
+ continue
# 提取表头和内容
headers = [f"{key}" for key, value in rows[0].items()]
diff --git a/apps/common/management/commands/services/command.py b/apps/common/management/commands/services/command.py
index c5b719272..0c97d4af3 100644
--- a/apps/common/management/commands/services/command.py
+++ b/apps/common/management/commands/services/command.py
@@ -1,3 +1,5 @@
+import math
+
from django.core.management.base import BaseCommand
from django.db.models import TextChoices
@@ -93,7 +95,8 @@ class BaseActionCommand(BaseCommand):
'services', nargs='+', choices=Services.export_services_values(), help='Service',
)
parser.add_argument('-d', '--daemon', nargs="?", const=True)
- parser.add_argument('-w', '--worker', type=int, nargs="?", default=3 if os.cpu_count() > 3 else os.cpu_count())
+ parser.add_argument('-w', '--worker', type=int, nargs="?",
+ default=3 if os.cpu_count() > 6 else math.floor(os.cpu_count() / 2))
parser.add_argument('-f', '--force', nargs="?", const=True)
def initial_util(self, *args, **options):
diff --git a/apps/common/util/common.py b/apps/common/util/common.py
index cbf6b0011..8571c91e3 100644
--- a/apps/common/util/common.py
+++ b/apps/common/util/common.py
@@ -102,3 +102,12 @@ def valid_license(model=None, count=None, message=None):
return run
return inner
+
+
+def bulk_create_in_batches(model, data, batch_size=1000):
+ if len(data) == 0:
+ return
+ for i in range(0, len(data), batch_size):
+ batch = data[i:i + batch_size]
+ model.objects.bulk_create(batch)
+
diff --git a/apps/dataset/serializers/dataset_serializers.py b/apps/dataset/serializers/dataset_serializers.py
index a0c171559..000737649 100644
--- a/apps/dataset/serializers/dataset_serializers.py
+++ b/apps/dataset/serializers/dataset_serializers.py
@@ -15,6 +15,7 @@ from functools import reduce
from typing import Dict, List
from urllib.parse import urlparse
+from celery_once import AlreadyQueued, QueueOnce
from django.contrib.postgres.fields import ArrayField
from django.core import validators
from django.db import transaction, models
@@ -732,6 +733,7 @@ class DataSetSerializers(serializers.ModelSerializer):
delete_embedding_by_dataset(self.data.get('id'))
return True
+ @transaction.atomic
def re_embedding(self, with_valid=True):
if with_valid:
self.is_valid(raise_exception=True)
@@ -743,7 +745,10 @@ class DataSetSerializers(serializers.ModelSerializer):
State.PENDING)
ListenerManagement.get_aggregation_document_status_by_dataset_id(self.data.get('id'))()
embedding_model_id = get_embedding_model_id_by_dataset_id(self.data.get('id'))
- embedding_by_dataset.delay(self.data.get('id'), embedding_model_id)
+ try:
+ embedding_by_dataset.delay(self.data.get('id'), embedding_model_id)
+ except AlreadyQueued as e:
+ raise AppApiException(500, "向量化任务发送失败,请稍后再试!")
def list_application(self, with_valid=True):
if with_valid:
diff --git a/apps/dataset/serializers/document_serializers.py b/apps/dataset/serializers/document_serializers.py
index 5c8457302..d90d21e15 100644
--- a/apps/dataset/serializers/document_serializers.py
+++ b/apps/dataset/serializers/document_serializers.py
@@ -41,7 +41,7 @@ from common.handle.impl.table.xls_parse_table_handle import XlsSplitHandle
from common.handle.impl.table.xlsx_parse_table_handle import XlsxSplitHandle
from common.handle.impl.text_split_handle import TextSplitHandle
from common.mixins.api_mixin import ApiMixin
-from common.util.common import post, flat_map
+from common.util.common import post, flat_map, bulk_create_in_batches
from common.util.field_message import ErrMessage
from common.util.file_util import get_file_content
from common.util.fork import Fork
@@ -301,6 +301,8 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
ListenerManagement.update_status(QuerySet(Paragraph).filter(document_id__in=document_id_list),
TaskType.EMBEDDING,
State.PENDING)
+ ListenerManagement.get_aggregation_document_status_by_query_set(
+ QuerySet(Document).filter(id__in=document_id_list))()
embedding_by_document_list.delay(document_id_list, model_id)
else:
update_embedding_dataset_id(pid_list, target_dataset_id)
@@ -621,6 +623,7 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
_document.save()
return self.one()
+ @transaction.atomic
def refresh(self, with_valid=True):
if with_valid:
self.is_valid(raise_exception=True)
@@ -952,12 +955,11 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
# 插入文档
QuerySet(Document).bulk_create(document_model_list) if len(document_model_list) > 0 else None
# 批量插入段落
- QuerySet(Paragraph).bulk_create(paragraph_model_list) if len(paragraph_model_list) > 0 else None
+ bulk_create_in_batches(Paragraph, paragraph_model_list, batch_size=1000)
# 批量插入问题
- QuerySet(Problem).bulk_create(problem_model_list) if len(problem_model_list) > 0 else None
+ bulk_create_in_batches(Problem, problem_model_list, batch_size=1000)
# 批量插入关联问题
- QuerySet(ProblemParagraphMapping).bulk_create(problem_paragraph_mapping_list) if len(
- problem_paragraph_mapping_list) > 0 else None
+ bulk_create_in_batches(ProblemParagraphMapping, problem_paragraph_mapping_list, batch_size=1000)
# 查询文档
query_set = QuerySet(model=Document)
if len(document_model_list) == 0:
diff --git a/ui/src/components/ai-chat/component/answer-content/index.vue b/ui/src/components/ai-chat/component/answer-content/index.vue
index 4d1c7a684..96bcd0cdd 100644
--- a/ui/src/components/ai-chat/component/answer-content/index.vue
+++ b/ui/src/components/ai-chat/component/answer-content/index.vue
@@ -18,7 +18,7 @@
:chat_record_id="answer_text.chat_record_id"
:child_node="answer_text.child_node"
:runtime_node_id="answer_text.runtime_node_id"
- :loading="loading"
+ :disabled="loading || type == 'log'"
v-else-if="answer_text.content"
:source="answer_text.content"
:send-message="chatMessage"
diff --git a/ui/src/components/ai-chat/component/prologue-content/index.vue b/ui/src/components/ai-chat/component/prologue-content/index.vue
index 2bb030f23..75d70477d 100644
--- a/ui/src/components/ai-chat/component/prologue-content/index.vue
+++ b/ui/src/components/ai-chat/component/prologue-content/index.vue
@@ -6,7 +6,7 @@