fix: update paragraph ordering and adjust position during bulk creation

This commit is contained in:
CaptainB 2025-06-17 12:09:12 +08:00
parent 00e486c3fe
commit 119f678224
9 changed files with 195 additions and 18 deletions

View File

@ -310,4 +310,50 @@ class ParagraphMigrateAPI(APIMixin):
@staticmethod
def get_request():
return BatchSerializer
return BatchSerializer
class ParagraphAdjustOrderAPI(APIMixin):
    """OpenAPI description of the "adjust paragraph position" endpoint."""

    @staticmethod
    def get_parameters():
        """Return the OpenAPI parameters of the endpoint.

        Three required string path parameters (workspace, knowledge base,
        document) are followed by two required query parameters: the paragraph
        id (string) and the new position (integer).
        """
        # (name, description) of the path parameters; all of them are strings.
        path_fields = (
            ("workspace_id", "工作空间id"),
            ("knowledge_id", "知识库id"),
            ("document_id", "文档id"),
        )
        # (name, description, type) of the query parameters.
        query_fields = (
            ("paragraph_id", "段落id", OpenApiTypes.STR),
            ("new_position", "新的顺序", OpenApiTypes.INT),
        )
        parameters = [
            OpenApiParameter(
                name=field_name,
                description=label,
                type=OpenApiTypes.STR,
                location='path',
                required=True,
            )
            for field_name, label in path_fields
        ]
        parameters.extend(
            OpenApiParameter(
                name=field_name,
                description=label,
                type=value_type,
                location='query',
                required=True,
            )
            for field_name, label, value_type in query_fields
        )
        return parameters

    @staticmethod
    def get_response():
        """Return the serializer class that describes the response body."""
        return DefaultResultSerializer

View File

@ -0,0 +1,29 @@
# Generated by Django 5.2.3 on 2025-06-17 03:11
import knowledge.models.knowledge
from django.db import migrations, models
# Schema migration for the `knowledge` app: adds an indexed integer `position`
# column (default 0) to `paragraph` and re-declares the `status` CharField
# (max_length=20) on both `document` and `paragraph`.
class Migration(migrations.Migration):
# Must run after migration 0004 of the same app.
dependencies = [
('knowledge', '0004_alter_document_status_alter_paragraph_status_and_more'),
]
operations = [
# New ordering column; rows that already exist receive the default 0.
migrations.AddField(
model_name='paragraph',
name='position',
field=models.IntegerField(db_index=True, default=0, verbose_name='段落顺序'),
),
# NOTE(review): the default below is the plain function `Status.__str__`
# as serialized into this migration — confirm it is usable as a
# zero-argument default before relying on it.
migrations.AlterField(
model_name='document',
name='status',
field=models.CharField(default=knowledge.models.knowledge.Status.__str__, max_length=20, verbose_name='状态'),
),
# Same field definition applied to the paragraph table.
migrations.AlterField(
model_name='paragraph',
name='status',
field=models.CharField(default=knowledge.models.knowledge.Status.__str__, max_length=20, verbose_name='状态'),
),
]

View File

@ -174,6 +174,7 @@ class Paragraph(AppModelMixin):
status_meta = models.JSONField(verbose_name="状态数据", default=default_status_meta)
hit_num = models.IntegerField(verbose_name="命中次数", default=0)
is_active = models.BooleanField(default=True)
position = models.IntegerField(verbose_name="段落顺序", default=0, db_index=True)
class Meta:
db_table = "paragraph"

View File

@ -13,6 +13,7 @@ from celery_once import AlreadyQueued
from django.core import validators
from django.db import transaction, models
from django.db.models import QuerySet
from django.db.models.aggregates import Max
from django.db.models.functions import Substr, Reverse
from django.http import HttpResponse
from django.utils.translation import gettext_lazy as _, gettext, get_language, to_locale
@ -417,6 +418,7 @@ class DocumentSerializers(serializers.Serializer):
if first.type != KnowledgeType.WEB:
raise AppApiException(500, _('Synchronization is only supported for web site types'))
@transaction.atomic
def sync(self, with_valid=True, with_embedding=True):
if with_valid:
self.is_valid(raise_exception=True)
@ -454,7 +456,13 @@ class DocumentSerializers(serializers.Serializer):
problem_model_list, problem_paragraph_mapping_list = ProblemParagraphManage(
problem_paragraph_object_list, document.knowledge_id).to_problem_model_list()
# 批量插入段落
QuerySet(Paragraph).bulk_create(paragraph_model_list) if len(paragraph_model_list) > 0 else None
# Number the new paragraphs so that they follow the highest position already
# stored for this document, then insert them in one bulk statement.
if len(paragraph_model_list) > 0:
# Highest stored position for the document; falls back to 0 when the
# aggregate yields a falsy value (e.g. the document has no paragraphs yet).
max_position = Paragraph.objects.filter(document_id=document_id).aggregate(
max_position=Max('position')
)['max_position'] or 0
# Positions continue at max_position + 1, following list order.
for i, paragraph in enumerate(paragraph_model_list):
paragraph.position = max_position + i + 1
QuerySet(Paragraph).bulk_create(paragraph_model_list)
# 批量插入问题
QuerySet(Problem).bulk_create(problem_model_list) if len(problem_model_list) > 0 else None
# 插入关联问题
@ -757,7 +765,13 @@ class DocumentSerializers(serializers.Serializer):
# 插入文档
document_model.save()
# 批量插入段落
QuerySet(Paragraph).bulk_create(paragraph_model_list) if len(paragraph_model_list) > 0 else None
# Number the new paragraphs so that they follow the highest position already
# stored for the document that was just saved, then insert them in bulk.
if len(paragraph_model_list) > 0:
# Highest stored position for the document; falls back to 0 when the
# aggregate yields a falsy value (e.g. the document has no paragraphs yet).
max_position = Paragraph.objects.filter(document_id=document_model.id).aggregate(
max_position=Max('position')
)['max_position'] or 0
# Positions continue at max_position + 1, following list order.
for i, paragraph in enumerate(paragraph_model_list):
paragraph.position = max_position + i + 1
QuerySet(Paragraph).bulk_create(paragraph_model_list)
# 批量插入问题
QuerySet(Problem).bulk_create(problem_model_list) if len(problem_model_list) > 0 else None
# 批量插入关联问题
@ -1031,7 +1045,15 @@ class DocumentSerializers(serializers.Serializer):
# 插入文档
QuerySet(Document).bulk_create(document_model_list) if len(document_model_list) > 0 else None
# 批量插入段落
bulk_create_in_batches(Paragraph, paragraph_model_list, batch_size=1000)
# Number the paragraphs per document (continuing after any position already
# stored for that document) and insert them in batches.
if len(paragraph_model_list) > 0:
    # Next free position of every affected document, fetched with one grouped
    # query instead of one aggregate query per document. order_by() drops any
    # ordering so the rows are grouped by document_id only.
    next_position_by_document = {
        str(row['document_id']): (row['max_position'] or 0) + 1
        for row in QuerySet(Paragraph)
        .filter(document_id__in={p.document_id for p in paragraph_model_list})
        .order_by()
        .values('document_id')
        .annotate(max_position=Max('position'))
    }
    # Single pass over the paragraphs: each one takes the next free position
    # of its own document, preserving the relative order of the list.
    # Documents without stored paragraphs start at position 1.
    for paragraph in paragraph_model_list:
        document_key = str(paragraph.document_id)
        paragraph.position = next_position_by_document.get(document_key, 1)
        next_position_by_document[document_key] = paragraph.position + 1
    # Insert every paragraph, 1000 rows per statement, matching the batching
    # used for problems and mappings below.
    bulk_create_in_batches(Paragraph, paragraph_model_list, batch_size=1000)
# 批量插入问题
bulk_create_in_batches(Problem, problem_model_list, batch_size=1000)
# 批量插入关联问题

View File

@ -5,7 +5,8 @@ from typing import Dict
import uuid_utils.compat as uuid
from celery_once import AlreadyQueued
from django.db import transaction
from django.db.models import QuerySet, Count
from django.db.models import QuerySet, Count, F
from django.db.models.aggregates import Max
from django.utils.translation import gettext_lazy as _
from rest_framework import serializers
@ -28,7 +29,7 @@ from knowledge.task.generate import generate_related_by_paragraph_id_list
class ParagraphSerializer(serializers.ModelSerializer):
class Meta:
model = Paragraph
fields = ['id', 'content', 'is_active', 'document_id', 'title', 'create_time', 'update_time']
fields = ['id', 'content', 'is_active', 'document_id', 'title', 'create_time', 'update_time', 'position']
class ParagraphInstanceSerializer(serializers.Serializer):
@ -244,6 +245,7 @@ class ParagraphSerializers(serializers.Serializer):
knowledge_id=self.data.get('knowledge_id')).exists():
raise AppApiException(500, _('The document id is incorrect'))
@transaction.atomic
def save(self, instance: Dict, with_valid=True, with_embedding=True):
if with_valid:
ParagraphSerializers(data=instance).is_valid(raise_exception=True)
@ -257,7 +259,18 @@ class ParagraphSerializers(serializers.Serializer):
ProblemParagraphManage(problem_paragraph_object_list, knowledge_id)
.to_problem_model_list())
# 插入段落
paragraph_problem_model.get('paragraph').save()
# Store the new paragraph after the document's current last position; the
# aggregate is falsy when the document has no paragraphs, so numbering then
# starts from 0 + 1.
max_position = Paragraph.objects.filter(document_id=document_id).aggregate(
    max_position=Max('position')
)['max_position'] or 0
paragraph.position = max_position + 1
paragraph.save()
# Move the paragraph to the position requested by the caller (defaults to the
# end of the document). The target is passed positionally because
# adjust_position names its parameter `new_position`; the former
# `position=` keyword raised TypeError on every call.
ParagraphSerializers.AdjustPosition(data={
    'paragraph_id': str(paragraph.id),
    'knowledge_id': knowledge_id,
    'document_id': document_id,
    'workspace_id': self.data.get('workspace_id')
}).adjust_position(instance.get('position', max_position + 1))
# 插入問題
QuerySet(Problem).bulk_create(problem_model_list) if len(problem_model_list) > 0 else None
# 插入问题关联关系
@ -319,7 +332,7 @@ class ParagraphSerializers(serializers.Serializer):
**{'title__icontains': self.data.get('title')})
if 'content' in self.data:
query_set = query_set.filter(**{'content__icontains': self.data.get('content')})
query_set = query_set.order_by('create_time', 'id')
query_set = query_set.order_by('position', 'create_time')
return query_set
def list(self):
@ -541,6 +554,42 @@ class ParagraphSerializers(serializers.Serializer):
return problem, True
return None
class AdjustPosition(serializers.Serializer):
    """Move one paragraph to a new position within its document."""
    workspace_id = serializers.CharField(required=True, label=_('workspace id'))
    knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
    document_id = serializers.UUIDField(required=True, label=_('document id'))
    paragraph_id = serializers.UUIDField(required=True, label=_('paragraph id'))

    @transaction.atomic
    def adjust_position(self, new_position):
        """
        Adjust the order of a paragraph inside its document.

        The paragraphs located between the old and the new position are
        shifted by one so the moved paragraph can take the requested slot.
        Only paragraphs of the serializer's ``document_id`` are touched.

        :param new_position: target position; anything ``int()`` accepts
        :raises serializers.ValidationError: if the serializer data is invalid
            or ``new_position`` cannot be converted to an integer
        :raises Paragraph.DoesNotExist: if the paragraph is not part of the
            given document
        """
        self.is_valid(raise_exception=True)
        try:
            new_position = int(new_position)
        except (TypeError, ValueError):
            raise serializers.ValidationError(_('new_position must be an integer'))
        # Restrict every query to the target document: positions are numbered
        # per document, so rows of other documents must never be shifted.
        siblings = Paragraph.objects.filter(document_id=self.data.get('document_id'))
        # Fetch the paragraph being moved.
        paragraph = siblings.get(id=self.data.get('paragraph_id'))
        old_position = paragraph.position
        if old_position < new_position:
            # Moving towards the end: pull the paragraphs in (old, new] one
            # step towards the front.
            siblings.filter(
                position__gt=old_position, position__lte=new_position
            ).update(position=F('position') - 1)
        elif old_position > new_position:
            # Moving towards the front: push the paragraphs in [new, old) one
            # step towards the end.
            siblings.filter(
                position__lt=old_position, position__gte=new_position
            ).update(position=F('position') + 1)
        # Store the new position of the moved paragraph.
        paragraph.position = new_position
        paragraph.save()
def delete_problems_and_mappings(paragraph_ids):
problem_paragraph_mappings = ProblemParagraphMapping.objects.filter(paragraph_id__in=paragraph_ids)

View File

@ -75,6 +75,9 @@ urlpatterns = [
views.ParagraphView.Association.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/unassociation',
views.ParagraphView.UnAssociation.as_view()),
path(
'workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/adjust_position',
views.ParagraphView.AdjustPosition.as_view()),
path(
'workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/<str:paragraph_id>',
views.ParagraphView.Operate.as_view()),

View File

@ -361,7 +361,7 @@ class DocumentView(APIView):
get_operation_object=lambda r, keywords: get_knowledge_document_operation_object(
get_knowledge_operation_object(keywords.get('knowledge_id')),
{'name': f'[{",".join([document.get("name") for document in r.data])}]',
'document_list': r.data}),
'document_list': r.data}),
)
def put(self, request: Request, workspace_id: str, knowledge_id: str):
return result.success(DocumentSerializers.Batch(

View File

@ -11,7 +11,7 @@ from common.result import result
from common.utils.common import query_params_to_single_dict
from knowledge.api.paragraph import ParagraphReadAPI, ParagraphCreateAPI, ParagraphBatchDeleteAPI, ParagraphEditAPI, \
ParagraphGetAPI, ProblemCreateAPI, UnAssociationAPI, AssociationAPI, ParagraphPageAPI, \
ParagraphBatchGenerateRelatedAPI, ParagraphMigrateAPI
ParagraphBatchGenerateRelatedAPI, ParagraphMigrateAPI, ParagraphAdjustOrderAPI
from knowledge.serializers.common import get_knowledge_operation_object
from knowledge.serializers.paragraph import ParagraphSerializers
from knowledge.views import get_knowledge_document_operation_object, get_document_operation_object
@ -61,7 +61,7 @@ class ParagraphView(APIView):
get_operation_object=lambda r, keywords: get_knowledge_document_operation_object(
get_knowledge_operation_object(keywords.get('knowledge_id')),
get_document_operation_object(keywords.get('document_id'))
),
),
)
def post(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
return result.success(ParagraphSerializers.Create(
@ -110,7 +110,7 @@ class ParagraphView(APIView):
get_operation_object=lambda r, keywords: get_knowledge_document_operation_object(
get_knowledge_operation_object(keywords.get('knowledge_id')),
get_document_operation_object(keywords.get('document_id'))
),
),
)
def put(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str,
target_knowledge_id: str, target_document_id):
@ -146,7 +146,7 @@ class ParagraphView(APIView):
get_operation_object=lambda r, keywords: get_knowledge_document_operation_object(
get_knowledge_operation_object(keywords.get('knowledge_id')),
get_document_operation_object(keywords.get('document_id'))
),
),
)
def put(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
return result.success(ParagraphSerializers.Batch(
@ -175,7 +175,7 @@ class ParagraphView(APIView):
get_operation_object=lambda r, keywords: get_knowledge_document_operation_object(
get_knowledge_operation_object(keywords.get('knowledge_id')),
get_document_operation_object(keywords.get('document_id'))
),
),
)
def put(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str):
o = ParagraphSerializers.Operate(
@ -231,7 +231,7 @@ class ParagraphView(APIView):
get_operation_object=lambda r, keywords: get_knowledge_document_operation_object(
get_knowledge_operation_object(keywords.get('knowledge_id')),
get_document_operation_object(keywords.get('document_id'))
),
),
)
def delete(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str):
o = ParagraphSerializers.Operate(
@ -267,7 +267,7 @@ class ParagraphView(APIView):
get_operation_object=lambda r, keywords: get_knowledge_document_operation_object(
get_knowledge_operation_object(keywords.get('knowledge_id')),
get_document_operation_object(keywords.get('document_id'))
),
),
)
def post(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str):
return result.success(ParagraphSerializers.Problem(
@ -359,7 +359,7 @@ class ParagraphView(APIView):
get_operation_object=lambda r, keywords: get_knowledge_document_operation_object(
get_knowledge_operation_object(keywords.get('knowledge_id')),
get_document_operation_object(keywords.get('document_id'))
),
),
)
def put(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
return result.success(ParagraphSerializers.Association(
@ -400,3 +400,30 @@ class ParagraphView(APIView):
)
d.is_valid(raise_exception=True)
return result.success(d.page(current_page, page_size))
# PUT endpoint that moves a paragraph to a new position; the paragraph id and
# the target position are read from the query string.
class AdjustPosition(APIView):
    authentication_classes = [TokenAuth]

    @extend_schema(
        methods=['PUT'],
        summary=_('Adjust paragraph position'),
        description=_('Adjust paragraph position'),
        operation_id=_('Adjust paragraph position'),  # type: ignore
        parameters=ParagraphAdjustOrderAPI.get_parameters(),
        request=ParagraphAdjustOrderAPI.get_request(),
        responses=ParagraphAdjustOrderAPI.get_response(),
        tags=[_('Knowledge Base/Documentation/Paragraph')]  # type: ignore
    )
    @has_permissions(
        PermissionConstants.KNOWLEDGE_DOCUMENT_EDIT.get_workspace_knowledge_permission(),
        RoleConstants.WORKSPACE_MANAGE.get_workspace_role()
    )
    def put(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
        # Path arguments plus the paragraph id identify the paragraph to move.
        serializer_data = {
            'workspace_id': workspace_id,
            'knowledge_id': knowledge_id,
            'document_id': document_id,
            'paragraph_id': request.query_params.get('paragraph_id'),
        }
        position_serializer = ParagraphSerializers.AdjustPosition(data=serializer_data)
        # Validation of both the ids and the position happens inside
        # adjust_position; its return value is wrapped as the response payload.
        outcome = position_serializer.adjust_position(request.query_params.get('new_position'))
        return result.success(outcome)

View File

@ -17,7 +17,7 @@ export default defineConfig(({ mode }) => {
const prefix = process.env.VITE_DYNAMIC_PREFIX || ENV.VITE_BASE_PATH
const proxyConf: Record<string, string | ProxyOptions> = {}
proxyConf['/api'] = {
target: 'http://43.166.1.146:8080',
target: 'http://127.0.0.1:8080',
changeOrigin: true,
rewrite: (path: string) => path.replace(ENV.VITE_BASE_PATH, '/'),
}