feat: add DocumentSplitPatternAPI for split pattern retrieval with workspace and knowledge ID path parameters

This commit is contained in:
CaptainB 2025-05-06 17:56:49 +08:00
parent 6f5645e16c
commit bd865ceafc
4 changed files with 63 additions and 1 deletion

View File

@ -315,3 +315,28 @@ class DocumentTreeReadAPI(APIMixin):
required=False,
),
]
class DocumentSplitPatternAPI(APIMixin):
    """OpenAPI schema description for the document split-pattern endpoint."""

    @staticmethod
    def get_parameters():
        """Return the path parameters documented for the endpoint.

        Both parameters are declared as required string values located in
        the URL path.
        """
        # (name, description) pairs; the descriptions are emitted verbatim
        # into the generated schema ("workspace id" / "knowledge base id").
        path_fields = (
            ("workspace_id", "工作空间id"),
            ("knowledge_id", "知识库id"),
        )
        return [
            OpenApiParameter(
                name=field_name,
                description=field_description,
                type=OpenApiTypes.STR,
                location='path',
                required=True,
            )
            for field_name, field_description in path_fields
        ]

    @staticmethod
    def get_response():
        """Return the serializer class used to document the response body."""
        return DefaultResultSerializer

View File

@ -624,6 +624,26 @@ class DocumentSerializers(serializers.Serializer):
return result
return [result]
class SplitPattern(serializers.Serializer):
    """Serializer exposing the built-in list of document split patterns.

    The two declared fields mirror the URL path arguments. ``list`` is a
    static method and does not read them, so the patterns are the same for
    every workspace and knowledge base.
    """
    workspace_id = serializers.CharField(required=True, label=_('workspace id'))
    knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))

    @staticmethod
    def list():
        """Return the selectable split patterns.

        Returns:
            A list of dicts with two entries each: ``key`` is the label of
            the pattern (a literal marker such as ``#`` or a translated
            name) and ``value`` is the regular expression source string.
        """
        return [
            # Lines starting with one to six '#' followed by a space
            # (Markdown-style headings), at the start of the text or after a
            # newline. The lookarounds keep a level from matching a run of
            # '#' that is longer than itself.
            {'key': "#", 'value': '(?<=^)# .*|(?<=\\n)# .*'},
            {'key': '##', 'value': '(?<=\\n)(?<!#)## (?!#).*|(?<=^)(?<!#)## (?!#).*'},
            {'key': '###', 'value': "(?<=\\n)(?<!#)### (?!#).*|(?<=^)(?<!#)### (?!#).*"},
            {'key': '####', 'value': "(?<=\\n)(?<!#)#### (?!#).*|(?<=^)(?<!#)#### (?!#).*"},
            {'key': '#####', 'value': "(?<=\\n)(?<!#)##### (?!#).*|(?<=^)(?<!#)##### (?!#).*"},
            {'key': '######', 'value': "(?<=\\n)(?<!#)###### (?!#).*|(?<=^)(?<!#)###### (?!#).*"},
            {'key': '-', 'value': '(?<! )- .*'},
            # Single separators: each matches the character only when it is
            # not repeated immediately before or after.
            {'key': _('space'), 'value': '(?<! ) (?! )'},
            # Full-width semicolon and comma, mirroring the full-width period
            # below. With no character between the lookarounds these two
            # patterns were identical and could never match anything.
            {'key': _('semicolon'), 'value': '(?<!；)；(?!；)'},
            {'key': _('comma'), 'value': '(?<!，)，(?!，)'},
            {'key': _('period'), 'value': '(?<!。)。(?!。)'},
            {'key': _('enter'), 'value': '(?<!\\n)\\n(?!\\n)'},
            {'key': _('blank line'), 'value': '(?<!\\n)\\n\\n(?!\\n)'},
        ]
class Batch(serializers.Serializer):
workspace_id = serializers.UUIDField(required=True, label=_('workspace id'))
knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))

View File

@ -10,6 +10,7 @@ urlpatterns = [
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>', views.KnowledgeView.Operate.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document', views.DocumentView.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/split', views.DocumentView.Split.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/split_pattern', views.DocumentView.SplitPattern.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/batch', views.DocumentView.Batch.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/web', views.WebDocumentView.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/qa', views.QaDocumentView.as_view()),

View File

@ -11,7 +11,7 @@ from common.result import result
from knowledge.api.document import DocumentSplitAPI, DocumentBatchAPI, DocumentBatchCreateAPI, DocumentCreateAPI, \
DocumentReadAPI, DocumentEditAPI, DocumentDeleteAPI, TableDocumentCreateAPI, QaDocumentCreateAPI, \
WebDocumentCreateAPI, CancelTaskAPI, BatchCancelTaskAPI, SyncWebAPI, RefreshAPI, BatchEditHitHandlingAPI, \
DocumentTreeReadAPI
DocumentTreeReadAPI, DocumentSplitPatternAPI
from knowledge.serializers.document import DocumentSerializers
@ -140,6 +140,22 @@ class DocumentView(APIView):
'knowledge_id': knowledge_id,
}).parse(split_data))
class SplitPattern(APIView):
    """View serving the result of ``DocumentSerializers.SplitPattern.list()``."""
    authentication_classes = [TokenAuth]

    # NOTE(review): the schema texts below mention "segment IDs", while the
    # handler returns the split-pattern list; confirm the intended wording.
    @extend_schema(
        summary=_('Get a list of segment IDs'),
        description=_('Get a list of segment IDs'),
        operation_id=_('Get a list of segment IDs'),
        parameters=DocumentSplitPatternAPI.get_parameters(),
        responses=DocumentSplitPatternAPI.get_response(),
        tags=[_('Knowledge Base/Documentation')]
    )
    def get(self, request: Request, workspace_id: str, knowledge_id: str):
        """Handle GET by returning the split patterns in a success result.

        The path arguments are passed to the serializer as its ``data``;
        the serializer's ``list()`` output is then wrapped with
        ``result.success``.
        """
        path_args = {'knowledge_id': knowledge_id, 'workspace_id': workspace_id}
        pattern_serializer = DocumentSerializers.SplitPattern(data=path_args)
        return result.success(pattern_serializer.list())
class BatchEditHitHandling(APIView):
authentication_classes = [TokenAuth]