fix: Filter special character

2025-12-26 01:33:05 +00:00 · 2025-12-11 16:22:03 +08:00 · 2025-12-11 16:22:03 +08:00 · 9347696676
parent 638f90e69f
commit 9347696676
2 changed files with 25 additions and 12 deletions
--- a/apps/application/flow/step_node/knowledge_write_node/impl/base_knowledge_write_node.py
+++ b/apps/application/flow/step_node/knowledge_write_node/impl/base_knowledge_write_node.py
@ -17,7 +17,7 @@ from django.utils.translation import gettext_lazy as _
 from application.flow.i_step_node import NodeResult
 from application.flow.step_node.knowledge_write_node.i_knowledge_write_node import IKnowledgeWriteNode
 from common.chunk import text_to_chunk
-from common.utils.common import bulk_create_in_batches
+from common.utils.common import bulk_create_in_batches, filter_special_character
 from knowledge.models import Document, KnowledgeType, Paragraph, File, FileSourceType, Problem, ProblemParagraphMapping, \
    Tag, DocumentTag
 from knowledge.serializers.common import ProblemParagraphObject, ProblemParagraphManage
@ -83,10 +83,11 @@ def get_paragraph_problem_model(knowledge_id: str, document_id: str, instance: D
    paragraph = Paragraph(
        id=uuid.uuid7(),
        document_id=document_id,
-        content=instance.get("content"),
+        content=filter_special_character(instance.get("content")),
        knowledge_id=knowledge_id,
        title=instance.get("title") if 'title' in instance else '',
-        chunks=instance.get('chunks') if 'chunks' in instance else text_to_chunk(instance.get("content")),
+        chunks=[filter_special_character(c) for c in (instance.get('chunks') if 'chunks' in instance else text_to_chunk(
+            instance.get("content")))],
    )

    problem_paragraph_object_list = [ProblemParagraphObject(
@ -145,11 +146,11 @@ def get_document_paragraph_model(knowledge_id: str, instance: Dict):
        instance.get('paragraphs') if 'paragraphs' in instance else []
    )

-def save_knowledge_tags(knowledge_id: str, tags: List[Dict[str,Any]]):

+def save_knowledge_tags(knowledge_id: str, tags: List[Dict[str, Any]]):
    existed_tags_dict = {
        (key, value): str(tag_id)
-        for key,value,tag_id in QuerySet(Tag).filter(knowledge_id=knowledge_id).values_list("key", "value", "id")
+        for key, value, tag_id in QuerySet(Tag).filter(knowledge_id=knowledge_id).values_list("key", "value", "id")
    }

    tag_model_list = []
@ -158,7 +159,7 @@ def save_knowledge_tags(knowledge_id: str, tags: List[Dict[str,Any]]):
        key = tag.get("key")
        value = tag.get("value")

-        if (key,value) not in existed_tags_dict:
+        if (key, value) not in existed_tags_dict:
            tag_model = Tag(
                id=uuid.uuid7(),
                knowledge_id=knowledge_id,
@ -166,15 +167,16 @@ def save_knowledge_tags(knowledge_id: str, tags: List[Dict[str,Any]]):
                value=value
            )
            tag_model_list.append(tag_model)
-            new_tag_dict[(key,value)] = str(tag_model.id)
+            new_tag_dict[(key, value)] = str(tag_model.id)

    if tag_model_list:
        Tag.objects.bulk_create(tag_model_list)

-    all_tag_dict={**existed_tags_dict,**new_tag_dict}
+    all_tag_dict = {**existed_tags_dict, **new_tag_dict}

    return all_tag_dict, new_tag_dict

+
 def batch_add_document_tag(document_tag_map: Dict[str, List[str]]):
    """
    批量添加文档-标签关联
@ -199,12 +201,13 @@ def batch_add_document_tag(document_tag_map: Dict[str, List[str]]):
        )
        for doc_id, tag_ids in document_tag_map.items()
        for tag_id in tag_ids
-        if (doc_id,tag_id) not in existed_relations
+        if (doc_id, tag_id) not in existed_relations
    ]

    if new_relations:
        QuerySet(DocumentTag).bulk_create(new_relations)

+
 class BaseKnowledgeWriteNode(IKnowledgeWriteNode):

    def save_context(self, details, workflow_manage):
@ -241,7 +244,7 @@ class BaseKnowledgeWriteNode(IKnowledgeWriteNode):
            for tag in single_document_tag_list:
                tag_key = (tag['key'], tag['value'])
                if tag_key not in knowledge_tag_dict:
-                    knowledge_tag_dict[tag_key]= tag
+                    knowledge_tag_dict[tag_key] = tag

            if single_document_tag_list:
                document_tags_map[str(document_instance.id)] = single_document_tag_list
@ -259,9 +262,9 @@ class BaseKnowledgeWriteNode(IKnowledgeWriteNode):
            # 为每个文档添加其对应的标签
            for doc_id, doc_tags in document_tags_map.items():
                doc_tag_ids = [
-                    all_tag_dict[(tag.get("key"),tag.get("value"))]
+                    all_tag_dict[(tag.get("key"), tag.get("value"))]
                    for tag in doc_tags
-                    if (tag.get("key"),tag.get("value")) in all_tag_dict
+                    if (tag.get("key"), tag.get("value")) in all_tag_dict
                ]
                if doc_tag_ids:
                    document_tag_id_map[doc_id] = doc_tag_ids
--- a/apps/common/utils/common.py
+++ b/apps/common/utils/common.py
@ -340,3 +340,13 @@ def generate_uuid(tag: str):

 def filter_workspace(query_list):
    return [q for q in query_list if q.name != "workspace_id"]
+
+
+def filter_special_character(_str):
+    """
+    过滤特殊字符
+    """
+    s_list = ["\\u0000"]
+    for t in s_list:
+        _str = _str.replace(t, '')
+    return _str