mirror of
https://github.com/1Panel-dev/MaxKB.git
synced 2025-12-26 01:33:05 +00:00
fix: Filter special character
This commit is contained in:
parent
638f90e69f
commit
9347696676
|
|
@ -17,7 +17,7 @@ from django.utils.translation import gettext_lazy as _
|
|||
from application.flow.i_step_node import NodeResult
|
||||
from application.flow.step_node.knowledge_write_node.i_knowledge_write_node import IKnowledgeWriteNode
|
||||
from common.chunk import text_to_chunk
|
||||
from common.utils.common import bulk_create_in_batches
|
||||
from common.utils.common import bulk_create_in_batches, filter_special_character
|
||||
from knowledge.models import Document, KnowledgeType, Paragraph, File, FileSourceType, Problem, ProblemParagraphMapping, \
|
||||
Tag, DocumentTag
|
||||
from knowledge.serializers.common import ProblemParagraphObject, ProblemParagraphManage
|
||||
|
|
@ -83,10 +83,11 @@ def get_paragraph_problem_model(knowledge_id: str, document_id: str, instance: D
|
|||
paragraph = Paragraph(
|
||||
id=uuid.uuid7(),
|
||||
document_id=document_id,
|
||||
content=instance.get("content"),
|
||||
content=filter_special_character(instance.get("content")),
|
||||
knowledge_id=knowledge_id,
|
||||
title=instance.get("title") if 'title' in instance else '',
|
||||
chunks=instance.get('chunks') if 'chunks' in instance else text_to_chunk(instance.get("content")),
|
||||
chunks=[filter_special_character(c) for c in (instance.get('chunks') if 'chunks' in instance else text_to_chunk(
|
||||
instance.get("content")))],
|
||||
)
|
||||
|
||||
problem_paragraph_object_list = [ProblemParagraphObject(
|
||||
|
|
@ -145,11 +146,11 @@ def get_document_paragraph_model(knowledge_id: str, instance: Dict):
|
|||
instance.get('paragraphs') if 'paragraphs' in instance else []
|
||||
)
|
||||
|
||||
def save_knowledge_tags(knowledge_id: str, tags: List[Dict[str,Any]]):
|
||||
|
||||
def save_knowledge_tags(knowledge_id: str, tags: List[Dict[str, Any]]):
|
||||
existed_tags_dict = {
|
||||
(key, value): str(tag_id)
|
||||
for key,value,tag_id in QuerySet(Tag).filter(knowledge_id=knowledge_id).values_list("key", "value", "id")
|
||||
for key, value, tag_id in QuerySet(Tag).filter(knowledge_id=knowledge_id).values_list("key", "value", "id")
|
||||
}
|
||||
|
||||
tag_model_list = []
|
||||
|
|
@ -158,7 +159,7 @@ def save_knowledge_tags(knowledge_id: str, tags: List[Dict[str,Any]]):
|
|||
key = tag.get("key")
|
||||
value = tag.get("value")
|
||||
|
||||
if (key,value) not in existed_tags_dict:
|
||||
if (key, value) not in existed_tags_dict:
|
||||
tag_model = Tag(
|
||||
id=uuid.uuid7(),
|
||||
knowledge_id=knowledge_id,
|
||||
|
|
@ -166,15 +167,16 @@ def save_knowledge_tags(knowledge_id: str, tags: List[Dict[str,Any]]):
|
|||
value=value
|
||||
)
|
||||
tag_model_list.append(tag_model)
|
||||
new_tag_dict[(key,value)] = str(tag_model.id)
|
||||
new_tag_dict[(key, value)] = str(tag_model.id)
|
||||
|
||||
if tag_model_list:
|
||||
Tag.objects.bulk_create(tag_model_list)
|
||||
|
||||
all_tag_dict={**existed_tags_dict,**new_tag_dict}
|
||||
all_tag_dict = {**existed_tags_dict, **new_tag_dict}
|
||||
|
||||
return all_tag_dict, new_tag_dict
|
||||
|
||||
|
||||
def batch_add_document_tag(document_tag_map: Dict[str, List[str]]):
|
||||
"""
|
||||
批量添加文档-标签关联
|
||||
|
|
@ -199,12 +201,13 @@ def batch_add_document_tag(document_tag_map: Dict[str, List[str]]):
|
|||
)
|
||||
for doc_id, tag_ids in document_tag_map.items()
|
||||
for tag_id in tag_ids
|
||||
if (doc_id,tag_id) not in existed_relations
|
||||
if (doc_id, tag_id) not in existed_relations
|
||||
]
|
||||
|
||||
if new_relations:
|
||||
QuerySet(DocumentTag).bulk_create(new_relations)
|
||||
|
||||
|
||||
class BaseKnowledgeWriteNode(IKnowledgeWriteNode):
|
||||
|
||||
def save_context(self, details, workflow_manage):
|
||||
|
|
@ -241,7 +244,7 @@ class BaseKnowledgeWriteNode(IKnowledgeWriteNode):
|
|||
for tag in single_document_tag_list:
|
||||
tag_key = (tag['key'], tag['value'])
|
||||
if tag_key not in knowledge_tag_dict:
|
||||
knowledge_tag_dict[tag_key]= tag
|
||||
knowledge_tag_dict[tag_key] = tag
|
||||
|
||||
if single_document_tag_list:
|
||||
document_tags_map[str(document_instance.id)] = single_document_tag_list
|
||||
|
|
@ -259,9 +262,9 @@ class BaseKnowledgeWriteNode(IKnowledgeWriteNode):
|
|||
# 为每个文档添加其对应的标签
|
||||
for doc_id, doc_tags in document_tags_map.items():
|
||||
doc_tag_ids = [
|
||||
all_tag_dict[(tag.get("key"),tag.get("value"))]
|
||||
all_tag_dict[(tag.get("key"), tag.get("value"))]
|
||||
for tag in doc_tags
|
||||
if (tag.get("key"),tag.get("value")) in all_tag_dict
|
||||
if (tag.get("key"), tag.get("value")) in all_tag_dict
|
||||
]
|
||||
if doc_tag_ids:
|
||||
document_tag_id_map[doc_id] = doc_tag_ids
|
||||
|
|
|
|||
|
|
@ -340,3 +340,13 @@ def generate_uuid(tag: str):
|
|||
|
||||
def filter_workspace(query_list):
    """
    Return the entries of ``query_list`` whose ``name`` attribute is not
    ``"workspace_id"``, preserving their original order.

    :param query_list: iterable of objects exposing a ``name`` attribute
    :return: a new list without the ``workspace_id`` entries
    """
    kept = []
    for entry in query_list:
        if entry.name != "workspace_id":
            kept.append(entry)
    return kept
|
||||
|
||||
|
||||
def filter_special_character(_str):
    """
    Filter special characters out of a string.

    Removes every occurrence of the NUL character (U+0000), both as the raw
    character and as its six-character escaped spelling (a backslash followed
    by ``u0000``).

    :param _str: text to clean; ``None`` is passed through unchanged so that
                 callers handing over an optional value do not crash
    :return: ``_str`` with the special characters removed, or ``None`` when
             ``_str`` is ``None``
    """
    if _str is None:
        return _str
    # "\x00" is the real NUL character; "\\u0000" is the literal escaped text
    # that can survive in content which was serialized and never decoded.
    # NOTE(review): presumably this guards a storage backend that rejects NUL
    # in text values (e.g. PostgreSQL) - confirm against the deployment.
    s_list = ["\x00", "\\u0000"]
    for t in s_list:
        _str = _str.replace(t, '')
    return _str
|
||||
|
|
|
|||
Loading…
Reference in New Issue