mirror of
https://github.com/1Panel-dev/MaxKB.git
synced 2025-12-26 01:33:05 +00:00
feat: Chunks stored
This commit is contained in:
parent
1d60741b4f
commit
e5a2c576dc
|
|
@ -16,6 +16,7 @@ from rest_framework import serializers
|
|||
from django.utils.translation import gettext_lazy as _
|
||||
from application.flow.i_step_node import NodeResult
|
||||
from application.flow.step_node.knowledge_write_node.i_knowledge_write_node import IKnowledgeWriteNode
|
||||
from common.chunk import text_to_chunk
|
||||
from common.utils.common import bulk_create_in_batches
|
||||
from knowledge.models import Document, KnowledgeType, Paragraph, File, FileSourceType, Problem, ProblemParagraphMapping
|
||||
from knowledge.serializers.common import ProblemParagraphObject, ProblemParagraphManage
|
||||
|
|
@ -67,14 +68,14 @@ def link_file(source_file_id, document_id):
|
|||
# 保存文件内容和元数据
|
||||
new_file.save(file_content)
|
||||
|
||||
|
||||
def get_paragraph_problem_model(knowledge_id: str, document_id: str, instance: Dict):
|
||||
paragraph = Paragraph(
|
||||
id=uuid.uuid7(),
|
||||
document_id=document_id,
|
||||
content=instance.get("content"),
|
||||
knowledge_id=knowledge_id,
|
||||
title=instance.get("title") if 'title' in instance else ''
|
||||
title=instance.get("title") if 'title' in instance else '',
|
||||
chunks = instance.get('chunks') if 'chunks' in instance else text_to_chunk(instance.get("content")),
|
||||
)
|
||||
|
||||
problem_paragraph_object_list = [ProblemParagraphObject(
|
||||
|
|
|
|||
|
|
@ -0,0 +1,19 @@
|
|||
# Generated by Django 5.2.8 on 2025-11-24 07:09
|
||||
|
||||
import django.contrib.postgres.fields
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('knowledge', '0005_knowledgeaction'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='paragraph',
|
||||
name='chunks',
|
||||
field=django.contrib.postgres.fields.ArrayField(base_field=models.CharField(), default=list, size=None, verbose_name='块'),
|
||||
),
|
||||
]
|
||||
|
|
@ -3,6 +3,7 @@ import zipfile
|
|||
from enum import Enum
|
||||
|
||||
import uuid_utils.compat as uuid
|
||||
from django.contrib.postgres.fields import ArrayField
|
||||
from django.contrib.postgres.search import SearchVectorField
|
||||
from django.db import models
|
||||
from django.db.models import QuerySet
|
||||
|
|
@ -242,6 +243,7 @@ class Paragraph(AppModelMixin):
|
|||
hit_num = models.IntegerField(verbose_name="命中次数", default=0)
|
||||
is_active = models.BooleanField(default=True, db_index=True)
|
||||
position = models.IntegerField(verbose_name="段落顺序", default=0, db_index=True)
|
||||
chunks = ArrayField(verbose_name="块", base_field=models.CharField(), default=list)
|
||||
|
||||
class Meta:
|
||||
db_table = "paragraph"
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ lock = threading.Lock()
|
|||
def chunk_data(data: Dict):
|
||||
if str(data.get('source_type')) == str(SourceType.PARAGRAPH.value):
|
||||
text = data.get('text')
|
||||
chunk_list = text_to_chunk(text)
|
||||
chunk_list = data.get('chunks') if data.get('chunks') else text_to_chunk(text)
|
||||
return [{**data, 'text': chunk} for chunk in chunk_list]
|
||||
return [data]
|
||||
|
||||
|
|
@ -63,7 +63,8 @@ class BaseVectorStore(ABC):
|
|||
BaseVectorStore.vector_exists = True
|
||||
return True
|
||||
|
||||
def save(self, text, source_type: SourceType, knowledge_id: str, document_id: str, paragraph_id: str, source_id: str,
|
||||
def save(self, text, source_type: SourceType, knowledge_id: str, document_id: str, paragraph_id: str,
|
||||
source_id: str,
|
||||
is_active: bool,
|
||||
embedding: Embeddings):
|
||||
"""
|
||||
|
|
@ -104,7 +105,8 @@ class BaseVectorStore(ABC):
|
|||
break
|
||||
|
||||
@abstractmethod
|
||||
def _save(self, text, source_type: SourceType, knowledge_id: str, document_id: str, paragraph_id: str, source_id: str,
|
||||
def _save(self, text, source_type: SourceType, knowledge_id: str, document_id: str, paragraph_id: str,
|
||||
source_id: str,
|
||||
is_active: bool,
|
||||
embedding: Embeddings):
|
||||
pass
|
||||
|
|
|
|||
Loading…
Reference in New Issue