mirror of
https://github.com/1Panel-dev/MaxKB.git
synced 2025-12-26 18:32:48 +00:00
168 lines
6.1 KiB
Python
168 lines
6.1 KiB
Python
# coding=utf-8
"""
    @project: maxkb
    @Author:虎
    @file: data_set.py
    @date:2023/9/21 9:35
    @desc: 数据集
"""
|
||
import uuid
|
||
|
||
from django.db import models
|
||
from django.db.models.signals import pre_delete
|
||
from django.dispatch import receiver
|
||
|
||
from common.db.sql_execute import select_one
|
||
from common.mixins.app_model_mixin import AppModelMixin
|
||
from setting.models import Model
|
||
from users.models import User
|
||
|
||
|
||
class Status(models.TextChoices):
    """
    Processing status codes used by the ``status`` field of Document and Paragraph.

    Values are written as integers; TextChoices members are strings, so each
    one is held as its single-character text form ('0' .. '4').
    """
    # Import / embedding in progress.
    embedding = (0, '导入中')
    # Finished successfully.
    success = (1, '已完成')
    # Import failed.
    error = (2, '导入失败')
    # Waiting in the queue.
    queue_up = (3, '排队中')
    # Problems (questions) are being generated.
    generating = (4, '生成问题中')
|
||
|
||
|
||
class Type(models.TextChoices):
    """
    Kind codes used by the ``type`` field of DataSet and Document.
    """
    # General-purpose type.
    base = (0, '通用类型')

    # Web-site type.
    web = (1, 'web站点类型')
|
||
|
||
|
||
class HitHandlingMethod(models.TextChoices):
    """
    Choices for ``Document.hit_handling_method``.
    """
    # Label reads "model optimization".
    optimization = ('optimization', '模型优化')
    # Label reads "return directly".
    directly_return = ('directly_return', '直接返回')
|
||
|
||
|
||
# Fixed primary key handed out by default_model(); parsed once at import time
# instead of on every call. uuid.UUID instances are immutable, so sharing one
# object between callers is safe.
_DEFAULT_EMBEDDING_MODEL_ID = uuid.UUID('42f63a3d-427e-11ef-b3ec-a8a1595801ab')


def default_model() -> uuid.UUID:
    """
    Return the fixed UUID used as the default value of ``DataSet.embedding_mode``.

    Kept as a callable (rather than a bare constant) because it is passed to the
    field as ``default=default_model``.

    :return: the constant UUID ``42f63a3d-427e-11ef-b3ec-a8a1595801ab``
    """
    return _DEFAULT_EMBEDDING_MODEL_ID
|
||
|
||
|
||
class DataSet(AppModelMixin):
    """
    Dataset table.
    """
    # Primary key; a fresh uuid1 is generated for each new row.
    id = models.UUIDField(
        verbose_name="主键id",
        primary_key=True,
        default=uuid.uuid1,
        editable=False,
        max_length=128,
    )
    # Dataset name.
    name = models.CharField(verbose_name="数据集名称", max_length=150)
    # Free-text description.
    desc = models.CharField(verbose_name="数据库描述", max_length=256)
    # Owning user; DO_NOTHING means Django takes no action when the user is deleted.
    user = models.ForeignKey(
        User,
        verbose_name="所属用户",
        on_delete=models.DO_NOTHING,
    )
    # One of the Type codes; defaults to the general-purpose type.
    type = models.CharField(
        verbose_name='类型',
        choices=Type.choices,
        default=Type.base,
        max_length=1,
    )
    # Embedding (vector) model; falls back to the id returned by default_model().
    embedding_mode = models.ForeignKey(
        Model,
        verbose_name="向量模型",
        on_delete=models.DO_NOTHING,
        default=default_model,
    )
    # Metadata as JSON; every row starts with its own empty dict.
    meta = models.JSONField(verbose_name="元数据", default=dict)

    class Meta:
        db_table = "dataset"
|
||
|
||
|
||
class Document(AppModelMixin):
    """
    Document table.
    """
    # Primary key; a fresh uuid1 is generated for each new row.
    id = models.UUIDField(
        verbose_name="主键id",
        primary_key=True,
        default=uuid.uuid1,
        editable=False,
        max_length=128,
    )
    # Dataset this document belongs to.
    dataset = models.ForeignKey(DataSet, on_delete=models.DO_NOTHING)
    # Document name.
    name = models.CharField(verbose_name="文档名称", max_length=150)
    # Character count of the document (the label marks it as a redundant field).
    char_length = models.IntegerField(verbose_name="文档字符数 冗余字段")
    # One of the Status codes; new documents start as "queued".
    status = models.CharField(
        verbose_name='状态',
        choices=Status.choices,
        default=Status.queue_up,
        max_length=1,
    )
    is_active = models.BooleanField(default=True)

    # One of the Type codes; defaults to the general-purpose type.
    type = models.CharField(
        verbose_name='类型',
        choices=Type.choices,
        default=Type.base,
        max_length=1,
    )
    # How a hit on this document is handled; defaults to model optimization.
    hit_handling_method = models.CharField(
        verbose_name='命中处理方式',
        choices=HitHandlingMethod.choices,
        default=HitHandlingMethod.optimization,
        max_length=20,
    )
    # Similarity value for direct answers; defaults to 0.9.
    directly_return_similarity = models.FloatField(verbose_name='直接回答相似度', default=0.9)

    # Metadata as JSON; every row starts with its own empty dict.
    meta = models.JSONField(verbose_name="元数据", default=dict)

    class Meta:
        db_table = "document"
|
||
|
||
|
||
class Paragraph(AppModelMixin):
    """
    Paragraph table.
    """
    # Primary key; a fresh uuid1 is generated for each new row.
    id = models.UUIDField(
        verbose_name="主键id",
        primary_key=True,
        default=uuid.uuid1,
        editable=False,
        max_length=128,
    )
    # Parent document; db_constraint=False means no foreign-key constraint is
    # created in the database for this column.
    document = models.ForeignKey(
        Document,
        on_delete=models.DO_NOTHING,
        db_constraint=False,
    )
    # Dataset this paragraph belongs to.
    dataset = models.ForeignKey(DataSet, on_delete=models.DO_NOTHING)
    # Paragraph content.
    content = models.CharField(verbose_name="段落内容", max_length=102400)
    # Title; empty by default.
    title = models.CharField(verbose_name="标题", max_length=256, default="")
    # One of the Status codes; new paragraphs start as "importing".
    status = models.CharField(
        verbose_name='状态',
        choices=Status.choices,
        default=Status.embedding,
        max_length=1,
    )
    # Hit counter, starting at zero.
    hit_num = models.IntegerField(verbose_name="命中次数", default=0)
    is_active = models.BooleanField(default=True)

    class Meta:
        db_table = "paragraph"
|
||
|
||
|
||
class Problem(AppModelMixin):
    """
    Problem (question) table.
    """
    # Primary key; a fresh uuid1 is generated for each new row.
    id = models.UUIDField(
        verbose_name="主键id",
        primary_key=True,
        default=uuid.uuid1,
        editable=False,
        max_length=128,
    )
    # Owning dataset; no foreign-key constraint is created in the database.
    dataset = models.ForeignKey(
        DataSet,
        on_delete=models.DO_NOTHING,
        db_constraint=False,
    )
    # Problem text.
    content = models.CharField(verbose_name="问题内容", max_length=256)
    # Hit counter, starting at zero.
    hit_num = models.IntegerField(verbose_name="命中次数", default=0)

    class Meta:
        db_table = "problem"
|
||
|
||
|
||
class ProblemParagraphMapping(AppModelMixin):
    """
    Association table linking a Problem to a Paragraph, together with the
    dataset and document they are recorded under.
    """
    # Primary key; a fresh uuid1 is generated for each new row.
    id = models.UUIDField(
        verbose_name="主键id",
        primary_key=True,
        default=uuid.uuid1,
        editable=False,
        max_length=128,
    )
    # dataset, problem and paragraph are declared with db_constraint=False, so
    # no foreign-key constraint is created in the database for them; document
    # keeps the default constraint.
    dataset = models.ForeignKey(
        DataSet,
        on_delete=models.DO_NOTHING,
        db_constraint=False,
    )
    document = models.ForeignKey(Document, on_delete=models.DO_NOTHING)
    problem = models.ForeignKey(
        Problem,
        on_delete=models.DO_NOTHING,
        db_constraint=False,
    )
    paragraph = models.ForeignKey(
        Paragraph,
        on_delete=models.DO_NOTHING,
        db_constraint=False,
    )

    class Meta:
        db_table = "problem_paragraph_mapping"
|
||
|
||
|
||
class Image(AppModelMixin):
    """
    Image table; the image bytes are stored directly in the row.
    """
    # Primary key; a fresh uuid1 is generated for each new row.
    id = models.UUIDField(
        verbose_name="主键id",
        primary_key=True,
        default=uuid.uuid1,
        editable=False,
        max_length=128,
    )
    # Raw image data.
    image = models.BinaryField(verbose_name="图片数据")
    # Image name; empty by default.
    image_name = models.CharField(verbose_name="图片名称", max_length=256, default="")

    class Meta:
        db_table = "image"
|
||
|
||
|
||
class File(AppModelMixin):
    """
    File table.

    The row holds the file name, JSON metadata and ``loid``, the identifier of
    the PostgreSQL large object that stores the file content.
    """
    id = models.UUIDField(primary_key=True, max_length=128, default=uuid.uuid1, editable=False, verbose_name="主键id")

    # File name; empty by default.
    file_name = models.CharField(max_length=256, verbose_name="文件名称", default="")

    # Identifier of the large object created by lo_from_bytea() in save().
    # NOTE(review): PostgreSQL OIDs are unsigned 32-bit while IntegerField is
    # signed 32-bit - confirm OIDs above 2**31 - 1 cannot occur here.
    loid = models.IntegerField(verbose_name="loid")

    # Data associated with the file, as JSON; every row starts with its own empty dict.
    meta = models.JSONField(verbose_name="文件关联数据", default=dict)

    class Meta:
        db_table = "file"

    def save(
            self, bytea=None, force_insert=False, force_update=False, using=None, update_fields=None
    ):
        """
        Store ``bytea`` as a new large object, then save the row.

        :param bytea: file content. When given, a new large object is created
            with ``lo_from_bytea`` and its identifier is written to ``loid``.
            When ``None``, the current ``loid`` is left untouched
            (``lo_from_bytea`` returns NULL for NULL input, which previously
            overwrote ``loid`` with ``None``).
        :param force_insert: forwarded to ``Model.save``
        :param force_update: forwarded to ``Model.save``
        :param using: forwarded to ``Model.save``
        :param update_fields: forwarded to ``Model.save``; ``'loid'`` is added
            when a new large object was created so the new identifier is persisted
        """
        if bytea is not None:
            # First argument 0 asks PostgreSQL to pick an unused identifier.
            result = select_one("SELECT lo_from_bytea(%s, %s::bytea) as loid", [0, bytea])
            self.loid = result['loid']
            # NOTE(review): a large object previously referenced by this row is
            # not unlinked here - confirm whether callers ever re-save with new content.
            if update_fields is not None:
                # Build a new list so the caller's sequence is not mutated.
                update_fields = [*update_fields, 'loid']
        # Forward the caller's options; they were silently dropped before.
        super().save(force_insert=force_insert, force_update=force_update, using=using,
                     update_fields=update_fields)

    def get_byte(self):
        """
        Read the whole content of the large object referenced by ``loid``.

        :return: the ``data`` column of ``SELECT lo_get(loid)`` as returned by ``select_one``
        """
        # Bound parameter instead of string formatting, matching save().
        result = select_one('SELECT lo_get(%s) as "data"', [self.loid])
        return result['data']
|
||
|
||
|
||
|
||
@receiver(pre_delete, sender=File)
def on_delete_file(sender, instance, **kwargs):
    """
    ``pre_delete`` handler for ``File``: unlink the large object referenced by
    the instance before its row is deleted.

    :param sender: the model class sending the signal (``File``)
    :param instance: the ``File`` instance about to be deleted
    :param kwargs: remaining signal arguments (unused)
    """
    # Bound parameter instead of string formatting, consistent with File.save.
    select_one('SELECT lo_unlink(%s)', [instance.loid])
|