mirror of
https://github.com/1Panel-dev/MaxKB.git
synced 2025-12-26 01:33:05 +00:00
57 lines
2.2 KiB
Python
57 lines
2.2 KiB
Python
# Generated by Django 4.1.13 on 2024-04-16 11:43
|
|
|
|
import django.contrib.postgres.search
|
|
from django.db import migrations
|
|
|
|
from common.util.common import sub_array
|
|
from common.util.ts_vecto_util import to_ts_vector
|
|
from dataset.models import Status
|
|
from embedding.models import Embedding
|
|
|
|
|
|
def update_embedding_search_vector(embedding, paragraph_list):
|
|
paragraphs = [paragraph for paragraph in paragraph_list if paragraph.id == embedding.get('paragraph')]
|
|
if len(paragraphs) > 0:
|
|
content = paragraphs[0].title + paragraphs[0].content
|
|
return Embedding(id=embedding.get('id'), search_vector=to_ts_vector(content))
|
|
return Embedding(id=embedding.get('id'), search_vector="")
|
|
|
|
|
|
def save_keywords(apps, schema_editor):
|
|
document = apps.get_model("dataset", "Document")
|
|
embedding = apps.get_model("embedding", "Embedding")
|
|
paragraph = apps.get_model('dataset', 'Paragraph')
|
|
db_alias = schema_editor.connection.alias
|
|
document_list = document.objects.using(db_alias).all()
|
|
for document in document_list:
|
|
document.status = Status.embedding
|
|
document.save()
|
|
paragraph_list = paragraph.objects.using(db_alias).filter(document=document).all()
|
|
embedding_list = embedding.objects.using(db_alias).filter(document=document).values('id', 'search_vector',
|
|
'paragraph')
|
|
embedding_update_list = [update_embedding_search_vector(embedding, paragraph_list) for embedding
|
|
in embedding_list]
|
|
child_array = sub_array(embedding_update_list, 50)
|
|
for c in child_array:
|
|
try:
|
|
embedding.objects.using(db_alias).bulk_update(c, ['search_vector'])
|
|
except Exception as e:
|
|
print(e)
|
|
document.status = Status.success
|
|
document.save()
|
|
|
|
|
|
class Migration(migrations.Migration):
|
|
dependencies = [
|
|
('embedding', '0001_initial'),
|
|
]
|
|
|
|
operations = [
|
|
migrations.AddField(
|
|
model_name='embedding',
|
|
name='search_vector',
|
|
field=django.contrib.postgres.search.SearchVectorField(default='', verbose_name='分词'),
|
|
),
|
|
migrations.RunPython(save_keywords)
|
|
]
|