From 62c959f905f576cfef51300f73a8393887f28903 Mon Sep 17 00:00:00 2001 From: shaohuzhang1 <80892890+shaohuzhang1@users.noreply.github.com> Date: Mon, 29 Apr 2024 14:09:58 +0800 Subject: [PATCH] =?UTF-8?q?perf:=20=E4=BC=98=E5=8C=96=E5=8E=86=E5=8F=B2?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E5=88=86=E8=AF=8D=E5=A4=84=E7=90=86=E5=85=9C?= =?UTF-8?q?=E5=BA=95=E6=93=8D=E4=BD=9C=20(#313)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../0002_embedding_search_vector.py | 45 ++++++++++--------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/apps/embedding/migrations/0002_embedding_search_vector.py b/apps/embedding/migrations/0002_embedding_search_vector.py index 3ed58d582..7d06d6046 100644 --- a/apps/embedding/migrations/0002_embedding_search_vector.py +++ b/apps/embedding/migrations/0002_embedding_search_vector.py @@ -18,27 +18,30 @@ def update_embedding_search_vector(embedding, paragraph_list): def save_keywords(apps, schema_editor): - document = apps.get_model("dataset", "Document") - embedding = apps.get_model("embedding", "Embedding") - paragraph = apps.get_model('dataset', 'Paragraph') - db_alias = schema_editor.connection.alias - document_list = document.objects.using(db_alias).all() - for document in document_list: - document.status = Status.embedding - document.save() - paragraph_list = paragraph.objects.using(db_alias).filter(document=document).all() - embedding_list = embedding.objects.using(db_alias).filter(document=document).values('id', 'search_vector', - 'paragraph') - embedding_update_list = [update_embedding_search_vector(embedding, paragraph_list) for embedding - in embedding_list] - child_array = sub_array(embedding_update_list, 50) - for c in child_array: - try: - embedding.objects.using(db_alias).bulk_update(c, ['search_vector']) - except Exception as e: - print(e) - document.status = Status.success - document.save() + try: + document = apps.get_model("dataset", "Document") + embedding = apps.get_model("embedding", "Embedding") + paragraph = apps.get_model('dataset', 'Paragraph') + db_alias = schema_editor.connection.alias + document_list = document.objects.using(db_alias).all() + for document in document_list: + document.status = Status.embedding + document.save() + paragraph_list = paragraph.objects.using(db_alias).filter(document=document).all() + embedding_list = embedding.objects.using(db_alias).filter(document=document).values('id', 'search_vector', + 'paragraph') + embedding_update_list = [update_embedding_search_vector(embedding, paragraph_list) for embedding + in embedding_list] + child_array = sub_array(embedding_update_list, 50) + for c in child_array: + try: + embedding.objects.using(db_alias).bulk_update(c, ['search_vector']) + except Exception as e: + print(e) + document.status = Status.success + document.save() + except Exception as e: + print(e) class Migration(migrations.Migration):