From 9c8d7fc269258fb01d5bb830ebf70debba375c31 Mon Sep 17 00:00:00 2001 From: wxg0103 <727495428@qq.com> Date: Fri, 10 Jan 2025 16:08:45 +0800 Subject: [PATCH] fix: When deleting conversation logs, they should be deleted by conversation record, not by session. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --bug=1051378 --user=王孝刚 【应用】对话日志删除时应该按对话记录删除,不能按会话删除 https://www.tapd.cn/57709429/s/1645443 --- apps/common/job/clean_chat_job.py | 45 +++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/apps/common/job/clean_chat_job.py b/apps/common/job/clean_chat_job.py index 347f1a8a8..f2e5a08ff 100644 --- a/apps/common/job/clean_chat_job.py +++ b/apps/common/job/clean_chat_job.py @@ -4,16 +4,14 @@ import logging import datetime from django.db import transaction -from django.db.models.fields.json import KeyTextTransform from django.utils import timezone from apscheduler.schedulers.background import BackgroundScheduler from django_apscheduler.jobstores import DjangoJobStore -from application.models import Application, Chat -from django.db.models import Q +from application.models import Application, Chat, ChatRecord +from django.db.models import Q, Max from common.lock.impl.file_lock import FileLock from dataset.models import File -from django.db.models.functions import Cast -from django.db import models +from django.db import connection scheduler = BackgroundScheduler() scheduler.add_jobstore(DjangoJobStore(), "default") @@ -32,19 +30,38 @@ def clean_chat_log_job(): query_conditions = Q() for app_id, cutoff_date in cutoff_dates.items(): - query_conditions |= Q(application_id=app_id, create_time__lt=cutoff_date) - + query_conditions |= Q(chat__application_id=app_id, create_time__lt=cutoff_date) batch_size = 500 while True: with transaction.atomic(): - logs_to_delete = Chat.objects.filter(query_conditions).values_list('id', flat=True)[:batch_size] - count = logs_to_delete.count() - logs_to_delete_str = [str(uuid) for uuid in logs_to_delete] - if count == 0: + chat_records = ChatRecord.objects.filter(query_conditions).select_related('chat').only('id', 'chat_id', + 'create_time')[ + :batch_size] + if not chat_records: break - deleted_count, _ = Chat.objects.filter(id__in=logs_to_delete).delete() - # 删除对应的文件 - File.objects.filter(meta__chat_id__in=logs_to_delete_str).delete() + chat_record_ids = [record.id for record in chat_records] + chat_ids = {record.chat_id for record in chat_records} + + # 计算每个 chat_id 的最大 create_time + max_create_times = ChatRecord.objects.filter(id__in=chat_record_ids).values('chat_id').annotate( + max_create_time=Max('create_time')) + + # 收集需要删除的文件 + files_to_delete = [] + for record in chat_records: + max_create_time = next( + (item['max_create_time'] for item in max_create_times if item['chat_id'] == record.chat_id), None) + if max_create_time: + files_to_delete.extend( + File.objects.filter(meta__chat_id=str(record.chat_id), create_time__lt=max_create_time) + ) + # 删除 ChatRecord + deleted_count = ChatRecord.objects.filter(id__in=chat_record_ids).delete()[0] + + # 删除没有关联 ChatRecord 的 Chat + Chat.objects.filter(chatrecord__isnull=True, id__in=chat_ids).delete() + File.objects.filter(loid__in=[file.loid for file in files_to_delete]).delete() + if deleted_count < batch_size: break