From b85ba190db3c636029f67ff0c269cd2eb5d891a1 Mon Sep 17 00:00:00 2001 From: CaptainB Date: Wed, 16 Jul 2025 20:45:27 +0800 Subject: [PATCH] feat: add compression for file storage and retrieval in knowledge.py --- apps/knowledge/models/knowledge.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/apps/knowledge/models/knowledge.py b/apps/knowledge/models/knowledge.py index d8b030290..072068071 100644 --- a/apps/knowledge/models/knowledge.py +++ b/apps/knowledge/models/knowledge.py @@ -1,3 +1,5 @@ +import io +import zipfile from enum import Enum import uuid_utils.compat as uuid @@ -276,19 +278,38 @@ class File(AppModelMixin): def save(self, bytea=None, force_insert=False, force_update=False, using=None, update_fields=None): sha256_hash = get_sha256_hash(bytea) + # 创建压缩文件 + zip_buffer = io.BytesIO() + with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file: + # 设置压缩级别为最高(9) + zipinfo = zipfile.ZipInfo(self.file_name) + zipinfo.compress_type = zipfile.ZIP_DEFLATED + zip_file.writestr(zipinfo, bytea, compresslevel=9) + # 获取压缩后的数据 + compressed_data = zip_buffer.getvalue() f = QuerySet(File).filter(sha256_hash=sha256_hash).first() if f is not None: self.loid = f.loid else: result = select_one("SELECT lo_from_bytea(%s, %s::bytea) as loid", [0, bytea]) self.loid = result['loid'] - self.file_size = len(bytea) + self.file_size = len(compressed_data) self.sha256_hash = sha256_hash + # 可以在元数据中记录原始大小 + if 'original_size' not in self.meta: + self.meta['original_size'] = len(bytea) super().save() def get_bytes(self): result = select_one(f'SELECT lo_get({self.loid}) as "data"', []) - return result['data'] + compressed_data = result['data'] + try: + # 解压数据 + with zipfile.ZipFile(io.BytesIO(compressed_data)) as zip_file: + return zip_file.read(self.file_name) + except Exception as e: + # 如果数据不是zip格式,直接返回原始数据 + return compressed_data @receiver(pre_delete, sender=File)