feat: add compression for file storage and retrieval in knowledge.py

This commit is contained in:
CaptainB 2025-07-16 20:45:27 +08:00
parent 8a9e0abe3b
commit b85ba190db

View File

@ -1,3 +1,5 @@
import io
import zipfile
from enum import Enum
import uuid_utils.compat as uuid
@ -276,19 +278,38 @@ class File(AppModelMixin):
def save(self, bytea=None, force_insert=False, force_update=False, using=None, update_fields=None):
sha256_hash = get_sha256_hash(bytea)
# 创建压缩文件
zip_buffer = io.BytesIO()
with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
# 设置压缩级别为最高(9)
zipinfo = zipfile.ZipInfo(self.file_name)
zipinfo.compress_type = zipfile.ZIP_DEFLATED
zip_file.writestr(zipinfo, bytea, compresslevel=9)
# 获取压缩后的数据
compressed_data = zip_buffer.getvalue()
f = QuerySet(File).filter(sha256_hash=sha256_hash).first()
if f is not None:
self.loid = f.loid
else:
result = select_one("SELECT lo_from_bytea(%s, %s::bytea) as loid", [0, bytea])
self.loid = result['loid']
self.file_size = len(bytea)
self.file_size = len(compressed_data)
self.sha256_hash = sha256_hash
# 可以在元数据中记录原始大小
if 'original_size' not in self.meta:
self.meta['original_size'] = len(bytea)
super().save()
def get_bytes(self):
result = select_one(f'SELECT lo_get({self.loid}) as "data"', [])
return result['data']
compressed_data = result['data']
try:
# 解压数据
with zipfile.ZipFile(io.BytesIO(compressed_data)) as zip_file:
return zip_file.read(self.file_name)
except Exception as e:
# 如果数据不是zip格式直接返回原始数据
return compressed_data
@receiver(pre_delete, sender=File)