From 3b5a02307c5a4cabb637b4f8782e14a07caf6940 Mon Sep 17 00:00:00 2001
From: CaptainB
Date: Tue, 29 Apr 2025 11:47:50 +0800
Subject: [PATCH] refactor: celery config

---
 ...nt_paragraph_embedding_problem_and_more.py | 104 ++++++++++++++
 apps/maxkb/settings/__init__.py               |   2 +
 apps/maxkb/settings/base.py                   |   1 +
 apps/maxkb/settings/lib.py                    |  48 +++++++
 apps/maxkb/settings/logging.py                | 128 ++++++++++++++++++
 5 files changed, 283 insertions(+)
 create mode 100644 apps/knowledge/migrations/0002_document_paragraph_embedding_problem_and_more.py
 create mode 100644 apps/maxkb/settings/lib.py
 create mode 100644 apps/maxkb/settings/logging.py

diff --git a/apps/knowledge/migrations/0002_document_paragraph_embedding_problem_and_more.py b/apps/knowledge/migrations/0002_document_paragraph_embedding_problem_and_more.py
new file mode 100644
index 000000000..cfe94b5f8
--- /dev/null
+++ b/apps/knowledge/migrations/0002_document_paragraph_embedding_problem_and_more.py
@@ -0,0 +1,104 @@
+# Generated by Django 5.2 on 2025-04-29 03:28
+
+import django.contrib.postgres.search
+import django.db.models.deletion
+import knowledge.models.knowledge
+import uuid_utils.compat
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Create the document, paragraph, embedding, problem and problem_paragraph_mapping tables."""
+    dependencies = [
+        ('knowledge', '0001_initial'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='Document',
+            fields=[
+                ('create_time', models.DateTimeField(auto_now_add=True, verbose_name='创建时间')),
+                ('update_time', models.DateTimeField(auto_now=True, verbose_name='修改时间')),
+                ('id', models.UUIDField(default=uuid_utils.compat.uuid7, editable=False, primary_key=True, serialize=False, verbose_name='主键id')),
+                ('name', models.CharField(max_length=150, verbose_name='文档名称')),
+                ('char_length', models.IntegerField(verbose_name='文档字符数 冗余字段')),
+                ('status', models.CharField(default=knowledge.models.knowledge.Status.__str__, max_length=20, verbose_name='状态')),  # NOTE(review): default is the unbound Status.__str__ (also used by Paragraph.status) - confirm against the model
+                ('status_meta', models.JSONField(default=knowledge.models.knowledge.default_status_meta, verbose_name='状态统计数据')),
+                ('is_active', models.BooleanField(default=True)),
+                ('type', models.IntegerField(choices=[(0, '通用类型'), (1, 'web站点类型'), (2, '飞书类型'), (3, '语雀类型')], default=0, verbose_name='类型')),
+                ('hit_handling_method', models.CharField(choices=[('optimization', '模型优化'), ('directly_return', '直接返回')], default='optimization', max_length=20, verbose_name='命中处理方式')),
+                ('directly_return_similarity', models.FloatField(default=0.9, verbose_name='直接回答相似度')),
+                ('meta', models.JSONField(default=dict, verbose_name='元数据')),
+                ('knowledge', models.ForeignKey(on_delete=django.db.models.deletion.DO_NOTHING, to='knowledge.knowledge', verbose_name='知识库id')),
+            ],
+            options={
+                'db_table': 'document',
+            },
+        ),
+        migrations.CreateModel(
+            name='Paragraph',
+            fields=[
+                ('create_time', models.DateTimeField(auto_now_add=True, verbose_name='创建时间')),
+                ('update_time', models.DateTimeField(auto_now=True, verbose_name='修改时间')),
+                ('id', models.UUIDField(default=uuid_utils.compat.uuid7, editable=False, primary_key=True, serialize=False, verbose_name='主键id')),
+                ('content', models.CharField(max_length=102400, verbose_name='段落内容')),
+                ('title', models.CharField(default='', max_length=256, verbose_name='标题')),
+                ('status', models.CharField(default=knowledge.models.knowledge.Status.__str__, max_length=20, verbose_name='状态')),
+                ('status_meta', models.JSONField(default=knowledge.models.knowledge.default_status_meta, verbose_name='状态数据')),
+                ('hit_num', models.IntegerField(default=0, verbose_name='命中次数')),
+                ('is_active', models.BooleanField(default=True)),
+                ('document', models.ForeignKey(db_constraint=False, on_delete=django.db.models.deletion.DO_NOTHING, to='knowledge.document')),
+                ('knowledge', models.ForeignKey(on_delete=django.db.models.deletion.DO_NOTHING, to='knowledge.knowledge')),
+            ],
+            options={
+                'db_table': 'paragraph',
+            },
+        ),
+        migrations.CreateModel(
+            name='Embedding',
+            fields=[
+                ('id', models.CharField(max_length=128, primary_key=True, serialize=False, verbose_name='主键id')),
+                ('source_id', models.CharField(max_length=128, verbose_name='资源id')),
+                ('source_type', models.CharField(choices=[(0, '问题'), (1, '段落'), (2, '标题')], default=0, max_length=5, verbose_name='资源类型')),  # NOTE(review): integer choice keys and default on a CharField - confirm against the model
+                ('is_active', models.BooleanField(default=True, max_length=1, verbose_name='是否可用')),
+                ('embedding', knowledge.models.knowledge.VectorField(verbose_name='向量')),
+                ('search_vector', django.contrib.postgres.search.SearchVectorField(default='', verbose_name='分词')),
+                ('meta', models.JSONField(default=dict, verbose_name='元数据')),
+                ('document', models.ForeignKey(db_constraint=False, on_delete=django.db.models.deletion.DO_NOTHING, to='knowledge.document', verbose_name='文档关联')),
+                ('knowledge', models.ForeignKey(db_constraint=False, on_delete=django.db.models.deletion.DO_NOTHING, to='knowledge.knowledge', verbose_name='文档关联')),
+                ('paragraph', models.ForeignKey(db_constraint=False, on_delete=django.db.models.deletion.DO_NOTHING, to='knowledge.paragraph', verbose_name='段落关联')),
+            ],
+            options={
+                'db_table': 'embedding',
+            },
+        ),
+        migrations.CreateModel(
+            name='Problem',
+            fields=[
+                ('create_time', models.DateTimeField(auto_now_add=True, verbose_name='创建时间')),
+                ('update_time', models.DateTimeField(auto_now=True, verbose_name='修改时间')),
+                ('id', models.UUIDField(default=uuid_utils.compat.uuid7, editable=False, primary_key=True, serialize=False, verbose_name='主键id')),
+                ('content', models.CharField(max_length=256, verbose_name='问题内容')),
+                ('hit_num', models.IntegerField(default=0, verbose_name='命中次数')),
+                ('knowledge', models.ForeignKey(db_constraint=False, on_delete=django.db.models.deletion.DO_NOTHING, to='knowledge.knowledge')),
+            ],
+            options={
+                'db_table': 'problem',
+            },
+        ),
+        migrations.CreateModel(
+            name='ProblemParagraphMapping',
+            fields=[
+                ('create_time', models.DateTimeField(auto_now_add=True, verbose_name='创建时间')),
+                ('update_time', models.DateTimeField(auto_now=True, verbose_name='修改时间')),
+                ('id', models.UUIDField(default=uuid_utils.compat.uuid7, editable=False, primary_key=True, serialize=False, verbose_name='主键id')),
+                ('document', models.ForeignKey(on_delete=django.db.models.deletion.DO_NOTHING, to='knowledge.document')),
+                ('knowledge', models.ForeignKey(db_constraint=False, on_delete=django.db.models.deletion.DO_NOTHING, to='knowledge.knowledge')),
+                ('paragraph', models.ForeignKey(db_constraint=False, on_delete=django.db.models.deletion.DO_NOTHING, to='knowledge.paragraph')),
+                ('problem', models.ForeignKey(db_constraint=False, on_delete=django.db.models.deletion.DO_NOTHING, to='knowledge.problem')),
+            ],
+            options={
+                'db_table': 'problem_paragraph_mapping',
+            },
+        ),
+    ]
diff --git a/apps/maxkb/settings/__init__.py b/apps/maxkb/settings/__init__.py
index 43753b734..8333fa1bd 100644
--- a/apps/maxkb/settings/__init__.py
+++ b/apps/maxkb/settings/__init__.py
@@ -7,4 +7,6 @@
     @desc:
 """
 from .base import *
+from .logging import *
 from .auth import *
+from .lib import *
diff --git a/apps/maxkb/settings/base.py b/apps/maxkb/settings/base.py
index c6a75353c..a87635514 100644
--- a/apps/maxkb/settings/base.py
+++ b/apps/maxkb/settings/base.py
@@ -44,6 +44,7 @@ INSTALLED_APPS = [
     'common',
     'system_manage',
     'models_provider',
+    'django_celery_beat',
 ]
 
 MIDDLEWARE = [
diff --git a/apps/maxkb/settings/lib.py b/apps/maxkb/settings/lib.py
new file mode 100644
index 000000000..ef74174ad
--- /dev/null
+++ b/apps/maxkb/settings/lib.py
@@ -0,0 +1,48 @@
+# coding=utf-8
+"""
+    @project: MaxKB
+    @Author:虎
+    @file: lib.py
+    @date:2024/8/16 17:12
+    @desc: Celery settings (SQLite broker/result backend, celery_once, log directory)
+"""
+import os
+import shutil
+
+from maxkb.const import CONFIG, PROJECT_DIR
+
+# Celery-related settings
+celery_data_dir = os.path.join(PROJECT_DIR, 'data', 'celery_task')
+# exist_ok avoids a FileExistsError when several processes import settings concurrently
+os.makedirs(celery_data_dir, exist_ok=True)
+broker_path = os.path.join(celery_data_dir, "celery_db.sqlite3")
+backend_path = os.path.join(celery_data_dir, "celery_results.sqlite3")
+# Use SQLite as both the broker and the result backend
+CELERY_BROKER_URL = f'sqla+sqlite:///{broker_path}'
+CELERY_result_backend = f'db+sqlite:///{backend_path}'  # NOTE(review): mixed-case names are not exposed by django.conf.settings - confirm how the Celery app reads them
+CELERY_timezone = CONFIG.TIME_ZONE
+CELERY_ENABLE_UTC = False
+CELERY_task_serializer = 'pickle'
+CELERY_result_serializer = 'pickle'
+CELERY_accept_content = ['json', 'pickle']  # NOTE(review): unpickling can run arbitrary code; only safe while the broker is not writable by untrusted parties
+CELERY_RESULT_EXPIRES = 600
+CELERY_WORKER_TASK_LOG_FORMAT = '%(asctime).19s %(message)s'
+CELERY_WORKER_LOG_FORMAT = '%(asctime).19s %(message)s'
+CELERY_TASK_EAGER_PROPAGATES = True
+CELERY_WORKER_REDIRECT_STDOUTS = True
+CELERY_WORKER_REDIRECT_STDOUTS_LEVEL = "INFO"
+CELERY_TASK_SOFT_TIME_LIMIT = 3600
+CELERY_WORKER_CANCEL_LONG_RUNNING_TASKS_ON_CONNECTION_LOSS = True
+CELERY_ACKS_LATE = True
+celery_once_path = os.path.join(celery_data_dir, "celery_once")
+# Best-effort removal of the celery_once directory at import time (presumably
+# stale locks left by a previous run - confirm). ignore_errors keeps the
+# original contract: settings import never fails because the directory is
+# missing or cannot be removed.
+shutil.rmtree(celery_once_path, ignore_errors=True)
+CELERY_ONCE = {
+    'backend': 'celery_once.backends.File',
+    'settings': {'location': celery_once_path}
+}
+CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP = True
+CELERY_LOG_DIR = os.path.join(PROJECT_DIR, 'logs', 'celery')
diff --git a/apps/maxkb/settings/logging.py b/apps/maxkb/settings/logging.py
new file mode 100644
index 000000000..9c3df8c15
--- /dev/null
+++ b/apps/maxkb/settings/logging.py
@@ -0,0 +1,128 @@
+# -*- coding: utf-8 -*-
+# Logging settings: log file locations and the Django LOGGING dict.
+import os
+
+from ..const import PROJECT_DIR, CONFIG
+
+LOG_DIR = os.path.join(PROJECT_DIR, 'data', 'logs')
+MAX_KB_LOG_FILE = os.path.join(LOG_DIR, 'max_kb.log')
+DRF_EXCEPTION_LOG_FILE = os.path.join(LOG_DIR, 'drf_exception.log')
+UNEXPECTED_EXCEPTION_LOG_FILE = os.path.join(LOG_DIR, 'unexpected_exception.log')
+LOG_LEVEL = "DEBUG"
+
+LOGGING = {
+    'version': 1,
+    'disable_existing_loggers': False,
+    'formatters': {
+        'verbose': {
+            'format': '%(levelname)s %(asctime)s %(module)s %(process)d %(thread)d %(message)s'
+        },
+        'main': {
+            'datefmt': '%Y-%m-%d %H:%M:%S',
+            'format': '%(asctime)s [%(module)s %(levelname)s] %(message)s',
+        },
+        'exception': {
+            'datefmt': '%Y-%m-%d %H:%M:%S',
+            'format': '\n%(asctime)s [%(levelname)s] %(message)s',
+        },
+        'simple': {
+            'format': '%(levelname)s %(message)s'
+        },
+        'syslog': {
+            'format': 'jumpserver: %(message)s'  # NOTE(review): 'jumpserver' tag looks copied from another project - confirm before enabling a real syslog handler
+        },
+        'msg': {
+            'format': '%(message)s'
+        }
+    },
+    'handlers': {
+        'null': {
+            'level': 'DEBUG',
+            'class': 'logging.NullHandler',
+        },
+        'console': {
+            'level': 'DEBUG',
+            'class': 'logging.StreamHandler',
+            'formatter': 'main'
+        },
+        'file': {
+            'encoding': 'utf8',
+            'level': 'DEBUG',
+            'class': 'logging.handlers.RotatingFileHandler',
+            'maxBytes': 1024 * 1024 * 100,
+            'backupCount': 7,
+            'formatter': 'main',
+            'filename': MAX_KB_LOG_FILE,
+        },
+        'drf_exception': {
+            'encoding': 'utf8',
+            'level': 'DEBUG',
+            'class': 'logging.handlers.RotatingFileHandler',
+            'formatter': 'exception',
+            'maxBytes': 1024 * 1024 * 100,
+            'backupCount': 7,
+            'filename': DRF_EXCEPTION_LOG_FILE,
+        },
+        'unexpected_exception': {
+            'encoding': 'utf8',
+            'level': 'DEBUG',
+            'class': 'logging.handlers.RotatingFileHandler',
+            'formatter': 'exception',
+            'maxBytes': 1024 * 1024 * 100,
+            'backupCount': 7,
+            'filename': UNEXPECTED_EXCEPTION_LOG_FILE,
+        },
+        'syslog': {
+            'level': 'INFO',
+            'class': 'logging.NullHandler',
+            'formatter': 'syslog'
+        },
+    },
+    'loggers': {
+        'django': {
+            'handlers': ['null'],
+            'propagate': False,
+            'level': LOG_LEVEL,
+        },
+        'django.request': {
+            'handlers': ['console', 'file', 'syslog'],
+            'level': LOG_LEVEL,
+            'propagate': False,
+        },
+        'sqlalchemy': {
+            'handlers': ['console', 'file', 'syslog'],
+            'level': "ERROR",
+            'propagate': False,
+        },
+        'django.db.backends': {
+            'handlers': ['console', 'file', 'syslog'],
+            'propagate': False,
+            'level': LOG_LEVEL,
+        },
+        'django.server': {
+            'handlers': ['console', 'file', 'syslog'],
+            'level': LOG_LEVEL,
+            'propagate': False,
+        },
+        'max_kb_error': {
+            'handlers': ['console', 'unexpected_exception'],
+            'level': LOG_LEVEL,
+            'propagate': False,
+        },
+        'max_kb': {
+            'handlers': ['console', 'file'],
+            'level': LOG_LEVEL,
+            'propagate': False,
+        },
+        'common.event': {
+            'handlers': ['console', 'file'],
+            'level': "DEBUG",
+            'propagate': False,
+        },
+    }
+}
+
+SYSLOG_ENABLE = CONFIG.SYSLOG_ENABLE
+
+# exist_ok avoids a FileExistsError race when several processes import settings at once
+os.makedirs(LOG_DIR, mode=0o755, exist_ok=True)