feat: 日志打印,嵌入脚本

This commit is contained in:
shaohuzhang1 2023-12-21 12:16:39 +08:00
parent 303251f6cb
commit b6f7537c2b
7 changed files with 75 additions and 44 deletions

View File

@ -13,3 +13,4 @@ from .listener_chat_message import *
def run():
    """Start the background listener processes and recover stuck documents.

    Launches the management listener and the chat-message listener, then
    resets any Document still marked ``Status.embedding`` to ``Status.error``.
    NOTE(review): presumably this reset handles documents left mid-embedding
    by an unclean shutdown, so they are not stuck in-progress forever and can
    be retried — confirm against the callers of this startup hook.
    """
    listener_manage.ListenerManagement().run()
    listener_chat_message.ListenerChatMessage().run()
    # Flip every document frozen in the `embedding` state to `error`.
    QuerySet(Document).filter(status=Status.embedding).update(**{'status': Status.error})

View File

@ -6,7 +6,9 @@
@date: 2023/10/20 14:01
@desc:
"""
import logging
import os
import traceback
import django.db.models
from blinker import signal
@ -20,6 +22,9 @@ from dataset.models import Paragraph, Status, Document
from embedding.models import SourceType
from smartdoc.conf import PROJECT_DIR
max_kb_error = logging.getLogger("max_kb_error")
max_kb = logging.getLogger("max_kb")
class ListenerManagement:
embedding_by_problem_signal = signal("embedding_by_problem")
@ -46,6 +51,7 @@ class ListenerManagement:
:param paragraph_id: 段落id
:return: None
"""
max_kb.info(f"开始--->向量化段落:{paragraph_id}")
status = Status.success
try:
data_list = native_search(
@ -59,8 +65,11 @@ class ListenerManagement:
# 批量向量化
VectorStore.get_embedding_vector().batch_save(data_list)
except Exception as e:
max_kb_error.error(f'向量化段落:{paragraph_id}出现错误{str(e)}{traceback.format_exc()}')
status = Status.error
QuerySet(Paragraph).filter(id=paragraph_id).update(**{'status': status})
finally:
QuerySet(Paragraph).filter(id=paragraph_id).update(**{'status': status})
max_kb.info(f'结束--->向量化段落:{paragraph_id}')
@staticmethod
@poxy
@ -70,6 +79,7 @@ class ListenerManagement:
:param document_id: 文档id
:return: None
"""
max_kb.info(f"开始--->向量化文档:{document_id}")
status = Status.success
try:
data_list = native_search(
@ -83,10 +93,13 @@ class ListenerManagement:
# 批量向量化
VectorStore.get_embedding_vector().batch_save(data_list)
except Exception as e:
max_kb_error.error(f'向量化文档:{document_id}出现错误{str(e)}{traceback.format_exc()}')
status = Status.error
# 修改状态
QuerySet(Document).filter(id=document_id).update(**{'status': status})
QuerySet(Paragraph).filter(document_id=document_id).update(**{'status': status})
finally:
# 修改状态
QuerySet(Document).filter(id=document_id).update(**{'status': status})
QuerySet(Paragraph).filter(document_id=document_id).update(**{'status': status})
max_kb.info(f"结束--->向量化文档:{document_id}")
@staticmethod
@poxy
@ -96,9 +109,15 @@ class ListenerManagement:
:param dataset_id: 知识库id
:return: None
"""
document_list = QuerySet(Document).filter(dataset_id=dataset_id)
for document in document_list:
ListenerManagement.embedding_by_document(document.id)
max_kb.info(f"向量化数据集{dataset_id}")
try:
document_list = QuerySet(Document).filter(dataset_id=dataset_id)
for document in document_list:
ListenerManagement.embedding_by_document(document.id)
except Exception as e:
max_kb_error.error(f'向量化数据集:{dataset_id}出现错误{str(e)}{traceback.format_exc()}')
finally:
max_kb.info(f"结束--->向量化数据集:{dataset_id}")
@staticmethod
def delete_embedding_by_document(document_id):

View File

@ -6,15 +6,16 @@
@date: 2023/9/5 19:29
@desc:
"""
import django.core.exceptions
from psycopg2 import IntegrityError
import logging
import traceback
from rest_framework.exceptions import ValidationError, ErrorDetail, APIException
from rest_framework.views import exception_handler
from common.exception.app_exception import AppApiException
from common.response import result
import traceback
def to_result(key, args, parent_key=None):
"""
将校验异常 args转换为统一数据
@ -59,7 +60,6 @@ def handle_exception(exc, context):
exception_class = exc.__class__
# 先调用REST framework默认的异常处理方法获得标准错误响应对象
response = exception_handler(exc, context)
traceback.print_exc()
# 在此处补充自定义的异常处理
if issubclass(exception_class, ValidationError):
return validation_error_to_result(exc)
@ -68,5 +68,6 @@ def handle_exception(exc, context):
if issubclass(exception_class, APIException):
return result.error(exc.detail)
if response is None:
logging.getLogger("max_kb_error").error(f'{str(exc)}:{traceback.format_exc()}')
return result.error(str(exc))
return response

View File

@ -11,7 +11,7 @@ from functools import reduce
from typing import Dict, List
def sub_array(array: List, item_num=30):
def sub_array(array: List, item_num=10):
result = []
temp = []
for item in array:

View File

@ -6,6 +6,7 @@
@date: 2023/10/18 19:16
@desc:
"""
import threading
from abc import ABC, abstractmethod
from typing import List, Dict
@ -15,6 +16,8 @@ from common.config.embedding_config import EmbeddingModel
from common.util.common import sub_array
from embedding.models import SourceType
lock = threading.Lock()
class BaseVectorStore(ABC):
vector_exists = False
@ -65,25 +68,37 @@ class BaseVectorStore(ABC):
:param trample_num 点踩数量
:return: bool
"""
if embedding is None:
embedding = EmbeddingModel.get_embedding_model()
self.save_pre_handler()
self._save(text, source_type, dataset_id, document_id, paragraph_id, source_id, is_active, star_num,
trample_num, embedding)
# 获取锁
lock.acquire()
try:
if embedding is None:
embedding = EmbeddingModel.get_embedding_model()
self.save_pre_handler()
self._save(text, source_type, dataset_id, document_id, paragraph_id, source_id, is_active, star_num,
trample_num, embedding)
finally:
# 释放锁
lock.release()
def batch_save(self, data_list: List[Dict], embedding=None):
"""
批量插入
:param data_list: 数据列表
:param embedding: 向量化处理器
:return: bool
"""
if embedding is None:
embedding = EmbeddingModel.get_embedding_model()
self.save_pre_handler()
result = sub_array(data_list)
for child_array in result:
self._batch_save(child_array, embedding)
# 获取锁
lock.acquire()
try:
"""
批量插入
:param data_list: 数据列表
:param embedding: 向量化处理器
:return: bool
"""
if embedding is None:
embedding = EmbeddingModel.get_embedding_model()
self.save_pre_handler()
result = sub_array(data_list)
for child_array in result:
self._batch_save(child_array, embedding)
finally:
# 释放锁
lock.release()
return True
@abstractmethod

View File

@ -5,11 +5,9 @@ import os
from ..const import PROJECT_DIR, CONFIG
LOG_DIR = os.path.join(PROJECT_DIR, 'data', 'logs')
QA_BOT_LOG_FILE = os.path.join(LOG_DIR, 'smart_doc.log')
MAX_KB_LOG_FILE = os.path.join(LOG_DIR, 'max_kb.log')
DRF_EXCEPTION_LOG_FILE = os.path.join(LOG_DIR, 'drf_exception.log')
UNEXPECTED_EXCEPTION_LOG_FILE = os.path.join(LOG_DIR, 'unexpected_exception.log')
ANSIBLE_LOG_FILE = os.path.join(LOG_DIR, 'ansible.log')
GUNICORN_LOG_FILE = os.path.join(LOG_DIR, 'gunicorn.log')
LOG_LEVEL = "DEBUG"
LOGGING = {
@ -54,16 +52,7 @@ LOGGING = {
'maxBytes': 1024 * 1024 * 100,
'backupCount': 7,
'formatter': 'main',
'filename': QA_BOT_LOG_FILE,
},
'ansible_logs': {
'encoding': 'utf8',
'level': 'DEBUG',
'class': 'logging.handlers.RotatingFileHandler',
'formatter': 'main',
'maxBytes': 1024 * 1024 * 100,
'backupCount': 7,
'filename': ANSIBLE_LOG_FILE,
'filename': MAX_KB_LOG_FILE,
},
'drf_exception': {
'encoding': 'utf8',
@ -115,9 +104,15 @@ LOGGING = {
'level': LOG_LEVEL,
'propagate': False,
},
'smartdoc': {
'max_kb_error': {
'handlers': ['console', 'unexpected_exception'],
'level': LOG_LEVEL,
'propagate': False,
},
'max_kb': {
'handlers': ['console', 'file'],
'level': LOG_LEVEL,
'propagate': False,
},
}
}

View File

@ -86,4 +86,4 @@
document.body.append(chat_button);
} else console.error('invalid parameter')
}
document.body.onload = embedChatbot
window.onload = embedChatbot