diff --git a/apps/common/event/__init__.py b/apps/common/event/__init__.py index cb278a862..e00026645 100644 --- a/apps/common/event/__init__.py +++ b/apps/common/event/__init__.py @@ -13,3 +13,4 @@ from .listener_chat_message import * def run(): listener_manage.ListenerManagement().run() listener_chat_message.ListenerChatMessage().run() + QuerySet(Document).filter(status=Status.embedding).update(**{'status': Status.error}) diff --git a/apps/common/event/listener_manage.py b/apps/common/event/listener_manage.py index a73a53ef4..28143cab7 100644 --- a/apps/common/event/listener_manage.py +++ b/apps/common/event/listener_manage.py @@ -6,7 +6,9 @@ @date:2023/10/20 14:01 @desc: """ +import logging import os +import traceback import django.db.models from blinker import signal @@ -20,6 +22,9 @@ from dataset.models import Paragraph, Status, Document from embedding.models import SourceType from smartdoc.conf import PROJECT_DIR +max_kb_error = logging.getLogger("max_kb_error") +max_kb = logging.getLogger("max_kb") + class ListenerManagement: embedding_by_problem_signal = signal("embedding_by_problem") @@ -46,6 +51,7 @@ class ListenerManagement: :param paragraph_id: 段落id :return: None """ + max_kb.info(f"开始--->向量化段落:{paragraph_id}") status = Status.success try: data_list = native_search( @@ -59,8 +65,11 @@ class ListenerManagement: # 批量向量化 VectorStore.get_embedding_vector().batch_save(data_list) except Exception as e: + max_kb_error.error(f'向量化段落:{paragraph_id}出现错误{str(e)}{traceback.format_exc()}') status = Status.error - QuerySet(Paragraph).filter(id=paragraph_id).update(**{'status': status}) + finally: + QuerySet(Paragraph).filter(id=paragraph_id).update(**{'status': status}) + max_kb.info(f'结束--->向量化段落:{paragraph_id}') @staticmethod @poxy @@ -70,6 +79,7 @@ class ListenerManagement: :param document_id: 文档id :return: None """ + max_kb.info(f"开始--->向量化文档:{document_id}") status = Status.success try: data_list = native_search( @@ -83,10 +93,13 @@ class ListenerManagement: # 批量向量化 VectorStore.get_embedding_vector().batch_save(data_list) except Exception as e: + max_kb_error.error(f'向量化文档:{document_id}出现错误{str(e)}{traceback.format_exc()}') status = Status.error - # 修改状态 - QuerySet(Document).filter(id=document_id).update(**{'status': status}) - QuerySet(Paragraph).filter(document_id=document_id).update(**{'status': status}) + finally: + # 修改状态 + QuerySet(Document).filter(id=document_id).update(**{'status': status}) + QuerySet(Paragraph).filter(document_id=document_id).update(**{'status': status}) + max_kb.info(f"结束--->向量化文档:{document_id}") @staticmethod @poxy @@ -96,9 +109,15 @@ class ListenerManagement: :param dataset_id: 知识库id :return: None """ - document_list = QuerySet(Document).filter(dataset_id=dataset_id) - for document in document_list: - ListenerManagement.embedding_by_document(document.id) + max_kb.info(f"向量化数据集{dataset_id}") + try: + document_list = QuerySet(Document).filter(dataset_id=dataset_id) + for document in document_list: + ListenerManagement.embedding_by_document(document.id) + except Exception as e: + max_kb_error.error(f'向量化数据集:{dataset_id}出现错误{str(e)}{traceback.format_exc()}') + finally: + max_kb.info(f"结束--->向量化数据集:{dataset_id}") @staticmethod def delete_embedding_by_document(document_id): diff --git a/apps/common/handle/handle_exception.py b/apps/common/handle/handle_exception.py index e12befb6e..32784861b 100644 --- a/apps/common/handle/handle_exception.py +++ b/apps/common/handle/handle_exception.py @@ -6,15 +6,16 @@ @date:2023/9/5 19:29 @desc: """ -import django.core.exceptions -from psycopg2 import IntegrityError +import logging +import traceback + from rest_framework.exceptions import ValidationError, ErrorDetail, APIException from rest_framework.views import exception_handler from common.exception.app_exception import AppApiException from common.response import result -import traceback + def to_result(key, args, parent_key=None): """ 将校验异常 args转换为统一数据 @@ -59,7 +60,6 @@ def handle_exception(exc, context): exception_class = exc.__class__ # 先调用REST framework默认的异常处理方法获得标准错误响应对象 response = exception_handler(exc, context) - traceback.print_exc() # 在此处补充自定义的异常处理 if issubclass(exception_class, ValidationError): return validation_error_to_result(exc) @@ -68,5 +68,6 @@ def handle_exception(exc, context): if issubclass(exception_class, APIException): return result.error(exc.detail) if response is None: + logging.getLogger("max_kb_error").error(f'{str(exc)}:{traceback.format_exc()}') return result.error(str(exc)) return response diff --git a/apps/common/util/common.py b/apps/common/util/common.py index e162859bf..d9aff2571 100644 --- a/apps/common/util/common.py +++ b/apps/common/util/common.py @@ -11,7 +11,7 @@ from functools import reduce from typing import Dict, List -def sub_array(array: List, item_num=30): +def sub_array(array: List, item_num=10): result = [] temp = [] for item in array: diff --git a/apps/embedding/vector/base_vector.py b/apps/embedding/vector/base_vector.py index c63efb019..9113bb52d 100644 --- a/apps/embedding/vector/base_vector.py +++ b/apps/embedding/vector/base_vector.py @@ -6,6 +6,7 @@ @date:2023/10/18 19:16 @desc: """ +import threading from abc import ABC, abstractmethod from typing import List, Dict @@ -15,6 +16,8 @@ from common.config.embedding_config import EmbeddingModel from common.util.common import sub_array from embedding.models import SourceType +lock = threading.Lock() + class BaseVectorStore(ABC): vector_exists = False @@ -65,25 +68,37 @@ class BaseVectorStore(ABC): :param trample_num 点踩数量 :return: bool """ - if embedding is None: - embedding = EmbeddingModel.get_embedding_model() - self.save_pre_handler() - self._save(text, source_type, dataset_id, document_id, paragraph_id, source_id, is_active, star_num, - trample_num, embedding) + # 获取锁 + lock.acquire() + try: + if embedding is None: + embedding = EmbeddingModel.get_embedding_model() + self.save_pre_handler() + self._save(text, source_type, dataset_id, document_id, paragraph_id, source_id, is_active, star_num, + trample_num, embedding) + finally: + # 释放锁 + lock.release() def batch_save(self, data_list: List[Dict], embedding=None): - """ - 批量插入 - :param data_list: 数据列表 - :param embedding: 向量化处理器 - :return: bool - """ - if embedding is None: - embedding = EmbeddingModel.get_embedding_model() - self.save_pre_handler() - result = sub_array(data_list) - for child_array in result: - self._batch_save(child_array, embedding) + # 获取锁 + lock.acquire() + try: + """ + 批量插入 + :param data_list: 数据列表 + :param embedding: 向量化处理器 + :return: bool + """ + if embedding is None: + embedding = EmbeddingModel.get_embedding_model() + self.save_pre_handler() + result = sub_array(data_list) + for child_array in result: + self._batch_save(child_array, embedding) + finally: + # 释放锁 + lock.release() return True @abstractmethod diff --git a/apps/smartdoc/settings/logging.py b/apps/smartdoc/settings/logging.py index 76c7c85df..2627f1201 100644 --- a/apps/smartdoc/settings/logging.py +++ b/apps/smartdoc/settings/logging.py @@ -5,11 +5,9 @@ import os from ..const import PROJECT_DIR, CONFIG LOG_DIR = os.path.join(PROJECT_DIR, 'data', 'logs') -QA_BOT_LOG_FILE = os.path.join(LOG_DIR, 'smart_doc.log') +MAX_KB_LOG_FILE = os.path.join(LOG_DIR, 'max_kb.log') DRF_EXCEPTION_LOG_FILE = os.path.join(LOG_DIR, 'drf_exception.log') UNEXPECTED_EXCEPTION_LOG_FILE = os.path.join(LOG_DIR, 'unexpected_exception.log') -ANSIBLE_LOG_FILE = os.path.join(LOG_DIR, 'ansible.log') -GUNICORN_LOG_FILE = os.path.join(LOG_DIR, 'gunicorn.log') LOG_LEVEL = "DEBUG" LOGGING = { @@ -54,16 +52,7 @@ LOGGING = { 'maxBytes': 1024 * 1024 * 100, 'backupCount': 7, 'formatter': 'main', - 'filename': QA_BOT_LOG_FILE, - }, - 'ansible_logs': { - 'encoding': 'utf8', - 'level': 'DEBUG', - 'class': 'logging.handlers.RotatingFileHandler', - 'formatter': 'main', - 'maxBytes': 1024 * 1024 * 100, - 'backupCount': 7, - 'filename': ANSIBLE_LOG_FILE, + 'filename': MAX_KB_LOG_FILE, }, 'drf_exception': { 'encoding': 'utf8', @@ -115,9 +104,15 @@ LOGGING = { 'level': LOG_LEVEL, 'propagate': False, }, - 'smartdoc': { + 'max_kb_error': { + 'handlers': ['console', 'unexpected_exception'], + 'level': LOG_LEVEL, + 'propagate': False, + }, + 'max_kb': { 'handlers': ['console', 'file'], 'level': LOG_LEVEL, + 'propagate': False, }, } } diff --git a/ui/public/embeb.js b/ui/public/embeb.js index b4f79f010..d80a03090 100644 --- a/ui/public/embeb.js +++ b/ui/public/embeb.js @@ -86,4 +86,4 @@ document.body.append(chat_button); } else console.error('invalid parameter') } - document.body.onload = embedChatbot +window.onload = embedChatbot