diff --git a/apps/common/management/commands/gunicorn.py b/apps/common/management/commands/gunicorn.py index 0d4f86438..436a604b4 100644 --- a/apps/common/management/commands/gunicorn.py +++ b/apps/common/management/commands/gunicorn.py @@ -30,9 +30,6 @@ class Command(BaseCommand): def handle(self, *args, **options): log_format = '%(h)s %(t)s %(L)ss "%(r)s" %(s)s %(b)s ' - print(options.get('worker_connections')) - print(options.get('threads')) - print(options) cmd = [ 'gunicorn', 'smartdoc.wsgi:application', '-b', options.get('b') if options.get('b') is not None else '0.0.0.0:8080', diff --git a/apps/common/util/split_model.py b/apps/common/util/split_model.py index ce8a6946e..c747cb1fc 100644 --- a/apps/common/util/split_model.py +++ b/apps/common/util/split_model.py @@ -280,11 +280,11 @@ def filter_special_char(content: str): class SplitModel: - def __init__(self, content_level_pattern, with_filter=True, limit=4096): + def __init__(self, content_level_pattern, with_filter=True, limit=100000): self.content_level_pattern = content_level_pattern self.with_filter = with_filter - if limit is None or limit > 4096: - limit = 4096 + if limit is None or limit > 100000: + limit = 100000 if limit < 50: limit = 50 self.limit = limit @@ -375,7 +375,7 @@ default_split_pattern = { } -def get_split_model(filename: str, with_filter: bool = False, limit: int = 4096): +def get_split_model(filename: str, with_filter: bool = False, limit: int = 100000): """ 根据文件名称获取分段模型 :param limit: 每段大小 diff --git a/apps/dataset/serializers/document_serializers.py b/apps/dataset/serializers/document_serializers.py index f89c12dc0..0977db7e1 100644 --- a/apps/dataset/serializers/document_serializers.py +++ b/apps/dataset/serializers/document_serializers.py @@ -788,7 +788,7 @@ class DocumentSerializers(ApiMixin, serializers.Serializer): file_list = self.data.get("file") return list( map(lambda f: file_to_paragraph(f, self.data.get("patterns", None), self.data.get("with_filter", None), - self.data.get("limit", None)), file_list)) + self.data.get("limit", 4096)), file_list)) class SplitPattern(ApiMixin, serializers.Serializer): @staticmethod