mirror of
https://github.com/1Panel-dev/MaxKB.git
synced 2025-12-26 01:33:05 +00:00
fix: 修复上传文档,高级分段设置分段长度为10w字符,生成预览还是4096个字符一段 (#884)
This commit is contained in:
parent
485eeb6ac1
commit
d935e9a836
|
|
@ -30,9 +30,6 @@ class Command(BaseCommand):
|
|||
|
||||
def handle(self, *args, **options):
|
||||
log_format = '%(h)s %(t)s %(L)ss "%(r)s" %(s)s %(b)s '
|
||||
print(options.get('worker_connections'))
|
||||
print(options.get('threads'))
|
||||
print(options)
|
||||
cmd = [
|
||||
'gunicorn', 'smartdoc.wsgi:application',
|
||||
'-b', options.get('b') if options.get('b') is not None else '0.0.0.0:8080',
|
||||
|
|
|
|||
|
|
@ -280,11 +280,11 @@ def filter_special_char(content: str):
|
|||
|
||||
class SplitModel:
|
||||
|
||||
def __init__(self, content_level_pattern, with_filter=True, limit=4096):
|
||||
def __init__(self, content_level_pattern, with_filter=True, limit=100000):
|
||||
self.content_level_pattern = content_level_pattern
|
||||
self.with_filter = with_filter
|
||||
if limit is None or limit > 4096:
|
||||
limit = 4096
|
||||
if limit is None or limit > 100000:
|
||||
limit = 100000
|
||||
if limit < 50:
|
||||
limit = 50
|
||||
self.limit = limit
|
||||
|
|
@ -375,7 +375,7 @@ default_split_pattern = {
|
|||
}
|
||||
|
||||
|
||||
def get_split_model(filename: str, with_filter: bool = False, limit: int = 4096):
|
||||
def get_split_model(filename: str, with_filter: bool = False, limit: int = 100000):
|
||||
"""
|
||||
根据文件名称获取分段模型
|
||||
:param limit: 每段大小
|
||||
|
|
|
|||
|
|
@ -788,7 +788,7 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
|
|||
file_list = self.data.get("file")
|
||||
return list(
|
||||
map(lambda f: file_to_paragraph(f, self.data.get("patterns", None), self.data.get("with_filter", None),
|
||||
self.data.get("limit", None)), file_list))
|
||||
self.data.get("limit", 4096)), file_list))
|
||||
|
||||
class SplitPattern(ApiMixin, serializers.Serializer):
|
||||
@staticmethod
|
||||
|
|
|
|||
Loading…
Reference in New Issue