mirror of
https://github.com/1Panel-dev/MaxKB.git
synced 2025-12-26 01:33:05 +00:00
25 lines
620 B
Python
25 lines
620 B
Python
# coding=utf-8
"""
    @project: MaxKB
    @Author:虎
    @file: mark_chunk_handle.py
    @date:2024/7/23 16:52
    @desc: Chunk handler that splits text chunks on punctuation marks and newlines.
"""
|
||
import re
|
||
from typing import List
|
||
|
||
from common.chunk.i_chunk_handle import IChunkHandle
|
||
|
||
# Regex alternation passed to re.split() by MarkChunkHandle.handle: a chunk is
# cut wherever "!", "。", a newline, or ";" occurs.
# NOTE(review): ";" is listed twice, so the last alternative is redundant as
# written — possibly a full-width variant was lost in transcoding; confirm
# against the upstream file.
split_chunk_pattern = "!|。|\n|;|;"
|
||
|
||
|
||
class MarkChunkHandle(IChunkHandle):
    """Chunk handler that breaks each chunk into smaller pieces at delimiters.

    The delimiters are whatever the module-level ``split_chunk_pattern``
    regular expression matches.
    """

    def handle(self, chunk_list: List[str]) -> List[str]:
        """Split every chunk on ``split_chunk_pattern`` and flatten the result.

        Args:
            chunk_list: Text chunks to split.

        Returns:
            One flat list holding the pieces of every input chunk, in input
            order. Each piece has its surrounding whitespace stripped, and
            pieces that are empty after stripping are dropped.
        """
        result: List[str] = []
        for chunk in chunk_list:
            # Strip each piece exactly once; the filter below reuses the
            # stripped value instead of stripping again.
            stripped = (piece.strip() for piece in re.split(split_chunk_pattern, chunk))
            # extend() grows the accumulator in place, keeping the whole pass
            # linear rather than copying the list on every iteration.
            result.extend(piece for piece in stripped if piece)
        return result
|