This commit is contained in:
liqiang-fit2cloud 2024-11-29 08:46:47 +08:00
commit 8727de92d7
28 changed files with 187 additions and 86 deletions

View File

@ -23,5 +23,5 @@ class IDocumentExtractNode(INode):
self.node_params_serializer.data.get('document_list')[1:])
return self.execute(document=res, **self.flow_params_serializer.data)
def execute(self, document, **kwargs) -> NodeResult:
def execute(self, document, chat_id, **kwargs) -> NodeResult:
pass

View File

@ -1,23 +1,64 @@
# coding=utf-8
import io
import mimetypes
from django.core.files.uploadedfile import InMemoryUploadedFile
from django.db.models import QuerySet
from application.flow.i_step_node import NodeResult
from application.flow.step_node.document_extract_node.i_document_extract_node import IDocumentExtractNode
from dataset.models import File
from dataset.serializers.document_serializers import split_handles, parse_table_handle_list, FileBufferHandle
from dataset.serializers.file_serializers import FileSerializer
def bytes_to_uploaded_file(file_bytes, file_name="file.txt"):
    """Wrap raw bytes in an ``InMemoryUploadedFile``.

    The content type is guessed from ``file_name`` with
    ``mimetypes.guess_type``; the upload's size is ``len(file_bytes)``.

    :param file_bytes: raw file content to expose as an uploaded file
    :param file_name: name given to the upload and used to guess its type
    :return: an ``InMemoryUploadedFile`` backed by an in-memory byte stream
    """
    guessed_type = mimetypes.guess_type(file_name)[0]
    # Fall back to the generic binary type when the name gives no hint.
    mime_type = "application/octet-stream" if guessed_type is None else guessed_type
    return InMemoryUploadedFile(
        file=io.BytesIO(file_bytes),
        field_name=None,
        name=file_name,
        content_type=mime_type,
        size=len(file_bytes),
        charset=None,
    )
splitter = '\n`-----------------------------------`\n'
class BaseDocumentExtractNode(IDocumentExtractNode):
def execute(self, document, **kwargs):
def execute(self, document, chat_id, **kwargs):
get_buffer = FileBufferHandle().get_buffer
self.context['document_list'] = document
content = []
splitter = '\n`-----------------------------------`\n'
if document is None or not isinstance(document, list):
return NodeResult({'content': content}, {})
return NodeResult({'content': ''}, {})
application = self.workflow_manage.work_flow_post_handler.chat_info.application
# doc文件中的图片保存
def save_image(image_list):
for image in image_list:
meta = {
'debug': False if application.id else True,
'chat_id': chat_id,
'application_id': str(application.id) if application.id else None,
'file_id': str(image.id)
}
file = bytes_to_uploaded_file(image.image, image.image_name)
FileSerializer(data={'file': file, 'meta': meta}).upload()
for doc in document:
file = QuerySet(File).filter(id=doc['file_id']).first()
@ -28,21 +69,21 @@ class BaseDocumentExtractNode(IDocumentExtractNode):
if split_handle.support(buffer, get_buffer):
# 回到文件头
buffer.seek(0)
file_content = split_handle.get_content(buffer)
file_content = split_handle.get_content(buffer, save_image)
content.append('## ' + doc['name'] + '\n' + file_content)
break
return NodeResult({'content': splitter.join(content)}, {})
def get_details(self, index: int, **kwargs):
content = self.context.get('content', '').split(splitter)
# 不保存content全部内容因为content内容可能会很大
content = (self.context.get('content')[:500] + '...') if len(self.context.get('content')) > 0 else ''
return {
'name': self.node.properties.get('stepName'),
"index": index,
'run_time': self.context.get('run_time'),
'type': self.node.type,
'content': content,
'content': [file_content[:500] for file_content in content],
'status': self.status,
'err_message': self.err_message,
'document_list': self.context.get('document_list')

View File

@ -147,9 +147,9 @@ class ApplicationWorkflowSerializer(serializers.Serializer):
default_workflow = json.loads(default_workflow_json)
for node in default_workflow.get('nodes'):
if node.get('id') == 'base-node':
node.get('properties')['node_data'] = {"desc": application.get('desc'),
"name": application.get('name'),
"prologue": application.get('prologue')}
node.get('properties')['node_data']['desc'] = application.get('desc')
node.get('properties')['node_data']['name'] = application.get('name')
node.get('properties')['node_data']['prologue'] = application.get('prologue')
return Application(id=uuid.uuid1(),
name=application.get('name'),
desc=application.get('desc'),
@ -160,6 +160,14 @@ class ApplicationWorkflowSerializer(serializers.Serializer):
model_setting={},
problem_optimization=False,
type=ApplicationTypeChoices.WORK_FLOW,
stt_model_enable=application.get('stt_model_enable', False),
stt_model_id=application.get('stt_model', None),
tts_model_id=application.get('tts_model', None),
tts_model_enable=application.get('tts_model_enable', False),
tts_model_params_setting=application.get('tts_model_params_setting', {}),
tts_type=application.get('tts_type', None),
file_upload_enable=application.get('file_upload_enable', False),
file_upload_setting=application.get('file_upload_setting', {}),
work_flow=default_workflow
)
@ -502,6 +510,14 @@ class ApplicationSerializer(serializers.Serializer):
type=ApplicationTypeChoices.SIMPLE,
model_params_setting=application.get('model_params_setting', {}),
problem_optimization_prompt=application.get('problem_optimization_prompt', None),
stt_model_enable=application.get('stt_model_enable', False),
stt_model_id=application.get('stt_model', None),
tts_model_id=application.get('tts_model', None),
tts_model_enable=application.get('tts_model_enable', False),
tts_model_params_setting=application.get('tts_model_params_setting', {}),
tts_type=application.get('tts_type', None),
file_upload_enable=application.get('file_upload_enable', False),
file_upload_setting=application.get('file_upload_setting', {}),
work_flow={}
)

View File

@ -19,5 +19,5 @@ class BaseParseTableHandle(ABC):
pass
@abstractmethod
def get_content(self, file):
def get_content(self, file, save_image):
pass

View File

@ -20,5 +20,5 @@ class BaseSplitHandle(ABC):
pass
@abstractmethod
def get_content(self, file):
def get_content(self, file, save_image):
pass

View File

@ -190,12 +190,16 @@ class DocSplitHandle(BaseSplitHandle):
return True
return False
def get_content(self, file):
def get_content(self, file, save_image):
try:
image_list = []
buffer = file.read()
doc = Document(io.BytesIO(buffer))
return self.to_md(doc, image_list, get_image_id_func())
content = self.to_md(doc, image_list, get_image_id_func())
if len(image_list) > 0:
content = content.replace('/api/image/', '/api/file/')
save_image(image_list)
return content
except BaseException as e:
traceback.print_exception(e)
return f'{e}'

View File

@ -61,7 +61,7 @@ class HTMLSplitHandle(BaseSplitHandle):
'content': split_model.parse(content)
}
def get_content(self, file):
def get_content(self, file, save_image):
buffer = file.read()
try:

View File

@ -309,7 +309,7 @@ class PdfSplitHandle(BaseSplitHandle):
return True
return False
def get_content(self, file):
def get_content(self, file, save_image):
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
# 将上传的文件保存到临时文件中
temp_file.write(file.read())

View File

@ -35,7 +35,7 @@ class CsvSplitHandle(BaseParseTableHandle):
return [{'name': file.name, 'paragraphs': paragraphs}]
def get_content(self, file):
def get_content(self, file, save_image):
buffer = file.read()
try:
return buffer.decode(detect(buffer)['encoding'])

View File

@ -61,7 +61,7 @@ class XlsSplitHandle(BaseParseTableHandle):
return [{'name': file.name, 'paragraphs': []}]
return result
def get_content(self, file):
def get_content(self, file, save_image):
# 打开 .xls 文件
try:
workbook = xlrd.open_workbook(file_contents=file.read(), formatting_info=True)

View File

@ -74,7 +74,7 @@ class XlsxSplitHandle(BaseParseTableHandle):
return result
def get_content(self, file):
def get_content(self, file, save_image):
try:
# 加载 Excel 文件
workbook = load_workbook(file)

View File

@ -51,7 +51,7 @@ class TextSplitHandle(BaseSplitHandle):
'content': split_model.parse(content)
}
def get_content(self, file):
def get_content(self, file, save_image):
buffer = file.read()
try:
return buffer.decode(detect(buffer)['encoding'])

View File

@ -4,6 +4,7 @@ import logging
import datetime
from django.db import transaction
from django.db.models.fields.json import KeyTextTransform
from django.utils import timezone
from apscheduler.schedulers.background import BackgroundScheduler
from django_apscheduler.jobstores import DjangoJobStore
@ -11,6 +12,8 @@ from application.models import Application, Chat
from django.db.models import Q
from common.lock.impl.file_lock import FileLock
from dataset.models import File
from django.db.models.functions import Cast
from django.db import models
scheduler = BackgroundScheduler()
scheduler.add_jobstore(DjangoJobStore(), "default")
@ -40,7 +43,7 @@ def clean_chat_log_job():
break
deleted_count, _ = Chat.objects.filter(id__in=logs_to_delete).delete()
# 删除对应的文件
File.objects.filter(~Q(meta__chat_id__in=logs_to_delete)).delete()
File.objects.filter(meta__chat_id__in=[str(uuid) for uuid in logs_to_delete]).delete()
if deleted_count < batch_size:
break

View File

@ -61,8 +61,9 @@ class FileSerializer(serializers.Serializer):
def upload(self, with_valid=True):
if with_valid:
self.is_valid(raise_exception=True)
file_id = uuid.uuid1()
file = File(id=file_id, file_name=self.data.get('file').name, meta=self.data.get('meta'))
meta = self.data.get('meta')
file_id = meta.get('file_id', uuid.uuid1())
file = File(id=file_id, file_name=self.data.get('file').name, meta=meta)
file.save(self.data.get('file').read())
return f'/api/file/{file_id}'

View File

@ -178,7 +178,7 @@ const putSyncWebDataset: (
}
/**
*
*
* @param dataset_id
*/
const putReEmbeddingDataset: (

View File

@ -218,26 +218,30 @@
<!-- 文档内容提取 -->
<template v-if="item.type === WorkflowType.DocumentExtractNode">
<div class="card-never border-r-4">
<h5 class="p-8-12">
参数输出
<h5 class="p-8-12 flex align-center">
<span class="mr-4">参数输出</span>
<el-tooltip
effect="dark"
content="每个文档仅支持预览500字"
placement="right"
effect="dark"
content="每个文档仅支持预览500字"
placement="right"
>
<AppIcon iconName="app-warning" class="app-warning-icon"></AppIcon>
</el-tooltip>
</h5>
<div class="p-8-12 border-t-dashed lighter">
<el-scrollbar height="150">
<MdPreview
v-if="item.content"
ref="editorRef"
editorId="preview-only"
:modelValue="item.content"
style="background: none"
/>
<template v-else> - </template>
<el-card shadow="never" style="--el-card-padding: 8px" v-for="(file_content, index) in item.content"
:key="index" class="mb-8">
<MdPreview
v-if="file_content"
ref="editorRef"
editorId="preview-only"
:modelValue="file_content"
style="background: none"
/>
<template v-else> - </template>
</el-card>
</el-scrollbar>
</div>
</div>
@ -333,6 +337,7 @@
<template v-if="item.type === WorkflowType.FormNode">
<div class="card-never border-r-4">
<h5 class="p-8-12">参数输入</h5>
<div class="p-8-12 border-t-dashed lighter">
<div v-for="(f, i) in item.form_field_list" :key="i" class="mb-8">
<span class="color-secondary">{{ f.label.label }}:</span>

View File

@ -20,7 +20,12 @@
</div>
<div class="ml-8" v-else>
<a
@click="openLink(item.source_url)"
:href="
item.source_url && !item.source_url.endsWith('/')
? item.source_url + '/'
: item.source_url
"
target="_blank"
class="ellipsis"
:title="item?.document_name?.trim()"
>
@ -98,14 +103,6 @@ const uniqueParagraphList = computed(() => {
}) || []
)
})
function openLink(url: string) {
  // Append a trailing slash when the URL is non-empty and does not already end with one.
  const target = url && !url.endsWith('/') ? url + '/' : url
  window.open(target, '_blank')
}
</script>
<style lang="scss" scoped>
.source_dataset-button {

View File

@ -20,9 +20,25 @@
<el-text class="flex align-center" style="width: 70%">
<img :src="getImgUrl(data?.document_name?.trim())" alt="" width="20" class="mr-4" />
<span class="ellipsis" :title="data?.document_name?.trim()">
{{ data?.document_name.trim() }}</span
>
<template v-if="meta?.source_url">
<a
:href="
meta?.source_url && !meta?.source_url.endsWith('/')
? meta?.source_url + '/'
: meta?.source_url
"
target="_blank"
class="ellipsis"
:title="data?.document_name?.trim()"
>
{{ data?.document_name?.trim() }}
</a>
</template>
<template v-else>
<span class="ellipsis" :title="data?.document_name?.trim()">
{{ data?.document_name?.trim() }}
</span>
</template>
</el-text>
<div class="flex align-center" style="line-height: 32px">
<AppAvatar class="mr-8 avatar-blue" shape="square" :size="18">
@ -37,6 +53,8 @@
</template>
<script setup lang="ts">
import { getImgUrl } from '@/utils/utils'
import { computed } from 'vue'
const props = defineProps({
data: {
type: Object,
@ -47,11 +65,22 @@ const props = defineProps({
default: 0
}
})
// True when `data.meta` is already an object and needs no parsing.
const isMetaObject = computed(() => typeof props.data.meta === 'object')
// `data.meta` parsed as JSON; anything JSON.parse rejects yields an empty object.
const parsedMeta = computed(() => {
  let parsed
  try {
    parsed = JSON.parse(props.data.meta)
  } catch (e) {
    parsed = {}
  }
  return parsed
})
// Use `data.meta` directly when it is an object, otherwise its parsed form.
const meta = computed(() => {
  if (isMetaObject.value) {
    return props.data.meta
  }
  return parsedMeta.value
})
</script>
<style lang="scss" scoped>
.paragraph-source-card-height {
height: 260px;
}
@media only screen and (max-width: 768px) {
.paragraph-source-card-height {
height: 285px;

View File

@ -19,7 +19,7 @@
</el-tooltip>
<el-divider direction="vertical" />
</span>
<span v-if="applicationId && type == 'log'">
<span v-if="type == 'ai-chat' || type == 'log'">
<el-tooltip effect="dark" content="换个答案" placement="top">
<el-button :disabled="chat_loading" text @click="regeneration">
<el-icon><RefreshRight /></el-icon>

View File

@ -7,35 +7,37 @@
class="mb-16"
style="padding: 0 24px"
>
<el-card shadow="always" class="dialog-card">
<div class="flex align-center cursor w-full" @click="showUserInput = !showUserInput">
<el-card shadow="always" class="dialog-card" style="--el-card-padding: 16px 8px">
<div class="flex align-center cursor w-full" style="padding: 0 8px;" @click="showUserInput = !showUserInput">
<el-icon class="mr-8 arrow-icon" :class="showUserInput ? 'rotate-90' : ''"
><CaretRight
/></el-icon>
用户输入
</div>
<el-collapse-transition>
<div v-show="showUserInput" class="mt-16">
<DynamicsForm
:key="dynamicsFormRefresh"
v-model="form_data_context"
:model="form_data_context"
label-position="top"
require-asterisk-position="right"
:render_data="inputFieldList"
ref="dynamicsFormRef"
/>
<DynamicsForm
v-if="type === 'debug-ai-chat'"
v-model="api_form_data_context"
:model="api_form_data_context"
label-position="top"
require-asterisk-position="right"
:render_data="apiInputFieldList"
ref="dynamicsFormRef2"
/>
</div>
</el-collapse-transition>
<el-scrollbar max-height="160">
<el-collapse-transition>
<div v-show="showUserInput" class="mt-16" style="padding: 0 8px;">
<DynamicsForm
:key="dynamicsFormRefresh"
v-model="form_data_context"
:model="form_data_context"
label-position="top"
require-asterisk-position="right"
:render_data="inputFieldList"
ref="dynamicsFormRef"
/>
<DynamicsForm
v-if="type === 'debug-ai-chat'"
v-model="api_form_data_context"
:model="api_form_data_context"
label-position="top"
require-asterisk-position="right"
:render_data="apiInputFieldList"
ref="dynamicsFormRef2"
/>
</div>
</el-collapse-transition>
</el-scrollbar>
</el-card>
</div>
</template>

View File

@ -38,7 +38,7 @@ export function fileType(name: string) {
*/
const typeList: any = {
txt: ['txt', 'pdf', 'docx', 'csv', 'md', 'html'],
txt: ['txt', 'pdf', 'docx', 'md', 'html'],
table: ['xlsx', 'xls', 'csv'],
QA: ['xlsx', 'csv', 'xls']
}

View File

@ -242,6 +242,9 @@ function clickoutside() {
showPopover.value = false
}
function publicHandle() {
//
saveApplication()
//
workflowRef.value
?.validate()
.then(() => {

View File

@ -148,8 +148,8 @@ async function submit() {
}
if (cloneModelId.value !== BaseFormRef.value.form.embedding_mode_id) {
MsgConfirm(`提示`, `修改知识库向量模型后,需要对知识库重新向量化,是否继续保存?`, {
confirmButtonText: '重新向量化',
MsgConfirm(`提示`, `修改知识库向量模型后,需要对知识库向量化,是否继续保存?`, {
confirmButtonText: '向量化',
confirmButtonClass: 'primary'
})
.then(() => {

View File

@ -129,7 +129,7 @@
action="#"
:auto-upload="false"
:show-file-list="false"
accept=".txt, .md, .csv, .log, .docx, .pdf, .html"
accept=".txt, .md, .log, .docx, .pdf, .html"
:limit="50"
:on-exceed="onExceed"
:on-change="fileHandleChange"

View File

@ -109,7 +109,7 @@
iconName="app-document-refresh"
style="font-size: 16px"
></AppIcon>
重新向量化</el-dropdown-item
向量化</el-dropdown-item
>
<el-dropdown-item
icon="Setting"

View File

@ -23,10 +23,10 @@
迁移
</el-button>
<el-button @click="batchRefresh" :disabled="multipleSelection.length === 0">
重新向量化
向量化
</el-button>
<el-button @click="openGenerateDialog()" :disabled="multipleSelection.length === 0">
生成关联问题
关联问题
</el-button>
<el-button @click="openBatchEditDocument" :disabled="multipleSelection.length === 0">
设置
@ -623,7 +623,7 @@ function batchRefresh() {
}
})
documentApi.batchRefresh(id, arr, loading).then(() => {
MsgSuccess('批量重新向量化成功')
MsgSuccess('批量向量化成功')
multipleTableRef.value?.clearSelection()
})
}
@ -636,7 +636,7 @@ function batchGenerateRelated() {
}
})
documentApi.batchGenerateRelated(id, arr, loading).then(() => {
MsgSuccess('批量生成关联问题成功')
MsgSuccess('批量关联问题成功')
multipleTableRef.value?.clearSelection()
})
}

View File

@ -45,7 +45,7 @@
<img class="mr-12" src="@/assets/icon_file-doc.svg" alt="" />
<div>
<p>文档TXTMDDOCXHTMLCSVXLSXXLSPDF</p>
<el-text class="color-secondary">需要与文档内容提取节点配合使用</el-text>
<el-text class="color-secondary">需要使用文档内容提取节点解析文档内容</el-text>
</div>
</div>
<el-checkbox v-model="form_data.document" />
@ -62,7 +62,7 @@
<img class="mr-12" src="@/assets/icon_file-image.svg" alt="" />
<div>
<p>图片JPGJPEGPNGGIF</p>
<el-text class="color-secondary">所选模型需要支持接收图片</el-text>
<el-text class="color-secondary">需要使用图片理解节点解析图片内容</el-text>
</div>
</div>
<el-checkbox v-model="form_data.image" />

View File

@ -25,7 +25,7 @@
</div>
<el-tooltip effect="dark" placement="right" popper-class="max-w-200">
<template #content>
`设置执行该节点输出的内容,{{ '{ from }' }}为表单的占位符。`
设置执行该节点输出的内容{{ '{ form }' }}为表单的占位符
</template>
<AppIcon iconName="app-warning" class="app-warning-icon"></AppIcon>
</el-tooltip>