Mirror of https://github.com/1Panel-dev/MaxKB.git (synced 2025-12-26 01:33:05 +00:00)
fix: update SQL queries to improve similarity calculations and indexing
Some checks are pending:
sync2gitee / repo-sync (push): waiting to run
This commit is contained in:
parent b3a5dc4a1c
commit 4d18b78d29
|
|
@@ -242,7 +242,7 @@ def create_knowledge_index(knowledge_id=None, document_id=None):
|
|||
if len(result) == 0:
|
||||
return
|
||||
dims = result[0]['dims']
|
||||
sql = f"""CREATE INDEX "embedding_hnsw_idx_{k_id}" ON embedding USING hnsw ((embedding::vector({dims})) vector_l2_ops) WHERE knowledge_id = '{k_id}'"""
|
||||
sql = f"""CREATE INDEX "embedding_hnsw_idx_{k_id}" ON embedding USING hnsw ((embedding::vector({dims})) vector_cosine_ops) WHERE knowledge_id = '{k_id}'"""
|
||||
update_execute(sql, [])
|
||||
maxkb_logger.info(f'Created index for knowledge ID: {k_id}')
|
||||
|
||||
|
|
|
|||
|
|
@@ -5,15 +5,17 @@ SELECT
|
|||
FROM
|
||||
(
|
||||
SELECT DISTINCT ON
|
||||
( "paragraph_id" ) ( similarity ),* ,
|
||||
similarity AS comprehensive_score
|
||||
( "paragraph_id" ) ( 1 - distince + ts_similarity ) as similarity, *,
|
||||
(1 - distince + ts_similarity) AS comprehensive_score
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
*,
|
||||
(( 1 - ( embedding.embedding <=> %s ) )+ts_rank_cd( embedding.search_vector, websearch_to_tsquery('simple', %s ), 32 )) AS similarity
|
||||
(embedding.embedding::vector(%s) <=> %s) as distince,
|
||||
(ts_rank_cd( embedding.search_vector, websearch_to_tsquery('simple', %s ), 32 )) AS ts_similarity
|
||||
FROM
|
||||
embedding ${embedding_query}
|
||||
ORDER BY distince
|
||||
) TEMP
|
||||
ORDER BY
|
||||
paragraph_id,
|
||||
|
|
|
|||
|
|
@@ -5,12 +5,12 @@ SELECT
|
|||
FROM
|
||||
(
|
||||
SELECT DISTINCT ON
|
||||
("paragraph_id") ( similarity ),* ,similarity AS comprehensive_score
|
||||
("paragraph_id") ( 1 - distince ),* ,(1 - distince) AS comprehensive_score
|
||||
FROM
|
||||
( SELECT *, ( 1 - ( embedding.embedding <=> %s ) ) AS similarity FROM embedding ${embedding_query}) TEMP
|
||||
( SELECT *, ( embedding.embedding::vector(%s) <=> %s ) AS distince FROM embedding ${embedding_query} ORDER BY distince) TEMP
|
||||
ORDER BY
|
||||
paragraph_id,
|
||||
similarity DESC
|
||||
distince
|
||||
) DISTINCT_TEMP
|
||||
WHERE comprehensive_score>%s
|
||||
ORDER BY comprehensive_score DESC
|
||||
|
|
|
|||
|
|
@@ -172,8 +172,13 @@ class EmbeddingSearch(ISearch):
|
|||
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
|
||||
'embedding_search.sql')),
|
||||
with_table_name=True)
|
||||
embedding_model = select_list(exec_sql,
|
||||
[json.dumps(query_embedding), *exec_params, similarity, top_number])
|
||||
embedding_model = select_list(exec_sql, [
|
||||
len(query_embedding),
|
||||
json.dumps(query_embedding),
|
||||
*exec_params,
|
||||
similarity,
|
||||
top_number
|
||||
])
|
||||
return embedding_model
|
||||
|
||||
def support(self, search_mode: SearchMode):
|
||||
|
|
@@ -193,8 +198,12 @@ class KeywordsSearch(ISearch):
|
|||
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
|
||||
'keywords_search.sql')),
|
||||
with_table_name=True)
|
||||
embedding_model = select_list(exec_sql,
|
||||
[to_query(query_text), *exec_params, similarity, top_number])
|
||||
embedding_model = select_list(exec_sql, [
|
||||
to_query(query_text),
|
||||
*exec_params,
|
||||
similarity,
|
||||
top_number
|
||||
])
|
||||
return embedding_model
|
||||
|
||||
def support(self, search_mode: SearchMode):
|
||||
|
|
@@ -214,9 +223,13 @@ class BlendSearch(ISearch):
|
|||
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
|
||||
'blend_search.sql')),
|
||||
with_table_name=True)
|
||||
embedding_model = select_list(exec_sql,
|
||||
[json.dumps(query_embedding), to_query(query_text), *exec_params, similarity,
|
||||
top_number])
|
||||
embedding_model = select_list(exec_sql, [
|
||||
len(query_embedding),
|
||||
json.dumps(query_embedding),
|
||||
to_query(query_text),
|
||||
*exec_params, similarity,
|
||||
top_number
|
||||
])
|
||||
return embedding_model
|
||||
|
||||
def support(self, search_mode: SearchMode):
|
||||
|
|
|
|||
Loading…
Reference in New Issue