mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-13 04:57:28 +00:00
add auto-CoT feature (#483)
* 1. refactor the retrieval module. 2. refactor the http service module. 3. upgrade the text2sql output format and the parsing of absolute-time expressions in queries. * fix bug. * upgrade the config module; it now supports configuring any LLM supported by langchain. * fix conflicts. * update text2sql config reload to be compatible with the new config format. * modify default config. * 1. add self-consistency feature for text2sql. 2. upgrade LLM API calls from sync to async. 3. refactor text2sql module. 4. refactor semantic retriever modules. * merge with upstream master * add general retrieve service. * add API service for sql_agent for CRUD operations on few-shot examples. * modify requirements * add auto-cot feature --------- Co-authored-by: shaoweigong <shaoweigong@tencent.com>
This commit is contained in:
63
chat/python/services/s2ql/run.py
Normal file
63
chat/python/services/s2ql/run.py
Normal file
@@ -0,0 +1,63 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
|
||||
import asyncio
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
import json
|
||||
|
||||
from s2ql.constructor import FewShotPromptTemplate2
|
||||
from s2ql.sql_agent import Text2DSLAgent, Text2DSLAgentAutoCoT, Text2DSLAgentWrapper
|
||||
|
||||
from instances.llm_instance import llm
|
||||
from instances.chromadb_instance import client as chromadb_client
|
||||
from instances.logging_instance import logger
|
||||
from instances.text2vec_instance import emb_func
|
||||
|
||||
from few_shot_example.s2ql_examplar import examplars as sql_examplars
|
||||
from config.config_parse import (TEXT2DSLAGENT_COLLECTION_NAME, TEXT2DSLAGENTACT_COLLECTION_NAME,
|
||||
TEXT2DSL_EXAMPLE_NUM, TEXT2DSL_FEWSHOTS_NUM, TEXT2DSL_SELF_CONSISTENCY_NUM,
|
||||
ACT_MIN_WINDOWN_SIZE, ACT_MAX_WINDOWN_SIZE)
|
||||
|
||||
|
||||
# Two chromadb collections back the few-shot retrieval: one for the plain
# text2dsl agent and one for the auto-CoT ("act") agent. Both are fetched or
# created with the shared embedding function and with the "hnsw:space"
# metadata key set to "cosine".
text2dsl_agent_collection = chromadb_client.get_or_create_collection(
    name=TEXT2DSLAGENT_COLLECTION_NAME,
    embedding_function=emb_func,
    metadata={"hnsw:space": "cosine"},
)
text2dsl_agent_act_collection = chromadb_client.get_or_create_collection(
    name=TEXT2DSLAGENTACT_COLLECTION_NAME,
    embedding_function=emb_func,
    metadata={"hnsw:space": "cosine"},
)
|
||||
|
||||
# One few-shot prompt builder per collection. Both retrieve on the
# "question" key and join the selected examples with a blank line.
text2dsl_agent_example_prompter = FewShotPromptTemplate2(
    collection=text2dsl_agent_collection,
    retrieval_key="question",
    few_shot_seperator='\n\n',
)
text2dsl_agent_act_example_prompter = FewShotPromptTemplate2(
    collection=text2dsl_agent_act_collection,
    retrieval_key="question",
    few_shot_seperator='\n\n',
)
|
||||
|
||||
# Plain text2dsl agent: reads its few-shot examples through the prompter
# bound to the regular collection.
text2sql_agent = Text2DSLAgent(
    num_fewshots=TEXT2DSL_FEWSHOTS_NUM,
    num_examples=TEXT2DSL_EXAMPLE_NUM,
    num_self_consistency=TEXT2DSL_SELF_CONSISTENCY_NUM,
    sql_example_prompter=text2dsl_agent_example_prompter,
    llm=llm,
)

# Auto-CoT variant: same sizing parameters and LLM, but it uses the "act"
# prompter and additionally receives the configured min/max window sizes.
text2sql_agent_autoCoT = Text2DSLAgentAutoCoT(
    num_fewshots=TEXT2DSL_FEWSHOTS_NUM,
    num_examples=TEXT2DSL_EXAMPLE_NUM,
    num_self_consistency=TEXT2DSL_SELF_CONSISTENCY_NUM,
    sql_example_prompter=text2dsl_agent_act_example_prompter,
    llm=llm,
    auto_cot_min_window_size=ACT_MIN_WINDOWN_SIZE,
    auto_cot_max_window_size=ACT_MAX_WINDOWN_SIZE,
)
|
||||
|
||||
# The bundled exemplars are identified by their stringified list positions
# ("0", "1", ...). Unlike the auto-CoT bootstrap further down, this reload
# runs unconditionally every time the module is imported.
sql_ids = list(map(str, range(len(sql_examplars))))
text2sql_agent.reload_setting(
    sql_ids,
    sql_examplars,
    TEXT2DSL_EXAMPLE_NUM,
    TEXT2DSL_FEWSHOTS_NUM,
    TEXT2DSL_SELF_CONSISTENCY_NUM,
)
|
||||
|
||||
# Bootstrap the auto-CoT agent from the bundled JSON file, but only when
# count_examples() reports 0; otherwise the existing examples are left as
# they are.
if text2sql_agent_autoCoT.count_examples() == 0:
    # The few_shot_example directory lives two levels above the directory
    # that contains this file.
    source_dir_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    example_dir_path = os.path.join(source_dir_path, 'few_shot_example')
    example_json_file = os.path.join(example_dir_path, 's2ql_examplar3_transformed.json')

    # JSON text is UTF-8 by specification. Decode it explicitly instead of
    # relying on the platform's locale encoding, which would break on
    # non-ASCII content wherever the default is not UTF-8.
    with open(example_json_file, 'r', encoding='utf-8') as f:
        transformed_sql_examplar_list = json.load(f)

    # Ids are the stringified list positions of the loaded examples.
    transformed_sql_examplar_ids = [str(i) for i in range(len(transformed_sql_examplar_list))]
    text2sql_agent_autoCoT.reload_setting_autoCoT(
        transformed_sql_examplar_ids,
        transformed_sql_examplar_list,
        TEXT2DSL_EXAMPLE_NUM,
        TEXT2DSL_FEWSHOTS_NUM,
        TEXT2DSL_SELF_CONSISTENCY_NUM,
    )
|
||||
|
||||
|
||||
# Module-level entry object: a wrapper constructed around the auto-CoT
# agent only (the plain text2sql_agent is not passed to it here).
text2sql_agent_router = Text2DSLAgentWrapper(
    sql_agent_act=text2sql_agent_autoCoT,
)
|
||||
|
||||
Reference in New Issue
Block a user