add auto-CoT feature (#483)

* 1.refactor the retrieval module. 2.refactor the http service module. 3.upgrade text2sql output format the parse for absolute time related expression in query.

* fix bug.

* upgrade the config module; it now supports configuring LLMs supported by langchain.

* fix conflicts.

* update text2sql config reload to be compatible with the new config format.

* modify default config.

* 1.add self-consistency feature for text2sql. 2.upgrade llm api call from sync to async. 3.refactor text2sql module. 4. refactor semantical retriever modules.

* merge with upstream master

* add general retrieve service.

* add API service for sql_agent for CRUD operations on few-shot examples.

* modify requirements

* add auto-cot feature

---------

Co-authored-by: shaoweigong <shaoweigong@tencent.com>
This commit is contained in:
codescracker
2023-12-11 16:07:49 +08:00
committed by GitHub
parent 3ae720ef30
commit d79f73eab6
21 changed files with 1822 additions and 565 deletions

View File

@@ -36,12 +36,12 @@ CHROMA_DB_PERSIST_DIR = config.get(chroma_db_section_name, 'CHROMA_DB_PERSIST_DI
# Settings read from the config section named by `chroma_db_section_name`.
# NOTE(review): `config` is created outside this view; the two-argument
# `config.get(section, option)` calls suggest a configparser.ConfigParser,
# which the getint()/getboolean() calls below rely on -- confirm.

# Collection names (kept as plain strings).
PRESET_QUERY_COLLECTION_NAME = config.get(chroma_db_section_name, 'PRESET_QUERY_COLLECTION_NAME')
SOLVED_QUERY_COLLECTION_NAME = config.get(chroma_db_section_name, 'SOLVED_QUERY_COLLECTION_NAME')
TEXT2DSLAGENT_COLLECTION_NAME = config.get(chroma_db_section_name, 'TEXT2DSLAGENT_COLLECTION_NAME')
TEXT2DSLAGENTCS_COLLECTION_NAME = config.get(chroma_db_section_name, 'TEXT2DSLAGENTCS_COLLECTION_NAME')
TEXT2DSLAGENTACT_COLLECTION_NAME = config.get(chroma_db_section_name, 'TEXT2DSLAGENTACT_COLLECTION_NAME')

# Integer options; getint() raises ValueError on non-numeric text, exactly as
# the previous int(config.get(...)) form did.
TEXT2DSL_EXAMPLE_NUM = config.getint(chroma_db_section_name, 'TEXT2DSL_EXAMPLE_NUM')
TEXT2DSL_FEWSHOTS_NUM = config.getint(chroma_db_section_name, 'TEXT2DSL_FEWSHOTS_NUM')
TEXT2DSL_SELF_CONSISTENCY_NUM = config.getint(chroma_db_section_name, 'TEXT2DSL_SELF_CONSISTENCY_NUM')

# Boolean switches. Parsed with getboolean() instead of eval() so the option
# text is never executed as Python; 'True'/'False' (any case), 'yes'/'no',
# 'on'/'off' and '1'/'0' are accepted, anything else raises ValueError.
TEXT2DSL_IS_SHORTCUT = config.getboolean(chroma_db_section_name, 'TEXT2DSL_IS_SHORTCUT')
TEXT2DSL_IS_SELF_CONSISTENCY = config.getboolean(chroma_db_section_name, 'TEXT2DSL_IS_SELF_CONSISTENCY')

ACT_MIN_WINDOWN_SIZE = config.getint(chroma_db_section_name, 'ACT_MIN_WINDOWN_SIZE')
ACT_MAX_WINDOWN_SIZE = config.getint(chroma_db_section_name, 'ACT_MAX_WINDOWN_SIZE')

# CHROMA_DB_PERSIST_DIR joined onto PROJECT_DIR_PATH.
CHROMA_DB_PERSIST_PATH = os.path.join(PROJECT_DIR_PATH, CHROMA_DB_PERSIST_DIR)

# Name of the config section that holds the text2vec settings.
text2vec_section_name = "Text2Vec"
@@ -63,8 +63,15 @@ if __name__ == "__main__":
# Log each loaded configuration value, one "NAME: value" line per setting.
# NOTE(review): the hunk header places these calls under
# `if __name__ == "__main__":`, i.e. they appear to run only when the module
# is executed directly -- confirm against the full file.
logger.info(f"LLMPARSER_HOST: {LLMPARSER_HOST}")
logger.info(f"LLMPARSER_PORT: {LLMPARSER_PORT}")
logger.info(f"llm_config_dict: {llm_config_dict}")
logger.info(f"LLM_PROVIDER_NAME: {LLM_PROVIDER_NAME}")
logger.info(f"PRESET_QUERY_COLLECTION_NAME: {PRESET_QUERY_COLLECTION_NAME}")
logger.info(f"SOLVED_QUERY_COLLECTION_NAME: {SOLVED_QUERY_COLLECTION_NAME}")
logger.info(f"TEXT2DSLAGENT_COLLECTION_NAME: {TEXT2DSLAGENT_COLLECTION_NAME}")
logger.info(f"TEXT2DSLAGENTACT_COLLECTION_NAME: {TEXT2DSLAGENTACT_COLLECTION_NAME}")
logger.info(f"TEXT2DSL_EXAMPLE_NUM: {TEXT2DSL_EXAMPLE_NUM}")
logger.info(f"TEXT2DSL_FEWSHOTS_NUM: {TEXT2DSL_FEWSHOTS_NUM}")
logger.info(f"TEXT2DSL_SELF_CONSISTENCY_NUM: {TEXT2DSL_SELF_CONSISTENCY_NUM}")
logger.info(f"TEXT2DSL_IS_SHORTCUT: {TEXT2DSL_IS_SHORTCUT}")
logger.info(f"TEXT2DSL_IS_SELF_CONSISTENCY: {TEXT2DSL_IS_SELF_CONSISTENCY}")
logger.info(f"ACT_MIN_WINDOWN_SIZE: {ACT_MIN_WINDOWN_SIZE}")
logger.info(f"ACT_MAX_WINDOWN_SIZE: {ACT_MAX_WINDOWN_SIZE}")

View File

@@ -7,12 +7,12 @@ CHROMA_DB_PERSIST_DIR = chm_db
PRESET_QUERY_COLLECTION_NAME = preset_query_collection
SOLVED_QUERY_COLLECTION_NAME = solved_query_collection
TEXT2DSLAGENT_COLLECTION_NAME = text2dsl_agent_collection
TEXT2DSLAGENTCS_COLLECTION_NAME = text2dsl_agent_cs_collection
TEXT2DSLAGENTACT_COLLECTION_NAME = text2dsl_agent_act_collection
TEXT2DSL_EXAMPLE_NUM = 15
TEXT2DSL_FEWSHOTS_NUM = 10
TEXT2DSL_SELF_CONSISTENCY_NUM = 5
TEXT2DSL_IS_SHORTCUT = False
TEXT2DSL_IS_SELF_CONSISTENCY = False
ACT_MIN_WINDOWN_SIZE = 6
ACT_MAX_WINDOWN_SIZE = 10
[Text2Vec]
HF_TEXT2VEC_MODEL_NAME = GanymedeNil/text2vec-large-chinese