Add a feature to s2sql that allows few-shot examples to be tied to a data model. (#571)

* 1. refactor the retrieval module. 2. refactor the http service module. 3. upgrade the text2sql output format and the parsing of absolute-time-related expressions in queries.

* fix bug.

* upgrade the config module; it now supports configuring LLMs supported by langchain.

* fix conflicts.

* update text2sql config reload to be compatible with the new config format.

* modify default config.

* 1. add self-consistency feature for text2sql. 2. upgrade llm api calls from sync to async. 3. refactor the text2sql module. 4. refactor the semantic retriever modules.

* merge with upstream master

* add general retrieve service.

* add api service for sql_agent for CRUD operations on few-shot examples.

* modify requirements

* add auto-cot feature

* 1. output logs to a fixed log file. 2. allow few-shot examples to be tied to a data model, and add a strategy that extends the examples when the retrieved examples tied to a data model are not enough. 3. fix malformed s2sql args.

* add prior_ext to output.

---------

Co-authored-by: shaoweigong <shaoweigong@tencent.com>
This commit is contained in:
codescracker
2023-12-27 19:39:50 +08:00
committed by GitHub
parent cf2b4bfb5c
commit b706c4efb4
6 changed files with 512 additions and 68 deletions

View File

@@ -8,8 +8,6 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from instances.logging_instance import logger
def type_convert(input_str: str):
try:
@@ -27,6 +25,11 @@ config_path = os.path.join(CONFIG_DIR_PATH, config_file)
config = configparser.ConfigParser()
config.read(config_path)
log_dir = "log"
LOG_DIR_PATH = os.path.join(PROJECT_DIR_PATH, log_dir)
log_file = "run.log"
LOG_FILE_PATH = os.path.join(LOG_DIR_PATH, log_file)
llm_parser_section_name = "LLMParser"
LLMPARSER_HOST = config.get(llm_parser_section_name, 'LLMPARSER_HOST')
LLMPARSER_PORT = int(config.get(llm_parser_section_name, 'LLMPARSER_PORT'))
@@ -57,21 +60,22 @@ for option in config.options(llm_model_section_name):
if __name__ == "__main__":
logger.info(f"PROJECT_DIR_PATH: {PROJECT_DIR_PATH}")
logger.info(f"EMB_MODEL_PATH: {HF_TEXT2VEC_MODEL_NAME}")
logger.info(f"CHROMA_DB_PERSIST_PATH: {CHROMA_DB_PERSIST_PATH}")
logger.info(f"LLMPARSER_HOST: {LLMPARSER_HOST}")
logger.info(f"LLMPARSER_PORT: {LLMPARSER_PORT}")
logger.info(f"llm_config_dict: {llm_config_dict}")
logger.info(f"LLM_PROVIDER_NAME: {LLM_PROVIDER_NAME}")
logger.info(f"PRESET_QUERY_COLLECTION_NAME: {PRESET_QUERY_COLLECTION_NAME}")
logger.info(f"SOLVED_QUERY_COLLECTION_NAME: {SOLVED_QUERY_COLLECTION_NAME}")
logger.info(f"TEXT2DSLAGENT_COLLECTION_NAME: {TEXT2DSLAGENT_COLLECTION_NAME}")
logger.info(f"TEXT2DSLAGENTACT_COLLECTION_NAME: {TEXT2DSLAGENTACT_COLLECTION_NAME}")
logger.info(f"TEXT2DSL_EXAMPLE_NUM: {TEXT2DSL_EXAMPLE_NUM}")
logger.info(f"TEXT2DSL_FEWSHOTS_NUM: {TEXT2DSL_FEWSHOTS_NUM}")
logger.info(f"TEXT2DSL_SELF_CONSISTENCY_NUM: {TEXT2DSL_SELF_CONSISTENCY_NUM}")
logger.info(f"ACT_MIN_WINDOWN_SIZE: {ACT_MIN_WINDOWN_SIZE}")
logger.info(f"ACT_MAX_WINDOWN_SIZE: {ACT_MAX_WINDOWN_SIZE}")
print(f"PROJECT_DIR_PATH: {PROJECT_DIR_PATH}")
print(f"EMB_MODEL_PATH: {HF_TEXT2VEC_MODEL_NAME}")
print(f"CHROMA_DB_PERSIST_PATH: {CHROMA_DB_PERSIST_PATH}")
print(f"LLMPARSER_HOST: {LLMPARSER_HOST}")
print(f"LLMPARSER_PORT: {LLMPARSER_PORT}")
print(f"llm_config_dict: {llm_config_dict}")
print(f"LLM_PROVIDER_NAME: {LLM_PROVIDER_NAME}")
print(f"PRESET_QUERY_COLLECTION_NAME: {PRESET_QUERY_COLLECTION_NAME}")
print(f"SOLVED_QUERY_COLLECTION_NAME: {SOLVED_QUERY_COLLECTION_NAME}")
print(f"TEXT2DSLAGENT_COLLECTION_NAME: {TEXT2DSLAGENT_COLLECTION_NAME}")
print(f"TEXT2DSLAGENTACT_COLLECTION_NAME: {TEXT2DSLAGENTACT_COLLECTION_NAME}")
print(f"TEXT2DSL_EXAMPLE_NUM: {TEXT2DSL_EXAMPLE_NUM}")
print(f"TEXT2DSL_FEWSHOTS_NUM: {TEXT2DSL_FEWSHOTS_NUM}")
print(f"TEXT2DSL_SELF_CONSISTENCY_NUM: {TEXT2DSL_SELF_CONSISTENCY_NUM}")
print(f"ACT_MIN_WINDOWN_SIZE: {ACT_MIN_WINDOWN_SIZE}")
print(f"ACT_MAX_WINDOWN_SIZE: {ACT_MAX_WINDOWN_SIZE}")
print(f"LOG_FILE_PATH: {LOG_FILE_PATH}")