From da5e7b9b75f7c1779df769c7f42dc75f8e242e24 Mon Sep 17 00:00:00 2001 From: mainmain <57514971+mainmainer@users.noreply.github.com> Date: Sun, 4 Feb 2024 20:16:07 +0800 Subject: [PATCH] (improvement) revise evaluation and fix null pointer (#715) --- .../service/impl/ConfigServiceImpl.java | 12 +- evaluation/README_CN.md | 4 +- evaluation/build_models.py | 354 ++++++++++++++++++ evaluation/build_pred_result.py | 27 +- evaluation/config/config.yaml | 2 - evaluation/error_case.json | 17 - .../persistence/dataobject/DimensionDO.java | 2 +- .../persistence/dataobject/DomainDO.java | 2 +- .../persistence/dataobject/MetricDO.java | 8 +- .../persistence/dataobject/ModelDO.java | 2 +- .../repository/impl/StatRepositoryImpl.java | 13 +- .../supersonic/BenchMarkDemoDataLoader.java | 4 + .../tencent/supersonic/ChatDemoLoader.java | 3 +- .../supersonic/HeadlessDemoLoader.java | 2 +- .../data/dictionary/custom/DimValue_10_20.txt | 8 - .../data/dictionary/custom/DimValue_10_22.txt | 8 - .../data/dictionary/custom/DimValue_5_10.txt | 8 +- .../data/dictionary/custom/DimValue_5_11.txt | 12 +- .../data/dictionary/custom/DimValue_6_12.txt | 8 +- .../data/dictionary/custom/DimValue_6_13.txt | 4 +- .../data/dictionary/custom/DimValue_7_16.txt | 4 +- .../data/dictionary/custom/DimValue_8_18.txt | 8 +- .../data/dictionary/custom/DimValue_8_19.txt | 4 +- .../data/dictionary/custom/DimValue_8_21.txt | 12 +- .../data/dictionary/custom/DimValue_9_15.txt | 5 - .../data/dictionary/custom/DimValue_9_16.txt | 4 - .../data/dictionary/custom/DimValue_9_18.txt | 7 - .../data/dictionary/custom/DimValue_9_19.txt | 7 - .../src/main/resources/hanlp.properties | 2 +- 29 files changed, 431 insertions(+), 122 deletions(-) create mode 100644 evaluation/build_models.py delete mode 100644 evaluation/error_case.json delete mode 100644 launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_10_20.txt delete mode 100644 launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_10_22.txt delete mode 100644 launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_9_15.txt delete mode 100644 launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_9_16.txt delete mode 100644 launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_9_18.txt delete mode 100644 launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_9_19.txt diff --git a/chat/server/src/main/java/com/tencent/supersonic/chat/server/service/impl/ConfigServiceImpl.java b/chat/server/src/main/java/com/tencent/supersonic/chat/server/service/impl/ConfigServiceImpl.java index 02f430678..0ab6160f5 100644 --- a/chat/server/src/main/java/com/tencent/supersonic/chat/server/service/impl/ConfigServiceImpl.java +++ b/chat/server/src/main/java/com/tencent/supersonic/chat/server/service/impl/ConfigServiceImpl.java @@ -124,19 +124,23 @@ public class ConfigServiceImpl implements ConfigService { Long modelId = chatConfig.getModelId(); List blackDimIdList = new ArrayList<>(); - if (Objects.nonNull(chatConfig.getChatAggConfig()) && Objects.nonNull(chatConfig.getChatAggConfig())) { + if (Objects.nonNull(chatConfig.getChatAggConfig()) + && Objects.nonNull(chatConfig.getChatAggConfig().getVisibility())) { blackDimIdList.addAll(chatConfig.getChatAggConfig().getVisibility().getBlackDimIdList()); } - if (Objects.nonNull(chatConfig.getChatDetailConfig()) && Objects.nonNull(chatConfig.getChatDetailConfig())) { + if (Objects.nonNull(chatConfig.getChatDetailConfig()) + && Objects.nonNull(chatConfig.getChatDetailConfig().getVisibility())) { blackDimIdList.addAll(chatConfig.getChatDetailConfig().getVisibility().getBlackDimIdList()); } List filterDimIdList = blackDimIdList.stream().distinct().collect(Collectors.toList()); List blackMetricIdList = new ArrayList<>(); - if (Objects.nonNull(chatConfig.getChatAggConfig()) && Objects.nonNull(chatConfig.getChatAggConfig())) { + if (Objects.nonNull(chatConfig.getChatAggConfig()) + && Objects.nonNull(chatConfig.getChatAggConfig().getVisibility())) { blackMetricIdList.addAll(chatConfig.getChatAggConfig().getVisibility().getBlackMetricIdList()); } - if (Objects.nonNull(chatConfig.getChatDetailConfig()) && Objects.nonNull(chatConfig.getChatDetailConfig())) { + if (Objects.nonNull(chatConfig.getChatDetailConfig()) + && Objects.nonNull(chatConfig.getChatDetailConfig().getVisibility())) { blackMetricIdList.addAll(chatConfig.getChatDetailConfig().getVisibility().getBlackMetricIdList()); } List filterMetricIdList = blackMetricIdList.stream().distinct().collect(Collectors.toList()); diff --git a/evaluation/README_CN.md b/evaluation/README_CN.md index df4914227..61ec9f2ea 100644 --- a/evaluation/README_CN.md +++ b/evaluation/README_CN.md @@ -1,8 +1,8 @@ # 评测流程 1. 正常启动项目(必须包括LLM服务) -2. 执行evalution.sh脚本,主要包括构建表数据、获取模型预测结果,执行对比逻辑。可以在命令行看到执行准确率,错误case会写到同目录的error_case.json文件中。 +2. 执行evalution.sh脚本,主要包括构建表数据、数据建模、获取模型预测结果,执行对比逻辑。可以在命令行看到执行准确率,错误case会写到同目录的error_case.json文件中。 # 评测意义 -制定评估工具对于提示词或代码更改的影响至关重要,方便supersonic快速对接其他模型、更改配置,可以帮助我们了解这些变化是否会提高或降低准确率、响应速度。 +制定评估工具方便supersonic快速对接其他模型、更改参数配置,对于提示词或代码更改的影响至关重要,可以帮助我们了解这些变化是否会提高或降低准确率、响应速度。 diff --git a/evaluation/build_models.py b/evaluation/build_models.py new file mode 100644 index 000000000..69da4561c --- /dev/null +++ b/evaluation/build_models.py @@ -0,0 +1,354 @@ +import sqlite3 +import os +import requests +import datetime +import yaml +import json +import time +import jwt + + +def get_authorization(): + exp = time.time() + 1000 + token= jwt.encode({"token_user_name": "admin","exp": exp}, "secret", algorithm="HS512") + return "Bearer "+token + +def get_url_pre(): + current_directory = os.path.dirname(os.path.abspath(__file__)) + config_file=current_directory+"/config/config.yaml" + with open(config_file, 'r') as file: + config = yaml.safe_load(file) + return config["url"] + +def get_list(url): + authorization=get_authorization() + header = {} + header["Authorization"] =authorization + resp=requests.get(url=url, headers=header) + json_data=resp.json() + if json_data["code"]==200: + return json_data["data"] + else: + return None +def build_domain(): + dict_info={} + json_data='{"name":"DuSQL_互联网企业","bizName":"internet","sensitiveLevel":0,"parentId":0,"isOpen":0,"viewers":["admin","tom","jack"],"viewOrgs":["1"],"admins":["admin"],"adminOrgs":[],"admin":"admin","viewer":"admin,tom,jack","viewOrg":"1","adminOrg":""}' + json_dict=json.loads(json_data) + url=get_url_pre()+"/api/semantic/domain/getDomainList" + domain_list=get_list(url) + build=False + if domain_list is None : + build=True + else: + exist=False + for domain in domain_list: + if domain["bizName"]=="internet": + exist=True + break + if not exist: + build=True + if build: + url=get_url_pre()+"/api/semantic/domain/createDomain" + authorization=get_authorization() + header = {} + header["Authorization"] =authorization + resp=requests.post(url=url, headers=header,json=json_dict) + url=get_url_pre()+"/api/semantic/domain/getDomainList" + domain_list=get_list(url) + domain_id=domain_list[len(domain_list)-1]["id"] + dict_info["build"]=build + dict_info["domain_id"]=domain_id + return dict_info +def build_model_1(domain_id): + json_data='{"name":"公司","bizName":"company","description":"公司","sensitiveLevel":0,"databaseId":1,"domainId":4,"modelDetail":{"queryType":"sql_query","sqlQuery":"SELECT imp_date,company_id,company_name,headquarter_address,company_established_time,founder,ceo,annual_turnover,employee_count FROM company","identifiers":[{"name":"公司id","type":"primary","bizName":"company_id","isCreateDimension":0,"fieldName":"company_id"}],"dimensions":[{"name":"","type":"time","dateFormat":"yyyy-MM-dd","typeParams":{"isPrimary":"false","timeGranularity":"none"},"isCreateDimension":0,"bizName":"imp_date","isTag":0,"fieldName":"imp_date"},{"name":"公司名称","type":"categorical","dateFormat":"yyyy-MM-dd","isCreateDimension":1,"bizName":"company_name","isTag":0,"fieldName":"company_name"},{"name":"总部地点","type":"categorical","dateFormat":"yyyy-MM-dd","isCreateDimension":1,"bizName":"headquarter_address","isTag":0,"fieldName":"headquarter_address"},{"name":"公司成立时间","type":"categorical","dateFormat":"yyyy-MM-dd","isCreateDimension":1,"bizName":"company_established_time","isTag":0,"fieldName":"company_established_time"},{"name":"创始人","type":"categorical","dateFormat":"yyyy-MM-dd","isCreateDimension":1,"bizName":"founder","isTag":0,"fieldName":"founder"},{"name":"首席执行官","type":"categorical","dateFormat":"yyyy-MM-dd","isCreateDimension":1,"bizName":"ceo","isTag":0,"fieldName":"ceo"}],"measures":[{"name":"年营业额","agg":"SUM","bizName":"annual_turnover","isCreateMetric":1},{"name":"员工数","agg":"SUM","bizName":"employee_count","isCreateMetric":1}],"fields":[{"fieldName":"company_id"},{"fieldName":"imp_date"},{"fieldName":"company_established_time"},{"fieldName":"founder"},{"fieldName":"headquarter_address"},{"fieldName":"ceo"},{"fieldName":"company_name"}]},"viewers":["admin","tom","jack"],"viewOrgs":["1"],"admins":["admin"],"adminOrgs":[],"admin":"admin","viewer":"admin,tom,jack","viewOrg":"1","timeDimension":[{"name":"","type":"time","dateFormat":"yyyy-MM-dd","typeParams":{"isPrimary":"false","timeGranularity":"none"},"isCreateDimension":0,"bizName":"imp_date","isTag":0,"fieldName":"imp_date"}],"adminOrg":""}' + json_dict=json.loads(json_data) + json_dict["domainId"]=domain_id + url=get_url_pre()+"/api/semantic/model/getModelList/"+str(domain_id) + model_list=get_list(url) + build=False + if model_list is None : + build=True + else: + exist=False + for model in model_list: + if model["bizName"]=="company": + exist=True + break + if not exist: + build=True + if build: + url=get_url_pre()+"/api/semantic/model/createModel" + authorization=get_authorization() + header = {} + header["Authorization"] =authorization + resp=requests.post(url=url, headers=header,json=json_dict) + url=get_url_pre()+"/api/semantic/model/getModelList/"+str(domain_id) + model_list=get_list(url) + model_id=model_list[len(model_list)-1]["id"] + return model_id + +def build_model_2(domain_id): + json_data='{"name":"品牌","bizName":"brand","description":"品牌","sensitiveLevel":0,"databaseId":1,"domainId":4,"modelDetail":{"queryType":"sql_query","sqlQuery":"SELECT imp_date,brand_id,brand_name,brand_established_time,company_id,legal_representative,registered_capital FROM brand","identifiers":[{"name":"品牌id","type":"primary","bizName":"brand_id","isCreateDimension":0,"fieldName":"brand_id"},{"name":"公司id","type":"foreign","bizName":"company_id","isCreateDimension":0,"fieldName":"company_id"}],"dimensions":[{"name":"","type":"time","dateFormat":"yyyy-MM-dd","typeParams":{"isPrimary":"false","timeGranularity":"none"},"isCreateDimension":0,"bizName":"imp_date","isTag":0,"fieldName":"imp_date"},{"name":"品牌名称","type":"categorical","dateFormat":"yyyy-MM-dd","isCreateDimension":1,"bizName":"brand_name","isTag":0,"fieldName":"brand_name"},{"name":"品牌成立时间","type":"categorical","dateFormat":"yyyy-MM-dd","isCreateDimension":1,"bizName":"brand_established_time","isTag":0,"fieldName":"brand_established_time"},{"name":"法定代表人","type":"categorical","dateFormat":"yyyy-MM-dd","isCreateDimension":1,"bizName":"legal_representative","isTag":0,"fieldName":"legal_representative"}],"measures":[{"name":"注册资本","agg":"SUM","bizName":"registered_capital","isCreateMetric":1}],"fields":[{"fieldName":"company_id"},{"fieldName":"brand_id"},{"fieldName":"brand_name"},{"fieldName":"imp_date"},{"fieldName":"brand_established_time"},{"fieldName":"legal_representative"}]},"viewers":["admin","tom","jack"],"viewOrgs":["1"],"admins":["admin"],"adminOrgs":[],"admin":"admin","viewer":"admin,tom,jack","viewOrg":"1","timeDimension":[{"name":"","type":"time","dateFormat":"yyyy-MM-dd","typeParams":{"isPrimary":"false","timeGranularity":"none"},"isCreateDimension":0,"bizName":"imp_date","isTag":0,"fieldName":"imp_date"}],"adminOrg":""}' + json_dict=json.loads(json_data) + json_dict["domainId"]=domain_id + url=get_url_pre()+"/api/semantic/model/getModelList/"+str(domain_id) + model_list=get_list(url) + build=False + if model_list is None : + build=True + else: + exist=False + for model in model_list: + if model["bizName"]=="brand": + exist=True + break + if not exist: + build=True + if build: + url=get_url_pre()+"/api/semantic/model/createModel" + authorization=get_authorization() + header = {} + header["Authorization"] =authorization + resp=requests.post(url=url, headers=header,json=json_dict) + url=get_url_pre()+"/api/semantic/model/getModelList/"+str(domain_id) + model_list=get_list(url) + model_id=model_list[len(model_list)-1]["id"] + return model_id + +def build_model_3(domain_id): + json_data='{"name":"公司各品牌收入排名","bizName":"company_revenue","description":"公司各品牌收入排名","sensitiveLevel":0,"databaseId":1,"domainId":4,"modelDetail":{"queryType":"sql_query","sqlQuery":"SELECT imp_date,company_id,brand_id,revenue_proportion,profit_proportion,expenditure_proportion FROM company_revenue","identifiers":[{"name":"公司id","type":"foreign","bizName":"company_id","isCreateDimension":0,"fieldName":"company_id"},{"name":"品牌id","type":"foreign","bizName":"brand_id","isCreateDimension":0,"fieldName":"brand_id"}],"dimensions":[{"name":"","type":"time","dateFormat":"yyyy-MM-dd","typeParams":{"isPrimary":"false","timeGranularity":"none"},"isCreateDimension":0,"bizName":"imp_date","isTag":0,"fieldName":"imp_date"}],"measures":[{"name":"营收占比","agg":"SUM","bizName":"revenue_proportion","isCreateMetric":1},{"name":"利润占比","agg":"SUM","bizName":"profit_proportion","isCreateMetric":1},{"name":"支出占比","agg":"SUM","bizName":"expenditure_proportion","isCreateMetric":1}],"fields":[{"fieldName":"company_id"},{"fieldName":"brand_id"},{"fieldName":"imp_date"}]},"viewers":["admin","tom","jack"],"viewOrgs":["1"],"admins":["admin"],"adminOrgs":[],"admin":"admin","viewer":"admin,tom,jack","viewOrg":"1","timeDimension":[{"name":"","type":"time","dateFormat":"yyyy-MM-dd","typeParams":{"isPrimary":"false","timeGranularity":"none"},"isCreateDimension":0,"bizName":"imp_date","isTag":0,"fieldName":"imp_date"}],"adminOrg":""}' + json_dict=json.loads(json_data) + json_dict["domainId"]=domain_id + url=get_url_pre()+"/api/semantic/model/getModelList/"+str(domain_id) + model_list=get_list(url) + build=False + if model_list is None : + build=True + else: + exist=False + for model in model_list: + if model["bizName"]=="company_revenue": + exist=True + break + if not exist: + build=True + if build: + url=get_url_pre()+"/api/semantic/model/createModel" + authorization=get_authorization() + header = {} + header["Authorization"] =authorization + resp=requests.post(url=url, headers=header,json=json_dict) + url=get_url_pre()+"/api/semantic/model/getModelList/"+str(domain_id) + model_list=get_list(url) + model_id=model_list[len(model_list)-1]["id"] + return model_id + +def build_model_4(domain_id): + json_data='{"name":"公司品牌历年收入","bizName":"company_brand_revenue","description":"公司品牌历年收入","sensitiveLevel":0,"databaseId":1,"domainId":4,"modelDetail":{"queryType":"sql_query","sqlQuery":"SELECT imp_date,year_time,brand_id,revenue,profit,revenue_growth_year_on_year,profit_growth_year_on_year FROM company_brand_revenue","identifiers":[{"name":"品牌id","type":"foreign","bizName":"brand_id","isCreateDimension":0,"fieldName":"brand_id"}],"dimensions":[{"name":"","type":"time","dateFormat":"yyyy-MM-dd","typeParams":{"isPrimary":"false","timeGranularity":"none"},"isCreateDimension":0,"bizName":"imp_date","isTag":0,"fieldName":"imp_date"},{"name":"年份","type":"categorical","dateFormat":"yyyy-MM-dd","isCreateDimension":1,"bizName":"year_time","isTag":0,"fieldName":"year_time"}],"measures":[{"name":"营收","agg":"SUM","bizName":"revenue","isCreateMetric":1},{"name":"利润","agg":"SUM","bizName":"profit","isCreateMetric":1},{"name":"营收同比增长","agg":"SUM","bizName":"revenue_growth_year_on_year","isCreateMetric":1},{"name":"利润同比增长","agg":"SUM","bizName":"profit_growth_year_on_year","isCreateMetric":1}],"fields":[{"fieldName":"brand_id"},{"fieldName":"imp_date"},{"fieldName":"year_time"}]},"viewers":["admin","tom","jack"],"viewOrgs":["1"],"admins":["admin"],"adminOrgs":[],"admin":"admin","viewer":"admin,tom,jack","viewOrg":"1","timeDimension":[{"name":"","type":"time","dateFormat":"yyyy-MM-dd","typeParams":{"isPrimary":"false","timeGranularity":"none"},"isCreateDimension":0,"bizName":"imp_date","isTag":0,"fieldName":"imp_date"}],"adminOrg":""}' + json_dict=json.loads(json_data) + json_dict["domainId"]=domain_id + url=get_url_pre()+"/api/semantic/model/getModelList/"+str(domain_id) + model_list=get_list(url) + build=False + if model_list is None : + build=True + else: + exist=False + for model in model_list: + if model["bizName"]=="company_brand_revenue": + exist=True + break + if not exist: + build=True + if build: + url=get_url_pre()+"/api/semantic/model/createModel" + authorization=get_authorization() + header = {} + header["Authorization"] =authorization + resp=requests.post(url=url, headers=header,json=json_dict) + url=get_url_pre()+"/api/semantic/model/getModelList/"+str(domain_id) + model_list=get_list(url) + model_id=model_list[len(model_list)-1]["id"] + return model_id +def build_model_rela1(domain_id,from_model_id,to_model_id): + json_data='{"domainId":4,"fromModelId":9,"toModelId":10,"joinType":"inner join","joinConditions":[{"leftField":"company_id","rightField":"company_id","operator":"="}]}' + json_dict=json.loads(json_data) + json_dict["domainId"]=domain_id + json_dict["fromModelId"]=from_model_id + json_dict["toModelId"]=to_model_id + + url=get_url_pre()+"/api/semantic/modelRela" + authorization=get_authorization() + header = {} + header["Authorization"] =authorization + resp=requests.post(url=url, headers=header,json=json_dict) + +def build_model_rela2(domain_id,from_model_id,to_model_id): + json_data='{"domainId":4,"fromModelId":9,"toModelId":11,"joinType":"inner join","joinConditions":[{"leftField":"company_id","rightField":"company_id","operator":"="}]}' + json_dict=json.loads(json_data) + json_dict["domainId"]=domain_id + json_dict["fromModelId"]=from_model_id + json_dict["toModelId"]=to_model_id + url=get_url_pre()+"/api/semantic/modelRela" + authorization=get_authorization() + header = {} + header["Authorization"] =authorization + resp=requests.post(url=url, headers=header,json=json_dict) + +def build_model_rela3(domain_id,from_model_id,to_model_id): + json_data='{"domainId":4,"fromModelId":10,"toModelId":11,"joinType":"inner join","joinConditions":[{"leftField":"brand_id","rightField":"brand_id","operator":"="}]}' + json_dict=json.loads(json_data) + json_dict["domainId"]=domain_id + json_dict["fromModelId"]=from_model_id + json_dict["toModelId"]=to_model_id + url=get_url_pre()+"/api/semantic/modelRela" + authorization=get_authorization() + header = {} + header["Authorization"] =authorization + resp=requests.post(url=url, headers=header,json=json_dict) + +def build_model_rela4(domain_id,from_model_id,to_model_id): + json_data='{"domainId":4,"fromModelId":10,"toModelId":12,"joinType":"inner join","joinConditions":[{"leftField":"brand_id","rightField":"brand_id","operator":"="}]}' + json_dict=json.loads(json_data) + json_dict["domainId"]=domain_id + json_dict["fromModelId"]=from_model_id + json_dict["toModelId"]=to_model_id + url=get_url_pre()+"/api/semantic/modelRela" + authorization=get_authorization() + header = {} + header["Authorization"] =authorization + resp=requests.post(url=url, headers=header,json=json_dict) +def get_id_list(data_list): + id_list=[] + if data_list is not None: + for data in data_list: + id_list.append(data["id"]) + return id_list +def build_view(domain_id,model_id1,model_id2,model_id3,model_id4): + url=get_url_pre()+"/api/semantic/dimension/getDimensionList/"+str(model_id1) + dimension_list1=get_id_list(get_list(url)) + url=get_url_pre()+"/api/semantic/dimension/getDimensionList/"+str(model_id2) + dimension_list2=get_id_list(get_list(url)) + url=get_url_pre()+"/api/semantic/dimension/getDimensionList/"+str(model_id3) + dimension_list3=get_id_list(get_list(url)) + url=get_url_pre()+"/api/semantic/dimension/getDimensionList/"+str(model_id4) + dimension_list4=get_id_list(get_list(url)) + + url=get_url_pre()+"/api/semantic/metric/getMetricList/"+str(model_id1) + metric_list1=get_id_list(get_list(url)) + url=get_url_pre()+"/api/semantic/metric/getMetricList/"+str(model_id2) + metric_list2=get_id_list(get_list(url)) + url=get_url_pre()+"/api/semantic/metric/getMetricList/"+str(model_id3) + metric_list3=get_id_list(get_list(url)) + url=get_url_pre()+"/api/semantic/metric/getMetricList/"+str(model_id4) + metric_list4=get_id_list(get_list(url)) + + json_dict={"name":"DuSQL 互联网企业","bizName":"internet","description":"DuSQL互联网企业数据源相关的指标和维度等", + "typeEnum":"VIEW","sensitiveLevel":0,"domainId":domain_id,"viewDetail": + {"viewModelConfigs":[{"id":model_id1,"includesAll":False,"metrics":metric_list1, + "dimensions":dimension_list1},{"id":model_id2,"includesAll":False, + "metrics":metric_list2,"dimensions":dimension_list2},{"id":model_id3,"includesAll":False,"metrics":metric_list3,"dimensions":dimension_list3}, + {"id":model_id4,"includesAll":False,"metrics":metric_list4,"dimensions":dimension_list4}]},"queryConfig":{"tagTypeDefaultConfig": + {"dimensionIds":[],"metricIds":[]},"metricTypeDefaultConfig":{"timeDefaultConfig":{"unit":1,"period":"DAY","timeMode":"RECENT"}}},"admins":["admin"],"admin":"admin"} + + url=get_url_pre()+"/api/semantic/view" + authorization=get_authorization() + header = {} + header["Authorization"] =authorization + resp=requests.post(url=url, headers=header,json=json_dict) + + url=get_url_pre()+"/api/semantic/view/getViewList?domainId="+str(domain_id) + print(url) + resp=get_list(url) + data={} + data["id"]=resp[0]["id"] + dim={} + dim[model_id1]=dimension_list1 + dim[model_id2]=dimension_list2 + dim[model_id3]=dimension_list3 + dim[model_id4]=dimension_list4 + data["dim"]=dim + return data + + +def build_agent(view_id): + json_dict={ + "id":10, + "enableSearch":1, + "name":"DuSQL 互联网企业", + "description":"DuSQL", + "status":1, + "examples":[], + "agentConfig":json.dumps({ + "tools":[{ + "id":1, + "type":"NL2SQL_LLM", + "viewIds":[view_id] + }] + }) + } + url=get_url_pre()+"/api/chat/agent" + authorization=get_authorization() + header = {} + header["Authorization"] =authorization + resp=requests.post(url=url, headers=header,json=json_dict) +def build_chat(agentId): + url=get_url_pre()+"/api/chat/manage/save?chatName=DuSQL问答&agentId="+str(agentId) + authorization=get_authorization() + header = {} + header["Authorization"] =authorization + resp=requests.post(url=url, headers=header) + + url=get_url_pre()+"/api/chat/manage/getAll?agentId="+str(agentId) + data=get_list(url) + return data[0]["chatId"] +def build_dim_value_dict(modelIds,info): + url=get_url_pre()+"/api/chat/dict/task" + authorization=get_authorization() + header = {} + header["Authorization"] =authorization + data={ + "updateMode":"REALTIME_ADD", + "modelIds":modelIds, + "modelAndDimPair":info["dim"] + } + print(data) + resp=requests.post(url=url, headers=header,json=data) + + +def build(): + dict_info=build_domain() + domain_id=dict_info["domain_id"] + if dict_info["build"]: + model_id1=build_model_1(domain_id) + model_id2=build_model_2(domain_id) + model_id3=build_model_3(domain_id) + model_id4=build_model_4(domain_id) + view_id=build_view(domain_id,model_id1,model_id2,model_id3,model_id4) + build_model_rela1(domain_id,model_id1,model_id2) + build_model_rela2(domain_id,model_id1,model_id3) + build_model_rela3(domain_id,model_id2,model_id3) + build_model_rela4(domain_id,model_id2,model_id4) + build_agent(view_id["id"]) + agentId=10 + chat_id=build_chat(agentId) + dict={} + dict["agent_id"]=agentId + dict["chat_id"]=chat_id + else: + agentId=10 + chat_id=build_chat(agentId) + dict={} + dict["agent_id"]=agentId + dict["chat_id"]=chat_id + return dict + + + +if __name__ == '__main__': + dict_info=build() + print(dict_info) + + + + diff --git a/evaluation/build_pred_result.py b/evaluation/build_pred_result.py index 710c3c4ed..a9b294734 100644 --- a/evaluation/build_pred_result.py +++ b/evaluation/build_pred_result.py @@ -1,10 +1,11 @@ +import time + import requests import logging import json -import jwt -import time import os import yaml +from build_models import build,get_authorization def read_query(input_path): result=[] @@ -24,13 +25,16 @@ def get_pred_sql(query,url,agentId,chatId,authorization,default_sql): header["Authorization"] =authorization try: result = requests.post(url=url, headers=header, json=data) + print(result.json()) + print(result.json()["traceId"]) if result.status_code == 200: data = result.json()["data"] selectedParses = data["selectedParses"] if selectedParses is not None and len(selectedParses) > 0: querySQL = selectedParses[0]["sqlInfo"]["querySQL"] - querySQL=querySQL.replace("`dusql`.", "").replace("dusql", "").replace("\n", "") - return querySQL+'\n' + if querySQL is not None: + querySQL=querySQL.replace("`dusql`.", "").replace("dusql", "").replace("\n", "") + return querySQL+'\n' return default_sql+'\n' except Exception as e: print(url) @@ -38,24 +42,22 @@ def get_pred_sql(query,url,agentId,chatId,authorization,default_sql): print(e) logging.info(e) return default_sql+'\n' -def get_authorization(): - exp = time.time() + 1000 - token= jwt.encode({"token_user_name": "admin","exp": exp}, "secret", algorithm="HS512") - return "Bearer "+token def get_pred_result(): current_directory = os.path.dirname(os.path.abspath(__file__)) config_file=current_directory+"/config/config.yaml" with open(config_file, 'r') as file: config = yaml.safe_load(file) - input_path=current_directory+"/data/"+"internet.txt" - pred_sql_path = current_directory+"/data/"+"pred_example_dusql.txt" + input_path=current_directory+"/data/internet.txt" + pred_sql_path = current_directory+"/data/pred_example_dusql.txt" pred_sql_exist=os.path.exists(pred_sql_path) if pred_sql_exist: os.remove(pred_sql_path) print("pred_sql_path removed!") - agent_id=config["agent_id"] - chat_id=config["chat_id"] + dict_info=build() + print(dict_info) + agent_id=dict_info["agent_id"] + chat_id=dict_info["chat_id"] url=config["url"] authorization=get_authorization() print(input_path) @@ -66,6 +68,7 @@ def get_pred_result(): for i in range(0,len(questions)): pred_sql=get_pred_sql(questions[i],url,agent_id,chat_id,authorization,default_sql) pred_sql_list.append(pred_sql) + time.sleep(30) write_sql(pred_sql_path, pred_sql_list) if __name__ == "__main__": diff --git a/evaluation/config/config.yaml b/evaluation/config/config.yaml index 17f1804f8..1dc40ed53 100644 --- a/evaluation/config/config.yaml +++ b/evaluation/config/config.yaml @@ -1,3 +1 @@ -chat_id: 3 -agent_id: 4 url: http://localhost:9080 diff --git a/evaluation/error_case.json b/evaluation/error_case.json deleted file mode 100644 index 0fb15f375..000000000 --- a/evaluation/error_case.json +++ /dev/null @@ -1,17 +0,0 @@ -[ - { - "query": "在各公司所有品牌收入排名中,给出每一个品牌,其所在公司以及收入占该公司的总收入比例,同时给出该公司的年营业额", - "gold_sql": "SELECT T3.company_name, T3.annual_turnover, T2.brand_name, T1.revenue_proportion FROM company_revenue AS T1 JOIN brand AS T2 JOIN company AS T3 ON T1.brand_id = T2.brand_id AND T1.company_id = T3.company_id", - "pred_sql": "select * from tablea" - }, - { - "query": "在各公司所有品牌收入排名中,给出每一个品牌,其所在公司以及收入占该公司的总收入比例", - "gold_sql": "SELECT T3.company_name, T2.brand_name, T1.revenue_proportion FROM company_revenue AS T1 JOIN brand AS T2 JOIN company AS T3 ON T1.brand_id = T2.brand_id AND T1.company_id = T3.company_id", - "pred_sql": "select * from tablea" - }, - { - "query": "在各公司所有品牌收入排名中,给出每一个品牌和其法人,其所在公司以及收入占该公司的总收入比例", - "gold_sql": "SELECT T3.company_name, T2.brand_name, T2.legal_representative, T1.revenue_proportion FROM company_revenue AS T1 JOIN brand AS T2 JOIN company AS T3 ON T1.brand_id = T2.brand_id AND T1.company_id = T3.company_id", - "pred_sql": "select * from tablea" - } -] \ No newline at end of file diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/dataobject/DimensionDO.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/dataobject/DimensionDO.java index 71c97abf5..77a139c70 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/dataobject/DimensionDO.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/dataobject/DimensionDO.java @@ -51,4 +51,4 @@ public class DimensionDO { private String dataType; private int isTag; -} \ No newline at end of file +} diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/dataobject/DomainDO.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/dataobject/DomainDO.java index c5f1abc10..0597c812f 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/dataobject/DomainDO.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/dataobject/DomainDO.java @@ -81,4 +81,4 @@ public class DomainDO { */ private String viewOrg; -} \ No newline at end of file +} diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/dataobject/MetricDO.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/dataobject/MetricDO.java index 73b693e7e..a292885ac 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/dataobject/MetricDO.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/dataobject/MetricDO.java @@ -80,17 +80,17 @@ public class MetricDO { private String dataFormat; /** - * + * */ private String alias; /** - * + * */ private String tags; /** - * + * */ private String relateDimensions; @@ -103,4 +103,4 @@ public class MetricDO { private String defineType; -} \ No newline at end of file +} diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/dataobject/ModelDO.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/dataobject/ModelDO.java index 32653f035..42e04c710 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/dataobject/ModelDO.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/dataobject/ModelDO.java @@ -56,4 +56,4 @@ public class ModelDO { private String sourceType; -} \ No newline at end of file +} diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/repository/impl/StatRepositoryImpl.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/repository/impl/StatRepositoryImpl.java index 13c9fd223..95781e556 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/repository/impl/StatRepositoryImpl.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/persistence/repository/impl/StatRepositoryImpl.java @@ -48,10 +48,11 @@ public class StatRepositoryImpl implements StatRepository { statInfos.stream().forEach(stat -> { String dimensions = stat.getDimensions(); String metrics = stat.getMetrics(); - updateStatMapInfo(map, dimensions, TypeEnums.DIMENSION.name().toLowerCase(), stat.getModelId()); - updateStatMapInfo(map, metrics, TypeEnums.METRIC.name().toLowerCase(), stat.getModelId()); + if (Objects.nonNull(stat.getViewId())) { + updateStatMapInfo(map, dimensions, TypeEnums.DIMENSION.name().toLowerCase(), stat.getViewId()); + updateStatMapInfo(map, metrics, TypeEnums.METRIC.name().toLowerCase(), stat.getViewId()); + } }); - map.forEach((k, v) -> { Long classId = Long.parseLong(k.split(AT_SYMBOL + AT_SYMBOL)[0]); String type = k.split(AT_SYMBOL + AT_SYMBOL)[1]; @@ -68,13 +69,13 @@ public class StatRepositoryImpl implements StatRepository { return statMapper.getStatInfo(itemUseCommend); } - private void updateStatMapInfo(Map map, String dimensions, String type, Long modelId) { + private void updateStatMapInfo(Map map, String dimensions, String type, Long viewId) { if (Strings.isNotEmpty(dimensions)) { try { List dimensionList = mapper.readValue(dimensions, new TypeReference>() { }); dimensionList.stream().forEach(dimension -> { - String key = modelId + AT_SYMBOL + AT_SYMBOL + type + AT_SYMBOL + AT_SYMBOL + dimension; + String key = viewId + AT_SYMBOL + AT_SYMBOL + type + AT_SYMBOL + AT_SYMBOL + dimension; if (map.containsKey(key)) { map.put(key, map.get(key) + 1); } else { @@ -97,4 +98,4 @@ public class StatRepositoryImpl implements StatRepository { } } } -} \ No newline at end of file +} diff --git a/launchers/standalone/src/main/java/com/tencent/supersonic/BenchMarkDemoDataLoader.java b/launchers/standalone/src/main/java/com/tencent/supersonic/BenchMarkDemoDataLoader.java index cfc9a95ba..c54d033a4 100644 --- a/launchers/standalone/src/main/java/com/tencent/supersonic/BenchMarkDemoDataLoader.java +++ b/launchers/standalone/src/main/java/com/tencent/supersonic/BenchMarkDemoDataLoader.java @@ -103,6 +103,7 @@ public class BenchMarkDemoDataLoader { dimension1.setTypeParams(new DimensionTimeTypeParams()); dimensions.add(dimension1); dimensions.add(new Dim("活跃区域", "most_popular_in", DimensionType.categorical.name(), 1)); + dimensions.add(new Dim("音乐类型名称", "g_name", DimensionType.categorical.name(), 1)); modelDetail.setDimensions(dimensions); List identifiers = new ArrayList<>(); @@ -129,6 +130,7 @@ public class BenchMarkDemoDataLoader { modelReq.setDatabaseId(1L); ModelDetail modelDetail = new ModelDetail(); List dimensions = new ArrayList<>(); + dimensions.add(new Dim("艺术家名称", "artist_name", DimensionType.categorical.name(), 1)); dimensions.add(new Dim("国籍", "country", DimensionType.categorical.name(), 1)); dimensions.add(new Dim("性别", "gender", DimensionType.categorical.name(), 1)); modelDetail.setDimensions(dimensions); @@ -157,6 +159,7 @@ public class BenchMarkDemoDataLoader { List dimensions = new ArrayList<>(); dimensions.add(new Dim("持续时间", "duration", DimensionType.categorical.name(), 1)); dimensions.add(new Dim("文件格式", "formats", DimensionType.categorical.name(), 1)); + dimensions.add(new Dim("艺术家名称", "artist_name", DimensionType.categorical.name(), 1)); modelDetail.setDimensions(dimensions); List identifiers = new ArrayList<>(); @@ -184,6 +187,7 @@ public class BenchMarkDemoDataLoader { Dim dimension1 = new Dim("", "imp_date", DimensionType.time.name(), 0); dimension1.setTypeParams(new DimensionTimeTypeParams()); dimensions.add(dimension1); + dimensions.add(new Dim("歌曲名称", "song_name", DimensionType.categorical.name(), 1)); dimensions.add(new Dim("国家", "country", DimensionType.categorical.name(), 1)); dimensions.add(new Dim("语种", "languages", DimensionType.categorical.name(), 1)); dimensions.add(new Dim("发行时间", "releasedate", DimensionType.categorical.name(), 1)); diff --git a/launchers/standalone/src/main/java/com/tencent/supersonic/ChatDemoLoader.java b/launchers/standalone/src/main/java/com/tencent/supersonic/ChatDemoLoader.java index e6d87f3dd..84cc2326e 100644 --- a/launchers/standalone/src/main/java/com/tencent/supersonic/ChatDemoLoader.java +++ b/launchers/standalone/src/main/java/com/tencent/supersonic/ChatDemoLoader.java @@ -74,7 +74,7 @@ public class ChatDemoLoader implements CommandLineRunner { addAgent1(); addAgent2(); addAgent3(); - addAgent4(); + //addAgent4(); addSampleChats(); addSampleChats2(); updateQueryScore(1); @@ -248,6 +248,7 @@ public class ChatDemoLoader implements CommandLineRunner { } agent.setAgentConfig(JSONObject.toJSONString(agentConfig)); + log.info("agent:{}", JsonUtil.toString(agent)); agentService.createAgent(agent, User.getFakeUser()); } diff --git a/launchers/standalone/src/main/java/com/tencent/supersonic/HeadlessDemoLoader.java b/launchers/standalone/src/main/java/com/tencent/supersonic/HeadlessDemoLoader.java index be4a6e136..bc6b64a25 100644 --- a/launchers/standalone/src/main/java/com/tencent/supersonic/HeadlessDemoLoader.java +++ b/launchers/standalone/src/main/java/com/tencent/supersonic/HeadlessDemoLoader.java @@ -39,7 +39,7 @@ public class HeadlessDemoLoader implements CommandLineRunner { } modelDataDemoLoader.doRun(); benchMarkDemoLoader.doRun(); - duSQLDemoDataLoader.doRun(); + //duSQLDemoDataLoader.doRun(); isLoad = true; } diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_10_20.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_10_20.txt deleted file mode 100644 index 7bc20ae1d..000000000 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_10_20.txt +++ /dev/null @@ -1,8 +0,0 @@ -阿里云 _10_20 5 -天猫 _10_20 5 -腾讯游戏 _10_20 5 -度小满 _10_20 5 -京东金融 _10_20 5 - - - diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_10_22.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_10_22.txt deleted file mode 100644 index 4d9cccf5a..000000000 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_10_22.txt +++ /dev/null @@ -1,8 +0,0 @@ -张勇 _10_22 5 -马化腾 _10_22 5 -朱光 _10_22 5 -刘强东 _10_22 5 - - - - diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_5_10.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_5_10.txt index 6402c14f5..cce180c73 100644 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_5_10.txt +++ b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_5_10.txt @@ -1,4 +1,4 @@ -美国 _5_8 1 -加拿大 _5_8 1 -锡尔赫特、吉大港、库斯蒂亚 _5_8 1 -孟加拉国 _5_8 3 \ No newline at end of file +美国 _3_8 1 +加拿大 _3_8 1 +锡尔赫特、吉大港、库斯蒂亚 _3_8 1 +孟加拉国 _3_8 3 diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_5_11.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_5_11.txt index 6cef5f046..bd4498b5d 100644 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_5_11.txt +++ b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_5_11.txt @@ -1,6 +1,6 @@ -现代 _5_9 1 -tagore _5_9 1 -蓝调 _5_9 1 -流行 _5_9 1 -民间 _5_9 1 -nazrul _5_9 1 \ No newline at end of file +现代 _3_9 1 +tagore _3_9 1 +蓝调 _3_9 1 +流行 _3_9 1 +民间 _3_9 1 +nazrul _3_9 1 diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_6_12.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_6_12.txt index b5458d1ab..9e65454db 100644 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_6_12.txt +++ b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_6_12.txt @@ -1,4 +1,4 @@ -美国 _6_10 1 -印度 _6_10 2 -英国 _6_10 1 -孟加拉国 _6_10 2 \ No newline at end of file +美国 _3_11 1 +印度 _3_11 2 +英国 _3_11 1 +孟加拉国 _3_11 2 diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_6_13.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_6_13.txt index 33944bd5d..1f6344ad7 100644 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_6_13.txt +++ b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_6_13.txt @@ -1,2 +1,2 @@ -男性 _6_11 3 -女性 _6_11 3 \ No newline at end of file +男性 _3_12 3 +女性 _3_12 3 diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_7_16.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_7_16.txt index 46f76c558..0ddbbfdc3 100644 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_7_16.txt +++ b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_7_16.txt @@ -1,2 +1,2 @@ -mp4 _7_14 4 -mp3 _7_14 2 \ No newline at end of file +mp4 _3_14 4 +mp3 _3_14 2 diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_8_18.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_8_18.txt index 78ea079a1..65cb9fbf7 100644 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_8_18.txt +++ b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_8_18.txt @@ -1,4 +1,4 @@ -美国 _8_16 1 -印度 _8_16 2 -英国 _8_16 1 -孟加拉国 _8_16 2 \ No newline at end of file +美国 _3_17 1 +印度 _3_17 2 +英国 _3_17 1 +孟加拉国 _3_17 2 diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_8_19.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_8_19.txt index 86a5882f3..5705011ee 100644 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_8_19.txt +++ b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_8_19.txt @@ -1,2 +1,2 @@ -英文 _8_17 2 -孟加拉语 _8_17 4 \ No newline at end of file +英文 _3_18 2 +孟加拉语 _3_18 4 diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_8_21.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_8_21.txt index 0c00c7fa7..1235c3ad6 100644 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_8_21.txt +++ b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_8_21.txt @@ -1,6 +1,6 @@ -阿米·奥帕尔·霍伊 _8_19 1 -我的爱 _8_19 1 -打败它 _8_19 1 -阿杰伊阿卡什 _8_19 1 -Tumi#长袍#尼罗布 _8_19 1 -舒克诺#帕塔尔#努普尔#帕埃 _8_19 1 \ No newline at end of file +阿米·奥帕尔·霍伊 _3_16 1 +我的爱 _3_16 1 +打败它 _3_16 1 +阿杰伊阿卡什 _3_16 1 +Tumi#长袍#尼罗布 _3_16 1 +舒克诺#帕塔尔#努普尔#帕埃 _3_16 1 diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_9_15.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_9_15.txt deleted file mode 100644 index 5a2cb8f1d..000000000 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_9_15.txt +++ /dev/null @@ -1,5 +0,0 @@ -百度集团 _9_15 5 -阿里巴巴集团 _9_15 5 -深圳市腾讯计算机系统有限公司 _9_15 5 -北京京东世纪贸易有限公司 _9_15 5 -网易公司 _9_15 5 diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_9_16.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_9_16.txt deleted file mode 100644 index c4e7f41d6..000000000 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_9_16.txt +++ /dev/null @@ -1,4 +0,0 @@ -北京 _9_16 5 -杭州 _9_16 5 -深圳 _9_16 5 - diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_9_18.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_9_18.txt deleted file mode 100644 index b15cc33d9..000000000 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_9_18.txt +++ /dev/null @@ -1,7 +0,0 @@ -李彦宏 _9_18 5 -马云 _9_18 5 -马化腾 _9_18 5 -刘强东 _9_18 5 -丁磊 _9_18 5 - - diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_9_19.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_9_19.txt deleted file mode 100644 index 233cfe17f..000000000 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_9_19.txt +++ /dev/null @@ -1,7 +0,0 @@ -李彦宏 _9_19 5 -张勇 _9_19 5 -刘炽平 _9_19 5 -刘强东 _9_19 5 -丁磊 _9_19 5 - - diff --git a/launchers/standalone/src/main/resources/hanlp.properties b/launchers/standalone/src/main/resources/hanlp.properties index 729ad70ce..50c40ca8f 100644 --- a/launchers/standalone/src/main/resources/hanlp.properties +++ b/launchers/standalone/src/main/resources/hanlp.properties @@ -1,2 +1,2 @@ root=. -CustomDictionaryPath=data/dictionary/custom/DimValue_1_1.txt;data/dictionary/custom/DimValue_1_2.txt;data/dictionary/custom/DimValue_9_15.txt;data/dictionary/custom/DimValue_9_16.txt;data/dictionary/custom/DimValue_9_18.txt;data/dictionary/custom/DimValue_9_19.txt;data/dictionary/custom/DimValue_10_20.txt;data/dictionary/custom/DimValue_10_22.txt; +CustomDictionaryPath=data/dictionary/custom/DimValue_1_1.txt;data/dictionary/custom/DimValue_1_2.txt;