[improvement][supersonic] add text-to-sql evaluation (#696)

* [improvement] llm supports all models

* [improvement] alias convert to SemanticParseInfo

* [improvement] support join

* [improvement] add evaluation.py

* [improvement] add text2sql_evalution.py

* [improvement] add text2sql_evalution.py

* [improvement] add evalution

* [improvement] add evalution

* [improvement] add evalution

---------

Co-authored-by: zuopengge <hwzuopengge@tencent.com>
This commit is contained in:
mainmain
2024-01-30 10:46:45 +08:00
committed by GitHub
parent aae3d6b297
commit c398ac1a84
29 changed files with 3347 additions and 15 deletions

View File

@@ -91,4 +91,4 @@ inMemoryEmbeddingStore:
query:
optimizer:
enable: true
enable: true

View File

@@ -0,0 +1,8 @@
阿里云 _10_20 5
天猫 _10_20 5
腾讯游戏 _10_20 5
度小满 _10_20 5
京东金融 _10_20 5

View File

@@ -0,0 +1,8 @@
张勇 _10_22 5
马化腾 _10_22 5
朱光 _10_22 5
刘强东 _10_22 5

View File

@@ -0,0 +1,5 @@
百度集团 _9_15 5
阿里巴巴集团 _9_15 5
深圳市腾讯计算机系统有限公司 _9_15 5
北京京东世纪贸易有限公司 _9_15 5
网易公司 _9_15 5

View File

@@ -0,0 +1,4 @@
北京 _9_16 5
杭州 _9_16 5
深圳 _9_16 5

View File

@@ -0,0 +1,7 @@
李彦宏 _9_18 5
马云 _9_18 5
马化腾 _9_18 5
刘强东 _9_18 5
丁磊 _9_18 5

View File

@@ -0,0 +1,7 @@
李彦宏 _9_19 5
张勇 _9_19 5
刘炽平 _9_19 5
刘强东 _9_19 5
丁磊 _9_19 5

View File

@@ -1111,4 +1111,29 @@ MERGE INTO song(imp_date,song_name,artist_name,country,f_id,g_name,rating,langua
MERGE INTO song(imp_date,song_name,artist_name,country,f_id,g_name,rating,languages,releasedate,resolution) VALUES (DATEADD('DAY', 0, CURRENT_DATE()),'打败它','Michel','英国',5,'流行',8,'英文','17-MAR-2002',720);
MERGE INTO song(imp_date,song_name,artist_name,country,f_id,g_name,rating,languages,releasedate,resolution) VALUES (DATEADD('DAY', 0, CURRENT_DATE()),'阿杰伊阿卡什','Topu','印度',6,'现代',10,'孟加拉语','27-MAR-2004',320);
insert into company(imp_date,company_id,company_name,headquarter_address,company_established_time,founder,ceo,annual_turnover,employee_count) VALUES (DATEADD('DAY', -1, CURRENT_DATE()),'item_enterprise_13_131','百度集团','北京','2000','李彦宏','李彦宏',102300000000,40000);
insert into company(imp_date,company_id,company_name,headquarter_address,company_established_time,founder,ceo,annual_turnover,employee_count) VALUES (DATEADD('DAY', -1, CURRENT_DATE()),'item_enterprise_13_132','阿里巴巴集团','杭州','1999年','马云','张勇',376800000000,103699);
insert into company(imp_date,company_id,company_name,headquarter_address,company_established_time,founder,ceo,annual_turnover,employee_count) VALUES (DATEADD('DAY', -1, CURRENT_DATE()),'item_enterprise_13_133','深圳市腾讯计算机系统有限公司','深圳','1998','马化腾','刘炽平',321600000000,56310);
insert into company(imp_date,company_id,company_name,headquarter_address,company_established_time,founder,ceo,annual_turnover,employee_count) VALUES (DATEADD('DAY', -1, CURRENT_DATE()),'item_enterprise_13_134','北京京东世纪贸易有限公司','北京','1998','刘强东','刘强东',28800000000,179000);
insert into company(imp_date,company_id,company_name,headquarter_address,company_established_time,founder,ceo,annual_turnover,employee_count) VALUES (DATEADD('DAY', -1, CURRENT_DATE()),'item_enterprise_13_135','网易公司','杭州','1997','丁磊','丁磊',67500000000,20000);
insert into brand(imp_date,brand_id,brand_name,brand_established_time,company_id,legal_representative,registered_capital) VALUES (DATEADD('DAY', -1, CURRENT_DATE()),'item_enterprise_13_136','阿里云','2009年9月10日','item_enterprise_13_134','张勇',50000000);
insert into brand(imp_date,brand_id,brand_name,brand_established_time,company_id,legal_representative,registered_capital) VALUES (DATEADD('DAY', -1, CURRENT_DATE()),'item_enterprise_13_137','天猫','2012年1月11日','item_enterprise_13_134','张勇',100000000);
insert into brand(imp_date,brand_id,brand_name,brand_established_time,company_id,legal_representative,registered_capital) VALUES (DATEADD('DAY', -1, CURRENT_DATE()),'item_enterprise_13_138','腾讯游戏','2003','item_enterprise_13_131','马化腾',50000000);
insert into brand(imp_date,brand_id,brand_name,brand_established_time,company_id,legal_representative,registered_capital) VALUES (DATEADD('DAY', -1, CURRENT_DATE()),'item_enterprise_13_139','度小满','2018','item_enterprise_13_132','朱光',100000000);
insert into brand(imp_date,brand_id,brand_name,brand_established_time,company_id,legal_representative,registered_capital) VALUES (DATEADD('DAY', -1, CURRENT_DATE()),'item_enterprise_13_140','京东金融','2017','item_enterprise_13_134','刘强东',100000000);
insert into company_revenue(imp_date,company_id,brand_id,revenue_proportion,profit_proportion,expenditure_proportion) VALUES ( DATEADD('DAY', -1, CURRENT_DATE()),'item_enterprise_13_131','item_enterprise_13_139',10,10,30);
insert into company_revenue(imp_date,company_id,brand_id,revenue_proportion,profit_proportion,expenditure_proportion) VALUES ( DATEADD('DAY', -1, CURRENT_DATE()),'item_enterprise_13_134','item_enterprise_13_138',80,80,60);
insert into company_revenue(imp_date,company_id,brand_id,revenue_proportion,profit_proportion,expenditure_proportion) VALUES ( DATEADD('DAY', -1, CURRENT_DATE()),'item_enterprise_13_135','item_enterprise_13_139',80,80,60);
insert into company_revenue(imp_date,company_id,brand_id,revenue_proportion,profit_proportion,expenditure_proportion) VALUES ( DATEADD('DAY', -1, CURRENT_DATE()),'item_enterprise_13_131','item_enterprise_13_137',80,80,60);
insert into company_revenue(imp_date,company_id,brand_id,revenue_proportion,profit_proportion,expenditure_proportion) VALUES ( DATEADD('DAY', -1, CURRENT_DATE()),'item_enterprise_13_135','item_enterprise_13_137',10,10,30);
insert into company_brand_revenue(imp_date,year_time,brand_id,revenue,profit,revenue_growth_year_on_year,profit_growth_year_on_year) VALUES (DATEADD('DAY', -1, CURRENT_DATE()), '2018','item_enterprise_13_138',500000000,-300000000,10,-10);
insert into company_brand_revenue(imp_date,year_time,brand_id,revenue,profit,revenue_growth_year_on_year,profit_growth_year_on_year) VALUES (DATEADD('DAY', -1, CURRENT_DATE()), '2019','item_enterprise_13_136',100000000000,50000000000,100,50);
insert into company_brand_revenue(imp_date,year_time,brand_id,revenue,profit,revenue_growth_year_on_year,profit_growth_year_on_year) VALUES ( DATEADD('DAY', -1, CURRENT_DATE()),'2018','item_enterprise_13_137',100000000000,50000000000,100,-10);
insert into company_brand_revenue(imp_date,year_time,brand_id,revenue,profit,revenue_growth_year_on_year,profit_growth_year_on_year) VALUES (DATEADD('DAY', -1, CURRENT_DATE()), '2018','item_enterprise_13_139',500000000,50000000000,10,50);
insert into company_brand_revenue(imp_date,year_time,brand_id,revenue,profit,revenue_growth_year_on_year,profit_growth_year_on_year) VALUES ( DATEADD('DAY', -1, CURRENT_DATE()),'2018','item_enterprise_13_138',100000000000,-300000000,10,50);
-- benchmark

View File

@@ -455,6 +455,51 @@ CREATE TABLE IF NOT EXISTS `song` (
);
COMMENT ON TABLE song IS 'song';
CREATE TABLE IF NOT EXISTS `company` (
`imp_date` varchar(50) ,
`company_id` varchar(50) NOT NULL ,
`company_name` varchar(50) NOT NULL ,
`headquarter_address` varchar(50) NOT NULL ,
`company_established_time` varchar(20) NOT NULL ,
`founder` varchar(20) NOT NULL ,
`ceo` varchar(20) NOT NULL ,
`annual_turnover` bigint(15) ,
`employee_count` int(7) ,
PRIMARY KEY (`company_id`)
);
CREATE TABLE IF NOT EXISTS `brand` (
`imp_date` varchar(50) ,
`brand_id` varchar(50) NOT NULL ,
`brand_name` varchar(50) NOT NULL ,
`brand_established_time` varchar(20) NOT NULL ,
`company_id` varchar(50) NOT NULL ,
`legal_representative` varchar(20) NOT NULL ,
`registered_capital` bigint(15) ,
PRIMARY KEY (`brand_id`)
);
CREATE TABLE IF NOT EXISTS `company_revenue` (
`imp_date` varchar(50) ,
`company_id` varchar(50) NOT NULL ,
`brand_id` varchar(50) NOT NULL ,
`revenue_proportion` double NOT NULL,
`profit_proportion` double NOT NULL ,
`expenditure_proportion` double NOT NULL
);
CREATE TABLE IF NOT EXISTS `company_brand_revenue` (
`imp_date` varchar(50) ,
`year_time` varchar(10) NOT NULL ,
`brand_id` varchar(50) NOT NULL ,
`revenue` bigint(15) NOT NULL,
`profit` bigint(15) NOT NULL ,
`revenue_growth_year_on_year` double NOT NULL ,
`profit_growth_year_on_year` double NOT NULL
);
CREATE TABLE IF NOT EXISTS s2_sys_parameter
(
id INT PRIMARY KEY AUTO_INCREMENT,
@@ -498,4 +543,4 @@ CREATE TABLE IF NOT EXISTS `s2_app` (
created_by VARCHAR(255),
updated_at TIMESTAMP,
updated_by VARCHAR(255)
);
);

View File

@@ -1,2 +1,2 @@
root=.
CustomDictionaryPath=data/dictionary/custom/DimValue_1_1.txt;data/dictionary/custom/DimValue_1_2.txt;data/dictionary/custom/DimValue_1_3.txt;data/dictionary/custom/benchmark_cspider.txt;
CustomDictionaryPath=data/dictionary/custom/DimValue_1_1.txt;data/dictionary/custom/DimValue_1_2.txt;data/dictionary/custom/DimValue_9_15.txt;data/dictionary/custom/DimValue_9_16.txt;data/dictionary/custom/DimValue_9_18.txt;data/dictionary/custom/DimValue_9_19.txt;data/dictionary/custom/DimValue_10_20.txt;data/dictionary/custom/DimValue_10_22.txt;