(improvement) Move out the datasource and merge the datasource with the model, and adapt the chat module (#423)

Co-authored-by: jolunoluo <jolunoluo@tencent.com>
This commit is contained in:
jipeli
2023-11-27 11:05:24 +08:00
committed by GitHub
parent 0534053ff9
commit 27bb1b322e
190 changed files with 3900 additions and 10561 deletions

View File

@@ -1,14 +1,7 @@
package com.tencent.supersonic.integration;
import static com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum.NONE;
import static com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum.SUM;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.chat.api.pojo.request.ChatConfigEditReqReq;
import com.tencent.supersonic.chat.api.pojo.request.ItemVisibility;
import com.tencent.supersonic.chat.api.pojo.request.QueryFilter;
import com.tencent.supersonic.chat.api.pojo.response.ChatConfigResp;
import com.tencent.supersonic.chat.api.pojo.response.ParseResp;
import com.tencent.supersonic.chat.api.pojo.response.QueryResult;
import com.tencent.supersonic.chat.query.rule.metric.MetricFilterQuery;
@@ -19,15 +12,17 @@ import com.tencent.supersonic.common.pojo.DateConf;
import com.tencent.supersonic.common.pojo.QueryType;
import com.tencent.supersonic.common.pojo.enums.FilterOperatorEnum;
import com.tencent.supersonic.util.DataUtils;
import org.junit.Assert;
import org.junit.Test;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.junit.Assert;
import org.junit.Test;
import org.springframework.beans.BeanUtils;
import static com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum.NONE;
import static com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum.SUM;
public class MetricQueryTest extends BaseQueryTest {
@@ -46,7 +41,7 @@ public class MetricQueryTest extends BaseQueryTest {
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问次数"));
expectedParseInfo.getDimensionFilters().add(DataUtils.getFilter("user_name",
FilterOperatorEnum.EQUALS, "alice", "用户", 2L));
FilterOperatorEnum.EQUALS, "alice", "用户", 2L));
expectedParseInfo.setDateInfo(DataUtils.getDateConf(DateConf.DateMode.RECENT, unit, period, startDay, endDay));
expectedParseInfo.setQueryType(QueryType.METRIC);
@@ -129,7 +124,7 @@ public class MetricQueryTest extends BaseQueryTest {
List<String> list = new ArrayList<>();
list.add("alice");
list.add("lucy");
QueryFilter dimensionFilter = DataUtils.getFilter("user_name", FilterOperatorEnum.IN, list, "用户", 2L);
QueryFilter dimensionFilter = DataUtils.getFilter("user_name", FilterOperatorEnum.IN, list, "用户", 2L);
expectedParseInfo.getDimensionFilters().add(dimensionFilter);
expectedParseInfo.setDateInfo(DataUtils.getDateConf(DateConf.DateMode.RECENT, unit, period, startDay, endDay));
@@ -150,7 +145,9 @@ public class MetricQueryTest extends BaseQueryTest {
expectedParseInfo.setAggType(SUM);
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问次数"));
expectedParseInfo.getDimensions().add(DataUtils.getSchemaElement("用户"));
expectedParseInfo.getDimensions().add(DataUtils.getSchemaElement("用户名"));
expectedParseInfo.getDimensions().add(DataUtils.getSchemaElement("用户名称"));
expectedParseInfo.setDateInfo(DataUtils.getDateConf(3, DateConf.DateMode.RECENT, "DAY"));
expectedParseInfo.setQueryType(QueryType.METRIC);
@@ -195,7 +192,7 @@ public class MetricQueryTest extends BaseQueryTest {
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问次数"));
expectedParseInfo.getDimensionFilters().add(DataUtils.getFilter("user_name",
FilterOperatorEnum.EQUALS, "alice", "用户", 2L));
FilterOperatorEnum.EQUALS, "alice", "用户", 2L));
expectedParseInfo.setDateInfo(DataUtils.getDateConf(DateConf.DateMode.BETWEEN, 1, period, startDay, startDay));
expectedParseInfo.setQueryType(QueryType.METRIC);
@@ -203,44 +200,4 @@ public class MetricQueryTest extends BaseQueryTest {
assertQueryResult(expectedResult, actualResult);
}
/**
 * Checks that the metric visibility settings stored in the chat config of model 1
 * influence which metrics end up in the parse result of the same question.
 * Round 1 blacklists metric id 2 and expects a single metric in the result;
 * round 2 resets the visibility settings and expects two metrics.
 *
 * NOTE(review): the blacklist is only removed at the start of round 2. If the
 * round-1 assertion fails, the edited config is left in place for later tests.
 */
@Test
public void queryTest_config_visibility() throws Exception {
// Round 1: query with a metric blacklist in place.
ChatConfigResp chatConfig = configService.fetchConfigByModelId(1L);
ChatConfigEditReqReq extendEditCmd = new ChatConfigEditReqReq();
// Build the edit request from the current config so only the visibility part changes.
// NOTE(review): BeanUtils.copyProperties presumably copies references, so nested
// config objects are shared with chatConfig -- confirm if that matters here.
BeanUtils.copyProperties(chatConfig, extendEditCmd);
// Add metric id 2 to the blacklist of the aggregate chat config.
// NOTE(review): presumably id 2 is the metric missing from the expected result below -- verify against the test data.
List<Long> blackMetrics = Arrays.asList(2L);
extendEditCmd.getChatAggConfig().getVisibility().setBlackMetricIdList(blackMetrics);
configService.editConfig(extendEditCmd, User.getFakeUser());
QueryResult actualResult = submitNewChat("超音数访问人数、访问次数");
// Expected result: metric-model query, no aggregation, one metric, recent-date window.
QueryResult expectedResult = new QueryResult();
SemanticParseInfo expectedParseInfo = new SemanticParseInfo();
expectedResult.setChatContext(expectedParseInfo);
expectedResult.setQueryMode(MetricModelQuery.QUERY_MODE);
expectedParseInfo.setAggType(NONE);
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问次数"));
expectedParseInfo.setDateInfo(DataUtils.getDateConf(DateConf.DateMode.RECENT, unit, period, startDay, endDay));
expectedParseInfo.setQueryType(QueryType.METRIC);
assertQueryResult(expectedResult, actualResult);
// Round 2: query again without any blacklist.
// Replace the visibility settings with a fresh, empty ItemVisibility to drop the blacklist.
extendEditCmd.getChatAggConfig().setVisibility(new ItemVisibility());
configService.editConfig(extendEditCmd, User.getFakeUser());
actualResult = submitNewChat("超音数访问人数、访问次数");
// Reuse the expected result from round 1, now with both metrics listed.
expectedParseInfo.getMetrics().clear();
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问次数"));
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问人数"));
assertQueryResult(expectedResult, actualResult);
}
}

View File

@@ -3,7 +3,8 @@ com.tencent.supersonic.chat.api.component.SchemaMapper=\
com.tencent.supersonic.chat.mapper.HanlpDictMapper, \
com.tencent.supersonic.chat.mapper.FuzzyNameMapper, \
com.tencent.supersonic.chat.mapper.QueryFilterMapper, \
com.tencent.supersonic.chat.mapper.EntityMapper
com.tencent.supersonic.chat.mapper.EntityMapper, \
com.tencent.supersonic.chat.mapper.ModelClusterMapper
com.tencent.supersonic.chat.api.component.SemanticParser=\
com.tencent.supersonic.chat.parser.rule.RuleBasedParser, \

View File

@@ -80,21 +80,6 @@ CREATE TABLE IF NOT EXISTS `s2_chat_config` (
) ;
COMMENT ON TABLE s2_chat_config IS 'chat config information table ';
-- Agent information table. Created only when it does not already exist.
-- Every column other than the auto-increment primary key `id` is declared nullable.
CREATE TABLE IF NOT EXISTS s2_agent
(
id int AUTO_INCREMENT, -- auto-increment primary key
name varchar(100) null,
description varchar(500) null,
status int null,
examples varchar(500) null,
config varchar(2000) null,
created_by varchar(100) null,
created_at TIMESTAMP null,
updated_by varchar(100) null,
updated_at TIMESTAMP null,
enable_search int null,
PRIMARY KEY (`id`)
); COMMENT ON TABLE s2_agent IS 'agent information table';
create table s2_user
(
@@ -134,8 +119,8 @@ CREATE TABLE IF NOT EXISTS `s2_model` (
`name` varchar(255) DEFAULT NULL , -- domain name
`biz_name` varchar(255) DEFAULT NULL , -- internal name
`domain_id` INT DEFAULT '0' , -- parent domain ID
`alias` varchar(255) DEFAULT NULL , -- alias name
`status` INT DEFAULT NULL ,
`alias` varchar(255) DEFAULT NULL , -- internal name
`status` INT DEFAULT NULL,
`description` varchar(500) DEFAULT NULL ,
`created_at` TIMESTAMP DEFAULT NULL ,
`created_by` varchar(100) DEFAULT NULL ,
@@ -148,6 +133,10 @@ CREATE TABLE IF NOT EXISTS `s2_model` (
`view_org` varchar(3000) DEFAULT NULL , -- domain available organization
`entity` varchar(500) DEFAULT NULL , -- domain entity info
`drill_down_dimensions` varchar(500) DEFAULT NULL , -- drill down dimensions info
`database_id` INT NOT NULL ,
`model_detail` LONGVARCHAR NOT NULL ,
`depends` varchar(500) DEFAULT NULL ,
`filter_sql` varchar(1000) DEFAULT NULL ,
PRIMARY KEY (`id`)
);
COMMENT ON TABLE s2_model IS 'model information';
@@ -171,16 +160,12 @@ CREATE TABLE `s2_database` (
COMMENT ON TABLE s2_database IS 'database instance table';
CREATE TABLE IF NOT EXISTS `s2_datasource` (
`id` INT NOT NULL AUTO_INCREMENT,
`model_id` INT NOT NULL ,
`name` varchar(255) NOT NULL ,
`id` INT NOT NULL AUTO_INCREMENT,
`model_id` INT NOT NULL ,
`name` varchar(255) NOT NULL ,
`biz_name` varchar(255) NOT NULL ,
`description` varchar(500) DEFAULT NULL ,
`database_id` INT NOT NULL ,
`depends` varchar(500) DEFAULT NULL ,
`datasource_detail` LONGVARCHAR NOT NULL ,
`status` int(11) DEFAULT NULL ,
`filter_sql` varchar(1000) DEFAULT NULL ,
`created_at` TIMESTAMP NOT NULL ,
`created_by` varchar(100) NOT NULL ,
`updated_at` TIMESTAMP NOT NULL ,
@@ -202,7 +187,7 @@ CREATE TABLE IF NOT EXISTS `s2_metric` (
`name` varchar(255) NOT NULL ,
`biz_name` varchar(255) NOT NULL ,
`description` varchar(500) DEFAULT NULL ,
`status` INT NOT NULL , -- status, 0 is normal, 1 is off the shelf, 2 is deleted
`status` INT NOT NULL , -- status, 0 is off the shelf, 1 is normal
`sensitive_level` INT NOT NULL ,
`type` varchar(50) NOT NULL , -- type proxy,expr
`type_params` LONGVARCHAR DEFAULT NULL ,
@@ -223,11 +208,10 @@ COMMENT ON TABLE s2_metric IS 'metric information table';
CREATE TABLE IF NOT EXISTS `s2_dimension` (
`id` INT NOT NULL AUTO_INCREMENT ,
`model_id` INT NOT NULL ,
`datasource_id` INT NOT NULL ,
`name` varchar(255) NOT NULL ,
`biz_name` varchar(255) NOT NULL ,
`description` varchar(500) NOT NULL ,
`status` INT NOT NULL , -- status, 0 is normal, 1 is off the shelf, 2 is deleted
`status` INT NOT NULL , -- status, 0 is off the shelf, 1 is normal
`sensitive_level` INT DEFAULT NULL ,
`data_type` varchar(50) DEFAULT NULL , -- type date,array,varchar
`type` varchar(50) NOT NULL , -- type categorical,time
@@ -246,20 +230,16 @@ CREATE TABLE IF NOT EXISTS `s2_dimension` (
);
COMMENT ON TABLE s2_dimension IS 'dimension information table';
create table s2_datasource_rela
CREATE TABLE s2_model_rela
(
id INT AUTO_INCREMENT,
model_id INT null,
datasource_from INT null,
datasource_to INT null,
join_key varchar(100) null,
created_at TIMESTAMP null,
created_by varchar(100) null,
updated_at TIMESTAMP null,
updated_by varchar(100) null,
id BIGINT AUTO_INCREMENT,
domain_id BIGINT,
from_model_id BIGINT,
to_model_id BIGINT,
join_type VARCHAR(255),
join_condition VARCHAR(255),
PRIMARY KEY (`id`)
);
COMMENT ON TABLE s2_datasource_rela IS 'data source association table';
create table s2_view_info
(
@@ -295,7 +275,6 @@ CREATE TABLE `s2_query_stat_info` (
`native_query` INT DEFAULT NULL, -- 1-detail query, 0-aggregation query
`start_date` varchar(50) DEFAULT NULL,
`end_date` varchar(50) DEFAULT NULL,
`query_opt_mode` varchar(50) DEFAULT NULL,
`dimensions`LONGVARCHAR , -- dimensions involved in sql
`metrics`LONGVARCHAR , -- metric involved in sql
`select_cols`LONGVARCHAR ,
@@ -307,6 +286,7 @@ CREATE TABLE `s2_query_stat_info` (
`use_sql_cache` TINYINT DEFAULT '-1' , -- whether to hit the sql cache
`sql_cache_key`LONGVARCHAR , -- sql cache key
`result_cache_key`LONGVARCHAR , -- result cache key
`query_opt_mode` varchar(50) DEFAULT NULL ,
PRIMARY KEY (`id`)
) ;
COMMENT ON TABLE s2_query_stat_info IS 'query statistics table';
@@ -370,6 +350,22 @@ CREATE TABLE IF NOT EXISTS `s2_plugin`
PRIMARY KEY (`id`)
); COMMENT ON TABLE s2_plugin IS 'plugin information table';
-- Agent information table. Created only when it does not already exist.
-- Every column other than the auto-increment primary key `id` is declared nullable.
CREATE TABLE IF NOT EXISTS s2_agent
(
id int AUTO_INCREMENT, -- auto-increment primary key
name varchar(100) null,
description varchar(500) null,
status int null,
examples varchar(500) null,
config varchar(2000) null,
created_by varchar(100) null,
created_at TIMESTAMP null,
updated_by varchar(100) null,
updated_at TIMESTAMP null,
enable_search int null,
PRIMARY KEY (`id`)
); COMMENT ON TABLE s2_agent IS 'agent information table';
-------demo for semantic and chat
CREATE TABLE IF NOT EXISTS `s2_user_department` (
@@ -405,8 +401,68 @@ CREATE TABLE IF NOT EXISTS `singer` (
);
COMMENT ON TABLE singer IS 'singer_info';
-- Dictionary task information table: one row per task, holding its request
-- parameters (plus their MD5), final status, progress and elapsed time.
CREATE TABLE IF NOT EXISTS `s2_dictionary_task` (
`id` INT NOT NULL AUTO_INCREMENT, -- auto-increment primary key
`name` varchar(255) NOT NULL , -- task name
`description` varchar(255) ,
`command`LONGVARCHAR NOT NULL , -- task request parameters
`command_md5` varchar(255) NOT NULL , -- MD5 of the task request parameters
`status` INT NOT NULL , -- the final status of the task
`dimension_ids` varchar(500) NULL ,
`created_at` TIMESTAMP DEFAULT CURRENT_TIMESTAMP , -- defaults to the insertion time
`created_by` varchar(100) NOT NULL ,
`progress` DOUBLE default 0.00 , -- real-time progress of the task
`elapsed_ms` bigINT DEFAULT NULL , -- time taken by the task, in milliseconds
`message` LONGVARCHAR , -- remarks and related information
PRIMARY KEY (`id`)
);
COMMENT ON TABLE s2_dictionary_task IS 'dictionary task information table';
-- benchmark
-- Genre table: one row per genre, keyed by the genre name.
CREATE TABLE IF NOT EXISTS `genre` (
`g_name` varchar(20) NOT NULL , -- genre name (primary key)
`rating` INT ,
`most_popular_in` varchar(50) ,
PRIMARY KEY (`g_name`)
);
COMMENT ON TABLE genre IS 'genre';
-- Artist table. No primary key or foreign key is declared on this table.
CREATE TABLE IF NOT EXISTS `artist` (
`artist_name` varchar(50) NOT NULL , -- artist name
`country` varchar(20) ,
`gender` varchar(20) ,
`g_name` varchar(50) -- NOTE(review): presumably matches genre.g_name, which is varchar(20) -- confirm the intended width
);
COMMENT ON TABLE artist IS 'artist';
-- Files table: one row per file, keyed by `f_id`.
-- Size and duration are stored as text (varchar), not as numeric columns.
CREATE TABLE IF NOT EXISTS `files` (
`f_id` bigINT NOT NULL, -- file id (primary key)
`artist_name` varchar(50) ,
`file_size` varchar(20) ,
`duration` varchar(20) ,
`formats` varchar(20) ,
PRIMARY KEY (`f_id`)
);
COMMENT ON TABLE files IS 'files';
-- Song table. No primary key is declared; `resolution` is the only NOT NULL column.
-- NOTE(review): `artist_name`, `f_id` and `g_name` presumably refer to the artist,
-- files and genre tables, but no foreign keys are declared -- confirm.
CREATE TABLE IF NOT EXISTS `song` (
`imp_date` varchar(50) , -- stored as text, not as a DATE/TIMESTAMP column
`song_name` varchar(50) ,
`artist_name` varchar(50) ,
`country` varchar(20) ,
`f_id` bigINT ,
`g_name` varchar(20) ,
`rating` INT ,
`languages` varchar(20) ,
`releasedate` varchar(50) , -- stored as text, not as a DATE/TIMESTAMP column
`resolution` bigINT NOT NULL
);
COMMENT ON TABLE song IS 'song';
-- benchmark
create table s2_materialization
(
id int AUTO_INCREMENT ,
@@ -469,13 +525,9 @@ CREATE TABLE s2_materialization_record
PRIMARY KEY (`id`)
);
CREATE TABLE s2_sys_parameter
(
id INT PRIMARY KEY AUTO_INCREMENT,
admin varchar(500),
parameters text null
);
);