diff --git a/assembly/bin/supersonic-build.sh b/assembly/bin/supersonic-build.sh index 3f0fc75e9..335147ffa 100755 --- a/assembly/bin/supersonic-build.sh +++ b/assembly/bin/supersonic-build.sh @@ -67,4 +67,4 @@ moveToRuntime standalone setEnvToWeb chat setEnvToWeb semantic -rm -fr ${buildDir}/webapp \ No newline at end of file +rm -fr ${buildDir}/webapp diff --git a/assembly/bin/supersonic-daemon.sh b/assembly/bin/supersonic-daemon.sh index 49dfa53bd..ae4e3eb37 100755 --- a/assembly/bin/supersonic-daemon.sh +++ b/assembly/bin/supersonic-daemon.sh @@ -192,4 +192,4 @@ case "$command" in *) echo "Use command {start|stop|restart} to run." exit 1 -esac \ No newline at end of file +esac diff --git a/chat/api/src/main/java/com/tencent/supersonic/chat/api/pojo/request/QueryDataReq.java b/chat/api/src/main/java/com/tencent/supersonic/chat/api/pojo/request/QueryDataReq.java index b93804ff0..af9a1425e 100644 --- a/chat/api/src/main/java/com/tencent/supersonic/chat/api/pojo/request/QueryDataReq.java +++ b/chat/api/src/main/java/com/tencent/supersonic/chat/api/pojo/request/QueryDataReq.java @@ -1,25 +1,20 @@ package com.tencent.supersonic.chat.api.pojo.request; +import com.tencent.supersonic.auth.api.authentication.pojo.User; import com.tencent.supersonic.chat.api.pojo.SchemaElement; import com.tencent.supersonic.common.pojo.DateConf; -import com.tencent.supersonic.common.pojo.Order; -import com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum; import java.util.HashSet; import java.util.Set; import lombok.Data; @Data public class QueryDataReq { - String queryMode; - SchemaElement model; - Set metrics = new HashSet<>(); - Set dimensions = new HashSet<>(); - Set dimensionFilters = new HashSet<>(); - Set metricFilters = new HashSet<>(); - private AggregateTypeEnum aggType = AggregateTypeEnum.NONE; - private Set orders = new HashSet<>(); + private User user; + private Set metrics = new HashSet<>(); + private Set dimensions = new HashSet<>(); + private Set dimensionFilters = new HashSet<>(); private DateConf dateInfo; - private Long limit; - private Boolean nativeQuery = false; + private Long queryId = 7L; + private Integer parseId = 2; } diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/rest/ChatQueryController.java b/chat/core/src/main/java/com/tencent/supersonic/chat/rest/ChatQueryController.java index f88b24ea8..d27d36842 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/rest/ChatQueryController.java +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/rest/ChatQueryController.java @@ -72,6 +72,7 @@ public class ChatQueryController { public Object queryData(@RequestBody QueryDataReq queryData, HttpServletRequest request, HttpServletResponse response) throws Exception { + queryData.setUser(UserHolder.findUser(request, response)); return queryService.executeDirectQuery(queryData, UserHolder.findUser(request, response)); } diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/service/impl/QueryServiceImpl.java b/chat/core/src/main/java/com/tencent/supersonic/chat/service/impl/QueryServiceImpl.java index e569efe60..63a054454 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/service/impl/QueryServiceImpl.java +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/service/impl/QueryServiceImpl.java @@ -3,11 +3,14 @@ package com.tencent.supersonic.chat.service.impl; import com.tencent.supersonic.auth.api.authentication.pojo.User; import com.tencent.supersonic.chat.api.component.SchemaMapper; +import com.tencent.supersonic.chat.api.component.SemanticLayer; import com.tencent.supersonic.chat.api.component.SemanticQuery; import com.tencent.supersonic.chat.api.component.SemanticParser; import com.tencent.supersonic.chat.api.pojo.ChatContext; import com.tencent.supersonic.chat.api.pojo.QueryContext; import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo; +import com.tencent.supersonic.chat.api.pojo.request.QueryDataReq; +import com.tencent.supersonic.chat.api.pojo.request.QueryFilter; import com.tencent.supersonic.chat.api.pojo.request.DimensionValueReq; import com.tencent.supersonic.chat.api.pojo.request.ExecuteQueryReq; import com.tencent.supersonic.chat.api.pojo.request.QueryReq; @@ -15,37 +18,43 @@ import com.tencent.supersonic.chat.api.pojo.response.EntityInfo; import com.tencent.supersonic.chat.api.pojo.response.ParseResp; import com.tencent.supersonic.chat.api.pojo.response.QueryResult; import com.tencent.supersonic.chat.api.pojo.response.QueryState; +import com.tencent.supersonic.chat.parser.llm.dsl.DSLParseResult; import com.tencent.supersonic.chat.persistence.dataobject.ChatParseDO; import com.tencent.supersonic.chat.persistence.dataobject.CostType; import com.tencent.supersonic.chat.persistence.dataobject.StatisticsDO; import com.tencent.supersonic.chat.query.QuerySelector; -import com.tencent.supersonic.chat.api.pojo.request.QueryDataReq; import com.tencent.supersonic.chat.query.QueryManager; +import com.tencent.supersonic.chat.query.llm.dsl.DslQuery; +import com.tencent.supersonic.chat.query.llm.dsl.LLMResp; import com.tencent.supersonic.chat.service.ChatService; import com.tencent.supersonic.chat.service.QueryService; import com.tencent.supersonic.chat.service.SemanticService; import com.tencent.supersonic.chat.service.StatisticsService; import com.tencent.supersonic.chat.utils.ComponentFactory; +import java.util.Map; import com.tencent.supersonic.semantic.api.model.response.ExplainResp; import java.util.List; import java.util.ArrayList; import java.util.Set; import java.util.HashSet; +import java.util.HashMap; import java.util.Comparator; import java.util.Objects; import java.util.stream.Collectors; +import com.tencent.supersonic.common.pojo.Constants; import com.tencent.supersonic.common.pojo.DateConf; import com.tencent.supersonic.common.util.ContextUtils; import com.tencent.supersonic.common.util.JsonUtil; +import com.tencent.supersonic.common.util.jsqlparser.SqlParserUpdateHelper; import com.tencent.supersonic.semantic.api.model.response.QueryResultWithSchemaResp; import com.tencent.supersonic.semantic.api.query.enums.FilterOperatorEnum; import com.tencent.supersonic.semantic.api.query.pojo.Filter; import com.tencent.supersonic.semantic.api.query.request.QueryStructReq; import lombok.extern.slf4j.Slf4j; import org.apache.calcite.sql.parser.SqlParseException; -import org.springframework.beans.BeanUtils; +import org.apache.commons.collections.CollectionUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Primary; @@ -258,8 +267,52 @@ public class QueryServiceImpl implements QueryService { @Override public QueryResult executeDirectQuery(QueryDataReq queryData, User user) throws SqlParseException { - SemanticQuery semanticQuery = QueryManager.createRuleQuery(queryData.getQueryMode()); - BeanUtils.copyProperties(queryData, semanticQuery.getParseInfo()); + ChatParseDO chatParseDO = chatService.getParseInfo(queryData.getQueryId(), + queryData.getUser().getName(), queryData.getParseId()); + SemanticParseInfo parseInfo = JsonUtil.toObject(chatParseDO.getParseInfo(), SemanticParseInfo.class); + if (!parseInfo.getQueryMode().equals(DslQuery.QUERY_MODE)) { + if (CollectionUtils.isNotEmpty(queryData.getDimensions())) { + parseInfo.setDimensions(queryData.getDimensions()); + } + if (CollectionUtils.isNotEmpty(queryData.getMetrics())) { + parseInfo.setMetrics(queryData.getMetrics()); + } + if (CollectionUtils.isNotEmpty(queryData.getDimensionFilters())) { + parseInfo.setDimensionFilters(queryData.getDimensionFilters()); + } + } + if (Objects.nonNull(queryData.getDateInfo())) { + parseInfo.setDateInfo(queryData.getDateInfo()); + } + if (parseInfo.getQueryMode().equals(DslQuery.QUERY_MODE) + && CollectionUtils.isNotEmpty(queryData.getDimensionFilters())) { + Map> filedNameToValueMap = new HashMap<>(); + String json = JsonUtil.toString(parseInfo.getProperties().get(Constants.CONTEXT)); + DSLParseResult dslParseResult = JsonUtil.toObject(json, DSLParseResult.class); + LLMResp llmResp = dslParseResult.getLlmResp(); + String correctorSql = llmResp.getCorrectorSql(); + log.info("correctorSql before replacing:{}", correctorSql); + for (QueryFilter dslQueryFilter : queryData.getDimensionFilters()) { + for (QueryFilter queryFilter : parseInfo.getDimensionFilters()) { + if (dslQueryFilter.getBizName().equals(queryFilter.getBizName())) { + Map map = new HashMap<>(); + map.put(queryFilter.getValue().toString(), dslQueryFilter.getValue().toString()); + filedNameToValueMap.put(dslQueryFilter.getBizName(), map); + break; + } + } + } + log.info("filedNameToValueMap:{}", filedNameToValueMap); + correctorSql = SqlParserUpdateHelper.replaceValue(correctorSql, filedNameToValueMap); + log.info("correctorSql after replacing:{}", correctorSql); + llmResp.setCorrectorSql(correctorSql); + dslParseResult.setLlmResp(llmResp); + Map properties = new HashMap<>(); + properties.put(Constants.CONTEXT, dslParseResult); + parseInfo.setProperties(properties); + } + SemanticQuery semanticQuery = QueryManager.createQuery(parseInfo.getQueryMode()); + semanticQuery.setParseInfo(parseInfo); QueryResult queryResult = semanticQuery.execute(user); queryResult.setChatContext(semanticQuery.getParseInfo()); return queryResult; @@ -267,8 +320,6 @@ public class QueryServiceImpl implements QueryService { @Override public Object queryDimensionValue(DimensionValueReq dimensionValueReq, User user) throws Exception { - com.tencent.supersonic.semantic.query.service.QueryService queryService = - ContextUtils.getBean(com.tencent.supersonic.semantic.query.service.QueryService.class); QueryStructReq queryStructReq = new QueryStructReq(); DateConf dateConf = new DateConf(); @@ -292,7 +343,8 @@ public class QueryServiceImpl implements QueryService { dimensionFilters.add(dimensionFilter); queryStructReq.setDimensionFilters(dimensionFilters); } - QueryResultWithSchemaResp queryResultWithSchemaResp = queryService.queryByStructWithAuth(queryStructReq, user); + SemanticLayer semanticLayer = ComponentFactory.getSemanticLayer(); + QueryResultWithSchemaResp queryResultWithSchemaResp = semanticLayer.queryByStruct(queryStructReq, user); Set dimensionValues = new HashSet<>(); queryResultWithSchemaResp.getResultList().removeIf(o -> { if (dimensionValues.contains(o.get(dimensionValueReq.getBizName()))) { diff --git a/chat/core/src/main/python/run_config.py b/chat/core/src/main/python/run_config.py index 2d4cbaf53..962c77669 100644 --- a/chat/core/src/main/python/run_config.py +++ b/chat/core/src/main/python/run_config.py @@ -26,4 +26,4 @@ if __name__ == '__main__': print('EMB_MODEL_PATH: ', HF_TEXT2VEC_MODEL_NAME) print('CHROMA_DB_PERSIST_PATH: ', CHROMA_DB_PERSIST_PATH) print('LLMPARSER_HOST: ', LLMPARSER_HOST) - print('LLMPARSER_PORT: ', LLMPARSER_PORT) \ No newline at end of file + print('LLMPARSER_PORT: ', LLMPARSER_PORT) diff --git a/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/semantic/LocalSemanticLayer.java b/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/semantic/LocalSemanticLayer.java index 81a445afc..902c3c83a 100644 --- a/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/semantic/LocalSemanticLayer.java +++ b/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/semantic/LocalSemanticLayer.java @@ -57,17 +57,13 @@ public class LocalSemanticLayer extends BaseSemanticLayer { } @Override + @SneakyThrows public QueryResultWithSchemaResp queryByDsl(QueryDslReq queryDslReq, User user) { - try { - queryService = ContextUtils.getBean(QueryService.class); - Object object = queryService.queryBySql(queryDslReq, user); - QueryResultWithSchemaResp queryResultWithSchemaResp = JsonUtil.toObject(JsonUtil.toString(object), + queryService = ContextUtils.getBean(QueryService.class); + Object object = queryService.queryBySql(queryDslReq, user); + QueryResultWithSchemaResp queryResultWithSchemaResp = JsonUtil.toObject(JsonUtil.toString(object), QueryResultWithSchemaResp.class); - return queryResultWithSchemaResp; - } catch (Exception e) { - log.info("queryByDsl has an exception:{}", e); - } - return null; + return queryResultWithSchemaResp; } @Override diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/benchmark_cspider.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/benchmark_cspider.txt new file mode 100644 index 000000000..ec3d5443f --- /dev/null +++ b/launchers/standalone/src/main/resources/data/dictionary/custom/benchmark_cspider.txt @@ -0,0 +1,36 @@ +tagore _3_8 9000 +nazrul _3_8 9000 +民间 _3_8 9000 +现代 _3_8 9000 +蓝调 _3_8 9000 +流行 _3_8 9000 +孟加拉国 _3_10 9000 +锡尔赫特、吉大港、库斯蒂亚 _3_10 9000 +加拿大 _3_10 9000 +美国 _3_10 9000 +Shrikanta _3_11 9000 +Prity _3_11 9000 +Farida _3_11 9000 +Topu _3_11 9000 +Enrique _3_11 9000 +Michel _3_11 9000 +孟加拉国 _3_12 9000 +印度 _3_12 9000 +美国 _3_12 9000 +英国 _3_12 9000 +男性 _3_13 9000 +女性 _3_13 9000 +mp4 _3_19 9000 +mp3 _3_19 9000 +Tumi#长袍#尼罗布 _3_20 9000 +舒克诺#帕塔尔#努普尔#帕埃 _3_20 9000 +阿米·奥帕尔·霍伊 _3_20 9000 +我的爱 _3_20 9000 +打败它 _3_20 9000 +阿杰伊阿卡什 _3_20 9000 +孟加拉国 _3_22 9000 +印度 _3_22 9000 +美国 _3_22 9000 +英国 _3_22 9000 +孟加拉语 _3_26 9000 +英文 _3_26 9000 diff --git a/launchers/standalone/src/main/resources/db/data-h2.sql b/launchers/standalone/src/main/resources/db/data-h2.sql index 68709c0c2..8e680bec1 100644 --- a/launchers/standalone/src/main/resources/db/data-h2.sql +++ b/launchers/standalone/src/main/resources/db/data-h2.sql @@ -1108,3 +1108,67 @@ INSERT INTO s2_stay_time_statis (imp_date, user_name, stay_hours, page) VALUES ( INSERT INTO s2_stay_time_statis (imp_date, user_name, stay_hours, page) VALUES (DATEADD('DAY', -19, CURRENT_DATE()), 'alice', '0.8131712486302015', 'p2'); INSERT INTO s2_stay_time_statis (imp_date, user_name, stay_hours, page) VALUES (DATEADD('DAY', -15, CURRENT_DATE()), 'lucy', '0.8124302447925607', 'p4'); INSERT INTO s2_stay_time_statis (imp_date, user_name, stay_hours, page) VALUES (DATEADD('DAY', -8, CURRENT_DATE()), 'lucy', '0.039935860913407284', 'p2'); + + + +-- benchmark +-- CSpider music_1 +insert into s2_domain (id, `name`, biz_name, parent_id, status, created_at, created_by, updated_at, updated_by, `admin`, admin_org, viewer, view_org) VALUES(2, '音乐', 'music', 0, 1, '2023-05-24 00:00:00', 'admin', '2023-05-24 00:00:00', 'admin', 'admin', '', 'admin,tom,jack', 'admin' ); +insert into s2_model (id, `name`, biz_name, domain_id, created_at, created_by, updated_at, updated_by, `admin`, admin_org, is_open, viewer, view_org, entity) VALUES(3, '音乐', 'music', 2, '2023-05-24 00:00:00', 'admin', '2023-05-24 00:00:00', 'admin', 'admin', '', 0, 'admin,tom,jack', 'admin','{"entityId": 7, "names": ["音乐"]}' ); +insert into s2_datasource (id , model_id, `name`, biz_name, description, database_id ,datasource_detail, created_at, created_by, updated_at, updated_by ) VALUES(5, 3, '艺术类型', 'genre', '艺术类型', 1, '{"dimensions":[{"bizName":"most_popular_in","dateFormat":"yyyy-MM-dd","expr":"most_popular_in","isCreateDimension":0,"type":"categorical"}],"identifiers":[{"bizName":"g_name","name":"音乐类型名称","type":"primary"}],"measures":[{"agg":"sum","bizName":"rating","expr":"rating","isCreateMetric":1,"name":"评分"}],"queryType":"sql_query","sqlQuery":"SELECT g_name, rating,most_popular_in FROM genre"}', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin'); +insert into s2_datasource (id , model_id, `name`, biz_name, description, database_id ,datasource_detail, created_at, created_by, updated_at, updated_by ) VALUES(6, 3, '艺术家', 'artist', '艺术家', 1, '{"dimensions":[{"bizName":"country","dateFormat":"yyyy-MM-dd","expr":"country","isCreateDimension":0,"type":"categorical"},{"bizName":"gender","dateFormat":"yyyy-MM-dd","expr":"gender","isCreateDimension":0,"type":"categorical"}],"identifiers":[{"bizName":"artist_name","name":"艺术家名称","type":"primary"},{"bizName":"g_name","name":"音乐类型名称","type":"foreign"}],"measures":[],"queryType":"sql_query","sqlQuery":"SELECT artist_name,country,gender,g_name FROM artist"}', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin'); +insert into s2_datasource (id , model_id, `name`, biz_name, description, database_id ,datasource_detail, created_at, created_by, updated_at, updated_by ) VALUES(7, 3, '文件', 'files', '文件', 1, '{"dimensions":[{"bizName":"duration","dateFormat":"yyyy-MM-dd","expr":"duration","isCreateDimension":0,"type":"categorical"},{"bizName":"formats","dateFormat":"yyyy-MM-dd","expr":"formats","isCreateDimension":0,"type":"categorical"}],"identifiers":[{"bizName":"f_id","name":"歌曲ID","type":"primary"},{"bizName":"artist_name","name":"艺术家名称","type":"foreign"}],"measures":[],"queryType":"sql_query","sqlQuery":"SELECT f_id,artist_name,file_size,duration,formats FROM files"}', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin'); +insert into s2_datasource (id , model_id, `name`, biz_name, description, database_id ,datasource_detail, created_at, created_by, updated_at, updated_by ) VALUES(8, 3, '歌曲', 'song', '歌曲', 1, '{"dimensions":[{"bizName":"imp_date","dateFormat":"yyyy-MM-dd","expr":"imp_date","isCreateDimension":0,"type":"time","typeParams":{"isPrimary":"true","timeGranularity":"day"}},{"bizName":"country","dateFormat":"yyyy-MM-dd","expr":"country","isCreateDimension":1,"name":"国家","type":"categorical"},{"bizName":"languages","dateFormat":"yyyy-MM-dd","expr":"languages","isCreateDimension":1,"name":"语种","type":"categorical"},{"bizName":"releasedate","dateFormat":"yyyy-MM-dd","expr":"releasedate","isCreateDimension":1,"name":"发行时间","type":"categorical"},{"bizName":"rating","dateFormat":"yyyy-MM-dd","expr":"rating","isCreateDimension":1,"name":"评分","type":"categorical"}],"identifiers":[{"bizName":"song_name","name":"歌曲名称","type":"primary"},{"bizName":"f_id","name":"歌曲ID","type":"foreign"},{"bizName":"artist_name","name":"艺术家名称","type":"foreign"},{"bizName":"g_name","name":"音乐类型名称","type":"foreign"}],"measures":[{"agg":"sum","bizName":"resolution","expr":"resolution","isCreateMetric":1,"name":"分辨率"},{"agg":"sum","bizName":"rating","expr":"rating","isCreateMetric":1,"name":"评分"}],"queryType":"sql_query","sqlQuery":"SELECT imp_date,song_name,artist_name,country,f_id,g_name,rating,languages,releasedate,resolution FROM song "}', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin'); +insert into s2_datasource_rela (id , model_id, `datasource_from`, datasource_to, join_key, created_at, created_by, updated_at, updated_by ) VALUES(4, 3, 5, 6, 'g_name', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin'); +insert into s2_datasource_rela (id , model_id, `datasource_from`, datasource_to, join_key, created_at, created_by, updated_at, updated_by ) VALUES(5, 3, 6, 7, 'artist_name', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin'); +insert into s2_datasource_rela (id , model_id, `datasource_from`, datasource_to, join_key, created_at, created_by, updated_at, updated_by ) VALUES(6, 3, 6, 8, 'artist_name', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin'); +insert into s2_datasource_rela (id , model_id, `datasource_from`, datasource_to, join_key, created_at, created_by, updated_at, updated_by ) VALUES(7, 3, 5, 8, 'g_name', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin'); +insert into s2_datasource_rela (id , model_id, `datasource_from`, datasource_to, join_key, created_at, created_by, updated_at, updated_by ) VALUES(8, 3, 7, 8, 'f_id', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin'); + + +insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(8, 3, 5, '音乐类型名称', 'g_name', '音乐类型名称', 1, 0, 'primary', NULL, 'g_name', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY'); +insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(10, 3, 5, '最受欢迎', 'most_popular_in', '最受欢迎', 1, 0, 'categorical', NULL, 'most_popular_in', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY'); +insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(11, 3, 6, '艺术家名称', 'artist_name', '艺术家名称', 1, 0, 'primary', NULL, 'artist_name', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY'); +insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(12, 3, 6, '国籍', 'country', '国籍', 1, 0, 'categorical', NULL, 'country', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY'); +insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(13, 3, 6, '性别', 'gender', '性别', 1, 0, 'categorical', NULL, 'gender', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY'); +insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(15, 3, 7, '歌曲ID', 'f_id', '歌曲ID', 1, 0, 'primary', NULL, 'f_id', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY'); +insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(17, 3, 7, '文件大小', 'file_size', '文件大小', 1, 0, 'categorical', NULL, 'file_size', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY'); +insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(18, 3, 7, '持续时间', 'duration', '持续时间', 1, 0, 'categorical', NULL, 'duration', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY'); +insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(19, 3, 7, '文件格式', 'formats', '文件格式', 1, 0, 'categorical', NULL, 'formats', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY'); +insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(20, 3, 8, '歌曲名称', 'song_name', '歌曲名称', 1, 0, 'primary', NULL, 'song_name', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY'); +insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(22, 3, 8, '国籍', 'country', '国籍', 1, 0, 'categorical', NULL, 'country', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY'); +insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(26, 3, 8, '语种', 'languages', '语种', 1, 0, 'categorical', NULL, 'languages', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY'); +insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(27, 3, 8, '发行时间', 'releasedate', '发行时间', 1, 0, 'categorical', NULL, 'releasedate', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY'); +insert into s2_metric (id, model_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, created_at, created_by, updated_at, updated_by, data_format_type, data_format) VALUES(7, 3, '分辨率', 'resolution', '分辨率', 1, 0, 'ATOMIC', ' {"expr":"resolution","measures":[{"agg":"sum","bizName":"resolution","datasourceId":8,"expr":"resolution","isCreateMetric":1,"name":"resolution"}]}' , '2023-05-24 17:00:00', 'admin', '2023-05-25 00:00:00', 'admin', NULL, NULL ); +insert into s2_metric (id, model_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, created_at, created_by, updated_at, updated_by, data_format_type, data_format) VALUES(9, 3, '评分', 'rating', '评分', 1, 0, 'ATOMIC', ' {"expr":"rating","measures":[{"agg":"sum","bizName":"rating","datasourceId":8,"expr":"rating","isCreateMetric":1,"name":"rating"}]}' , '2023-05-24 17:00:00', 'admin', '2023-05-25 00:00:00', 'admin', NULL, NULL ); + + +insert into genre(g_name,rating,most_popular_in) VALUES ('tagore',8,'孟加拉国'); +insert into genre(g_name,rating,most_popular_in) VALUES ('nazrul',7,'孟加拉国'); +insert into genre(g_name,rating,most_popular_in) VALUES ('民间',9,'锡尔赫特、吉大港、库斯蒂亚'); +insert into genre(g_name,rating,most_popular_in) VALUES ('现代',8,'孟加拉国'); +insert into genre(g_name,rating,most_popular_in) VALUES ('蓝调',7,'加拿大'); +insert into genre(g_name,rating,most_popular_in) VALUES ('流行',9,'美国'); + +insert into artist(artist_name,country,gender,g_name) VALUES ('Shrikanta','印度','男性','tagore'); +insert into artist(artist_name,country,gender,g_name) VALUES ('Prity','孟加拉国','女性','nazrul'); +insert into artist(artist_name,country,gender,g_name) VALUES ('Farida','孟加拉国','女性','民间'); +insert into artist(artist_name,country,gender,g_name) VALUES ('Topu','印度','女性','现代'); +insert into artist(artist_name,country,gender,g_name) VALUES ('Enrique','美国','男性','蓝调'); +insert into artist(artist_name,country,gender,g_name) VALUES ('Michel','英国','男性','流行'); + +insert into files(f_id,artist_name,file_size,duration,formats) VALUES (1,'Shrikanta','3.78 MB','3:45','mp4'); +insert into files(f_id,artist_name,file_size,duration,formats) VALUES (2,'Prity','4.12 MB','2:56','mp3'); +insert into files(f_id,artist_name,file_size,duration,formats) VALUES (3,'Farida','3.69 MB','4:12','mp4'); +insert into files(f_id,artist_name,file_size,duration,formats) VALUES (4,'Enrique','4.58 MB','5:23','mp4'); +insert into files(f_id,artist_name,file_size,duration,formats) VALUES (5,'Michel','5.10 MB','4:34','mp3'); +insert into files(f_id,artist_name,file_size,duration,formats) VALUES (6,'Topu','4.10 MB','4:30','mp4'); + +insert into song(imp_date,song_name,artist_name,country,f_id,g_name,rating,languages,releasedate,resolution) VALUES (DATEADD('DAY', 0, CURRENT_DATE()),'Tumi 长袍 尼罗布','Shrikanta','印度',1,'tagore',8,'孟加拉语','28-AUG-2011',1080); +insert into song(imp_date,song_name,artist_name,country,f_id,g_name,rating,languages,releasedate,resolution) VALUES (DATEADD('DAY', 0, CURRENT_DATE()),'舒克诺 帕塔尔 努普尔 帕埃','Prity','孟加拉国',2,'nazrul',5,'孟加拉语','21-SEP-1997',512); +insert into song(imp_date,song_name,artist_name,country,f_id,g_name,rating,languages,releasedate,resolution) VALUES (DATEADD('DAY', 0, CURRENT_DATE()),'阿米·奥帕尔·霍伊','Farida','孟加拉国',3,'民间',7,'孟加拉语','7-APR-2001',320); +insert into song(imp_date,song_name,artist_name,country,f_id,g_name,rating,languages,releasedate,resolution) VALUES (DATEADD('DAY', 0, CURRENT_DATE()),'我的爱','Enrique','美国',4,'蓝调',6,'英文','24-JAN-2007',1080); +insert into song(imp_date,song_name,artist_name,country,f_id,g_name,rating,languages,releasedate,resolution) VALUES (DATEADD('DAY', 0, CURRENT_DATE()),'打败它','Michel','英国',5,'流行',8,'英文','17-MAR-2002',720); +insert into song(imp_date,song_name,artist_name,country,f_id,g_name,rating,languages,releasedate,resolution) VALUES (DATEADD('DAY', 0, CURRENT_DATE()),'阿杰伊阿卡什','Topu','印度',6,'现代',10,'孟加拉语','27-MAR-2004',320); + +-- benchmark diff --git a/launchers/standalone/src/main/resources/db/schema-h2.sql b/launchers/standalone/src/main/resources/db/schema-h2.sql index 1370bc82d..087686e7e 100644 --- a/launchers/standalone/src/main/resources/db/schema-h2.sql +++ b/launchers/standalone/src/main/resources/db/schema-h2.sql @@ -414,4 +414,47 @@ COMMENT ON TABLE s2_dictionary_task IS 'dictionary task information table'; +-- benchmark +CREATE TABLE IF NOT EXISTS `genre` ( + `g_name` varchar(20) NOT NULL , -- genre name + `rating` INT , + `most_popular_in` varchar(50) , + PRIMARY KEY (`g_name`) + ); +COMMENT ON TABLE genre IS 'genre'; + +CREATE TABLE IF NOT EXISTS `artist` ( + `artist_name` varchar(50) NOT NULL , -- genre name + `country` varchar(20) , + `gender` varchar(20) , + `g_name` varchar(50) + ); +COMMENT ON TABLE artist IS 'artist'; + +CREATE TABLE IF NOT EXISTS `files` ( + `f_id` bigINT NOT NULL, + `artist_name` varchar(50) , + `file_size` varchar(20) , + `duration` varchar(20) , + `formats` varchar(20) , + PRIMARY KEY (`f_id`) + ); +COMMENT ON TABLE files IS 'files'; + +CREATE TABLE IF NOT EXISTS `song` ( + `imp_date` varchar(50) , + `song_name` varchar(50) , + `artist_name` varchar(50) , + `country` varchar(20) , + `f_id` bigINT , + `g_name` varchar(20) , + `rating` INT , + `languages` varchar(20) , + `releasedate` varchar(50) , + `resolution` bigINT NOT NULL + ); +COMMENT ON TABLE song IS 'song'; + +-- benchmark + diff --git a/launchers/standalone/src/main/resources/hanlp.properties b/launchers/standalone/src/main/resources/hanlp.properties index 9d91904eb..8faa512a4 100644 --- a/launchers/standalone/src/main/resources/hanlp.properties +++ b/launchers/standalone/src/main/resources/hanlp.properties @@ -1,2 +1,2 @@ root=. -CustomDictionaryPath=data/dictionary/custom/DimValue_1_1.txt;data/dictionary/custom/DimValue_1_2.txt;data/dictionary/custom/DimValue_1_3.txt; \ No newline at end of file +CustomDictionaryPath=data/dictionary/custom/DimValue_1_1.txt;data/dictionary/custom/DimValue_1_2.txt;data/dictionary/custom/DimValue_1_3.txt;data/dictionary/custom/benchmark_cspider.txt; diff --git a/launchers/standalone/src/test/java/com/tencent/supersonic/benchmark/CSpider.java b/launchers/standalone/src/test/java/com/tencent/supersonic/benchmark/CSpider.java new file mode 100644 index 000000000..f85f0588b --- /dev/null +++ b/launchers/standalone/src/test/java/com/tencent/supersonic/benchmark/CSpider.java @@ -0,0 +1,10 @@ +package com.tencent.supersonic.benchmark; + +import org.junit.Test; + +public class CSpider { + @Test + public void case1(){ + + } +} diff --git a/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/service/AuthCommonService.java b/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/service/AuthCommonService.java new file mode 100644 index 000000000..e6a30a620 --- /dev/null +++ b/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/service/AuthCommonService.java @@ -0,0 +1,258 @@ +package com.tencent.supersonic.semantic.query.service; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.tencent.supersonic.auth.api.authentication.pojo.User; +import com.tencent.supersonic.auth.api.authorization.pojo.AuthRes; +import com.tencent.supersonic.auth.api.authorization.pojo.AuthResGrp; +import com.tencent.supersonic.auth.api.authorization.pojo.DimensionFilter; +import com.tencent.supersonic.auth.api.authorization.request.QueryAuthResReq; +import com.tencent.supersonic.auth.api.authorization.response.AuthorizedResourceResp; +import com.tencent.supersonic.auth.api.authorization.service.AuthService; +import com.tencent.supersonic.common.pojo.Constants; +import com.tencent.supersonic.common.pojo.QueryAuthorization; +import com.tencent.supersonic.common.pojo.QueryColumn; +import com.tencent.supersonic.common.pojo.enums.AuthType; +import com.tencent.supersonic.common.pojo.exception.InvalidPermissionException; +import com.tencent.supersonic.semantic.api.model.pojo.SchemaItem; +import com.tencent.supersonic.semantic.api.model.response.DimensionResp; +import com.tencent.supersonic.semantic.api.model.response.MetricResp; +import com.tencent.supersonic.semantic.api.model.response.ModelResp; +import com.tencent.supersonic.semantic.api.model.response.QueryResultWithSchemaResp; +import com.tencent.supersonic.semantic.model.domain.DimensionService; +import com.tencent.supersonic.semantic.model.domain.MetricService; +import com.tencent.supersonic.semantic.model.domain.ModelService; +import lombok.extern.slf4j.Slf4j; +import org.assertj.core.util.Sets; +import org.springframework.beans.BeanUtils; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; +import org.springframework.util.CollectionUtils; + +import java.text.SimpleDateFormat; +import java.util.List; +import java.util.ArrayList; +import java.util.Map; +import java.util.HashMap; +import java.util.Set; +import java.util.HashSet; + +import java.util.stream.Collectors; + +@Service +@Slf4j +public class AuthCommonService { + private static final ObjectMapper MAPPER = new ObjectMapper().setDateFormat( + new SimpleDateFormat(Constants.DAY_FORMAT)); + @Autowired + private AuthService authService; + @Autowired + private DimensionService dimensionService; + @Autowired + private MetricService metricService; + + @Autowired + private ModelService modelService; + + public boolean doModelAdmin(User user, Long modelId) { + List modelListAdmin = modelService.getModelListWithAuth(user, null, AuthType.ADMIN); + if (CollectionUtils.isEmpty(modelListAdmin)) { + return false; + } else { + Map> id2modelResp = modelListAdmin.stream() + .collect(Collectors.groupingBy(SchemaItem::getId)); + return !CollectionUtils.isEmpty(id2modelResp) && id2modelResp.containsKey(modelId); + } + } + + public void doModelVisible(User user, Long modelId) { + Boolean visible = true; + List modelListVisible = modelService.getModelListWithAuth(user, null, AuthType.VISIBLE); + if (CollectionUtils.isEmpty(modelListVisible)) { + visible = false; + } else { + Map> id2domainDesc = modelListVisible.stream() + .collect(Collectors.groupingBy(SchemaItem::getId)); + if (!CollectionUtils.isEmpty(id2domainDesc) && !id2domainDesc.containsKey(modelId)) { + visible = false; + } + } + if (!visible) { + ModelResp modelResp = modelService.getModel(modelId); + String modelName = modelResp.getName(); + List admins = modelService.getModelAdmin(modelResp.getId()); + String message = String.format("您没有主题域[%s]权限,请联系管理员%s开通", modelName, admins); + throw new InvalidPermissionException(message); + } + + } + + public Set getHighSensitiveColsByModelId(Long modelId) { + Set highSensitiveCols = new HashSet<>(); + List highSensitiveDimensions = dimensionService.getHighSensitiveDimension(modelId); + List highSensitiveMetrics = metricService.getHighSensitiveMetric(modelId); + if (!CollectionUtils.isEmpty(highSensitiveDimensions)) { + highSensitiveDimensions.stream().forEach(dim -> highSensitiveCols.add(dim.getBizName())); + } + if (!CollectionUtils.isEmpty(highSensitiveMetrics)) { + highSensitiveMetrics.stream().forEach(metric -> highSensitiveCols.add(metric.getBizName())); + } + return highSensitiveCols; + } + + public AuthorizedResourceResp getAuthorizedResource(User user, Long domainId, + Set sensitiveResReq) { + List resourceReqList = new ArrayList<>(); + sensitiveResReq.forEach(res -> resourceReqList.add(new AuthRes(domainId.toString(), res))); + QueryAuthResReq queryAuthResReq = new QueryAuthResReq(); + queryAuthResReq.setResources(resourceReqList); + queryAuthResReq.setModelId(domainId + ""); + AuthorizedResourceResp authorizedResource = fetchAuthRes(queryAuthResReq, user); + log.info("user:{}, domainId:{}, after queryAuthorizedResources:{}", user.getName(), domainId, + authorizedResource); + return authorizedResource; + } + private AuthorizedResourceResp fetchAuthRes(QueryAuthResReq queryAuthResReq, User user) { + log.info("queryAuthResReq:{}", queryAuthResReq); + return authService.queryAuthorizedResources(queryAuthResReq, user); + } + public Set getAuthResNameSet(AuthorizedResourceResp authorizedResource, Long domainId) { + Set resAuthName = new HashSet<>(); + List authResGrpList = authorizedResource.getResources(); + authResGrpList.stream().forEach(authResGrp -> { + List cols = authResGrp.getGroup(); + if (!CollectionUtils.isEmpty(cols)) { + cols.stream().filter(col -> domainId.equals(Long.parseLong(col.getModelId()))) + .forEach(col -> resAuthName.add(col.getName())); + } + + }); + log.info("resAuthName:{}", resAuthName); + return resAuthName; + } + public boolean allSensitiveResReqIsOk(Set sensitiveResReq, Set resAuthSet) { + if (resAuthSet.containsAll(sensitiveResReq)) { + return true; + } + log.info("sensitiveResReq:{}, resAuthSet:{}", sensitiveResReq, resAuthSet); + return false; + } + + public QueryResultWithSchemaResp getQueryResultWithColumns(QueryResultWithSchemaResp resultWithColumns, + Long domainId, AuthorizedResourceResp authResource) { + addPromptInfoInfo(domainId, resultWithColumns, authResource, Sets.newHashSet()); + return resultWithColumns; + } + + public QueryResultWithSchemaResp desensitizationData(QueryResultWithSchemaResp raw, Set need2Apply) { + log.debug("start desensitizationData logic"); + if (CollectionUtils.isEmpty(need2Apply)) { + log.info("user has all sensitiveRes"); + return raw; + } + + List columns = raw.getColumns(); + + boolean doDesensitization = false; + for (QueryColumn queryColumn : columns) { + if (need2Apply.contains(queryColumn.getNameEn())) { + doDesensitization = true; + break; + } + } + if (!doDesensitization) { + return raw; + } + + QueryResultWithSchemaResp queryResultWithColumns = raw; + try { + queryResultWithColumns = deepCopyResult(raw); + } catch (Exception e) { + log.warn("deepCopyResult: ", e); + } + addAuthorizedSchemaInfo(queryResultWithColumns.getColumns(), need2Apply); + desensitizationInternal(queryResultWithColumns.getResultList(), need2Apply); + return queryResultWithColumns; + } + + private void addAuthorizedSchemaInfo(List columns, Set need2Apply) { + if (CollectionUtils.isEmpty(need2Apply)) { + return; + } + columns.stream().forEach(col -> { + if (need2Apply.contains(col.getNameEn())) { + col.setAuthorized(false); + } + }); + } + + private void desensitizationInternal(List> result, Set need2Apply) { + log.info("start desensitizationInternal logic"); + for (int i = 0; i < result.size(); i++) { + Map row = result.get(i); + Map newRow = new HashMap<>(); + for (String col : row.keySet()) { + if (need2Apply.contains(col)) { + newRow.put(col, "****"); + } else { + newRow.put(col, row.get(col)); + } + } + result.set(i, newRow); + } + } + + private QueryResultWithSchemaResp deepCopyResult(QueryResultWithSchemaResp raw) throws Exception { + QueryResultWithSchemaResp queryResultWithColumns = new QueryResultWithSchemaResp(); + BeanUtils.copyProperties(raw, queryResultWithColumns); + + List columns = new ArrayList<>(); + if (!CollectionUtils.isEmpty(raw.getColumns())) { + String columnsStr = MAPPER.writeValueAsString(raw.getColumns()); + columns = MAPPER.readValue(columnsStr, new TypeReference>() { + }); + queryResultWithColumns.setColumns(columns); + } + queryResultWithColumns.setColumns(columns); + + List> resultData = new ArrayList<>(); + if (!CollectionUtils.isEmpty(raw.getResultList())) { + for (Map line : raw.getResultList()) { + Map newLine = new HashMap<>(); + newLine.putAll(line); + resultData.add(newLine); + } + } + queryResultWithColumns.setResultList(resultData); + return queryResultWithColumns; + } + + public void addPromptInfoInfo(Long modelId, QueryResultWithSchemaResp queryResultWithColumns, + AuthorizedResourceResp authorizedResource, Set need2Apply) { + List filters = authorizedResource.getFilters(); + if (CollectionUtils.isEmpty(need2Apply) && CollectionUtils.isEmpty(filters)) { + return; + } + List admins = modelService.getModelAdmin(modelId); + if (!CollectionUtils.isEmpty(need2Apply)) { + String promptInfo = String.format("当前结果已经过脱敏处理, 申请权限请联系管理员%s", admins); + queryResultWithColumns.setQueryAuthorization(new QueryAuthorization(promptInfo)); + } + if (!CollectionUtils.isEmpty(filters)) { + log.debug("dimensionFilters:{}", filters); + ModelResp modelResp = modelService.getModel(modelId); + List exprList = new ArrayList<>(); + List descList = new ArrayList<>(); + filters.stream().forEach(filter -> { + descList.add(filter.getDescription()); + exprList.add(filter.getExpressions().toString()); + }); + String promptInfo = "当前结果已经过行权限过滤,详细过滤条件如下:%s, 申请权限请联系管理员%s"; + String message = String.format(promptInfo, CollectionUtils.isEmpty(descList) ? exprList : descList, admins); + + queryResultWithColumns.setQueryAuthorization( + new QueryAuthorization(modelResp.getName(), exprList, descList, message)); + log.info("queryResultWithColumns:{}", queryResultWithColumns); + } + } +} diff --git a/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/service/QueryServiceImpl.java b/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/service/QueryServiceImpl.java index 298a0e897..d39287f61 100644 --- a/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/service/QueryServiceImpl.java +++ b/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/service/QueryServiceImpl.java @@ -21,6 +21,7 @@ import com.tencent.supersonic.semantic.api.query.request.QueryDslReq; import com.tencent.supersonic.semantic.api.query.request.QueryMultiStructReq; import com.tencent.supersonic.semantic.api.query.request.QueryStructReq; import com.tencent.supersonic.semantic.api.query.response.ItemUseResp; +import com.tencent.supersonic.semantic.query.utils.DslPermissionAnnotation; import com.tencent.supersonic.semantic.query.executor.QueryExecutor; import com.tencent.supersonic.semantic.query.parser.convert.QueryReqConverter; import com.tencent.supersonic.semantic.query.persistence.pojo.QueryStatement; @@ -66,9 +67,16 @@ public class QueryServiceImpl implements QueryService { } @Override - public Object queryBySql(QueryDslReq querySqlCmd, User user) throws Exception { + @DslPermissionAnnotation + @SneakyThrows + public Object queryBySql(QueryDslReq querySqlCmd, User user) { statUtils.initStatInfo(querySqlCmd, user); - QueryStatement queryStatement = convertToQueryStatement(querySqlCmd, user); + QueryStatement queryStatement = new QueryStatement(); + try { + queryStatement = convertToQueryStatement(querySqlCmd, user); + } catch (Exception e) { + log.info("convertToQueryStatement has a exception:{}", e.toString()); + } QueryResultWithSchemaResp results = semanticQueryEngine.execute(queryStatement); statUtils.statInfo2DbAsync(TaskStatusEnum.SUCCESS); return results; diff --git a/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/utils/DslDataAspect.java b/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/utils/DslDataAspect.java new file mode 100644 index 000000000..15d706c6f --- /dev/null +++ b/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/utils/DslDataAspect.java @@ -0,0 +1,188 @@ +package com.tencent.supersonic.semantic.query.utils; + +import com.google.common.base.Strings; +import com.tencent.supersonic.auth.api.authentication.pojo.User; +import com.tencent.supersonic.auth.api.authorization.response.AuthorizedResourceResp; +import com.tencent.supersonic.common.pojo.Constants; +import com.tencent.supersonic.common.pojo.exception.InvalidPermissionException; +import com.tencent.supersonic.common.util.jsqlparser.SqlParserUpdateHelper; +import com.tencent.supersonic.semantic.api.model.response.DimensionResp; +import com.tencent.supersonic.semantic.api.model.response.ModelResp; +import com.tencent.supersonic.semantic.api.model.response.QueryResultWithSchemaResp; +import com.tencent.supersonic.semantic.api.query.request.QueryDslReq; +import com.tencent.supersonic.semantic.model.domain.DimensionService; +import com.tencent.supersonic.semantic.model.domain.ModelService; +import com.tencent.supersonic.semantic.query.service.AuthCommonService; +import lombok.extern.slf4j.Slf4j; +import net.sf.jsqlparser.JSQLParserException; +import net.sf.jsqlparser.expression.Expression; +import net.sf.jsqlparser.parser.CCJSqlParserUtil; +import org.apache.commons.lang3.StringUtils; +import org.aspectj.lang.ProceedingJoinPoint; +import org.aspectj.lang.annotation.Around; +import org.aspectj.lang.annotation.Aspect; +import org.aspectj.lang.annotation.Pointcut; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; +import org.springframework.util.CollectionUtils; + +import java.util.StringJoiner; +import java.util.Objects; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.HashSet; + +import java.util.stream.Collectors; + +import static com.tencent.supersonic.common.pojo.Constants.MINUS; + +@Component +@Aspect +@Order(1) +@Slf4j +public class DslDataAspect { + + @Autowired + private QueryStructUtils queryStructUtils; + @Autowired + private DimensionService dimensionService; + @Autowired + private ModelService modelService; + @Autowired + private AuthCommonService authCommonService; + @Value("${permission.data.enable:true}") + private Boolean permissionDataEnable; + + @Pointcut("@annotation(com.tencent.supersonic.semantic.query.utils.DslPermissionAnnotation)") + private void dslPermissionCheck() { + } + + @Around("dslPermissionCheck()") + public Object doAround(ProceedingJoinPoint joinPoint) throws Throwable { + log.info("dsl permission check!"); + Object[] objects = joinPoint.getArgs(); + QueryDslReq queryDslReq = (QueryDslReq) objects[0]; + User user = (User) objects[1]; + if (!permissionDataEnable) { + log.info("not to check dsl permission!"); + return joinPoint.proceed(); + } + if (Objects.isNull(user) || Strings.isNullOrEmpty(user.getName())) { + throw new RuntimeException("please provide user information"); + } + Long modelId = queryDslReq.getModelId(); + + //1. determine whether admin of the model + if (authCommonService.doModelAdmin(user, modelId)) { + return joinPoint.proceed(); + } + + // 2. determine whether the subject field is visible + authCommonService.doModelVisible(user, modelId); + + // 3. fetch data permission meta information + Set res4Privilege = queryStructUtils.getResNameEnExceptInternalCol(queryDslReq); + log.info("modelId:{}, res4Privilege:{}", modelId, res4Privilege); + + Set sensitiveResByModel = authCommonService.getHighSensitiveColsByModelId(modelId); + Set sensitiveResReq = res4Privilege.parallelStream() + .filter(sensitiveResByModel::contains).collect(Collectors.toSet()); + log.info("this query domainId:{}, sensitiveResReq:{}", modelId, sensitiveResReq); + + // query user privilege info + AuthorizedResourceResp authorizedResource = authCommonService + .getAuthorizedResource(user, modelId, sensitiveResReq); + // get sensitiveRes that user has privilege + Set resAuthSet = authCommonService.getAuthResNameSet(authorizedResource, modelId); + + // 4.if sensitive fields without permission are involved in filter, thrown an exception + doFilterCheckLogic(queryDslReq, resAuthSet, sensitiveResReq); + + // 5.row permission pre-filter + doRowPermission(queryDslReq, authorizedResource); + + // 6.proceed + QueryResultWithSchemaResp queryResultWithColumns = (QueryResultWithSchemaResp) joinPoint.proceed(); + + if (CollectionUtils.isEmpty(sensitiveResReq) || authCommonService + .allSensitiveResReqIsOk(sensitiveResReq, resAuthSet)) { + // if sensitiveRes is empty + log.info("sensitiveResReq is empty"); + return authCommonService.getQueryResultWithColumns(queryResultWithColumns, modelId, authorizedResource); + } + + // 6.if the column has no permission, hit * + Set need2Apply = sensitiveResReq.stream().filter(req -> !resAuthSet.contains(req)) + .collect(Collectors.toSet()); + QueryResultWithSchemaResp queryResultAfterDesensitization = authCommonService + .desensitizationData(queryResultWithColumns, need2Apply); + authCommonService.addPromptInfoInfo(modelId, queryResultAfterDesensitization, authorizedResource, need2Apply); + + return queryResultAfterDesensitization; + } + + private void doRowPermission(QueryDslReq queryDslReq, AuthorizedResourceResp authorizedResource) { + log.debug("start doRowPermission logic"); + StringJoiner joiner = new StringJoiner(" OR "); + List dimensionFilters = new ArrayList<>(); + if (!CollectionUtils.isEmpty(authorizedResource.getFilters())) { + authorizedResource.getFilters().stream() + .forEach(filter -> dimensionFilters.addAll(filter.getExpressions())); + } + + if (CollectionUtils.isEmpty(dimensionFilters)) { + log.debug("dimensionFilters is empty"); + return; + } + + dimensionFilters.stream().forEach(filter -> { + if (StringUtils.isNotEmpty(filter) && StringUtils.isNotEmpty(filter.trim())) { + joiner.add(" ( " + filter + " ) "); + } + }); + try { + Expression expression = CCJSqlParserUtil.parseCondExpression(" ( " + joiner.toString() + " ) "); + if (StringUtils.isNotEmpty(joiner.toString())) { + String sql = SqlParserUpdateHelper.addWhere(queryDslReq.getSql(), expression); + log.info("before doRowPermission, queryDslReq:{}", queryDslReq.getSql()); + queryDslReq.setSql(sql); + log.info("after doRowPermission, queryDslReq:{}", queryDslReq.getSql()); + } + } catch (JSQLParserException jsqlParserException) { + log.info("jsqlParser has an exception:{}", jsqlParserException.toString()); + } + + } + + private void doFilterCheckLogic(QueryDslReq queryDslReq, Set resAuthName, + Set sensitiveResReq) { + Set resFilterSet = queryStructUtils.getFilterResNameEnExceptInternalCol(queryDslReq); + Set need2Apply = resFilterSet.stream() + .filter(res -> !resAuthName.contains(res) && sensitiveResReq.contains(res)).collect(Collectors.toSet()); + Set nameCnSet = new HashSet<>(); + + List modelIds = new ArrayList<>(); + modelIds.add(queryDslReq.getModelId()); + List modelInfos = modelService.getModelList(modelIds); + String modelNameCn = Constants.EMPTY; + if (!CollectionUtils.isEmpty(modelInfos)) { + modelNameCn = modelInfos.get(0).getName(); + } + + List dimensionDescList = dimensionService.getDimensions(queryDslReq.getModelId()); + String finalDomainNameCn = modelNameCn; + dimensionDescList.stream().filter(dim -> need2Apply.contains(dim.getBizName())) + .forEach(dim -> nameCnSet.add(finalDomainNameCn + MINUS + dim.getName())); + + if (!CollectionUtils.isEmpty(need2Apply)) { + ModelResp modelResp = modelInfos.get(0); + List admins = modelService.getModelAdmin(modelResp.getId()); + log.info("in doFilterLogic, need2Apply:{}", need2Apply); + String message = String.format("您没有以下维度%s权限, 请联系管理员%s开通", nameCnSet, admins); + throw new InvalidPermissionException(message); + } + } +} diff --git a/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/utils/DslPermissionAnnotation.java b/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/utils/DslPermissionAnnotation.java new file mode 100644 index 000000000..8a9c368dd --- /dev/null +++ b/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/utils/DslPermissionAnnotation.java @@ -0,0 +1,14 @@ +package com.tencent.supersonic.semantic.query.utils; + +import java.lang.annotation.Target; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.Documented; + +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.RUNTIME) +@Documented +public @interface DslPermissionAnnotation { + +} diff --git a/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/utils/QueryStructUtils.java b/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/utils/QueryStructUtils.java index a13a2907d..65add3e75 100644 --- a/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/utils/QueryStructUtils.java +++ b/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/utils/QueryStructUtils.java @@ -6,11 +6,13 @@ import com.tencent.supersonic.common.pojo.DateConf.DateMode; import com.tencent.supersonic.common.pojo.enums.TypeEnums; import com.tencent.supersonic.common.pojo.Aggregator; import com.tencent.supersonic.common.pojo.DateConf; +import com.tencent.supersonic.common.util.jsqlparser.SqlParserSelectHelper; import com.tencent.supersonic.semantic.api.model.pojo.SchemaItem; import com.tencent.supersonic.semantic.api.model.pojo.ItemDateFilter; import com.tencent.supersonic.semantic.api.model.response.DimensionResp; import com.tencent.supersonic.semantic.api.model.response.ItemDateResp; import com.tencent.supersonic.semantic.api.model.response.MetricResp; +import com.tencent.supersonic.semantic.api.query.request.QueryDslReq; import com.tencent.supersonic.semantic.api.query.request.QueryStructReq; import com.tencent.supersonic.semantic.model.domain.Catalog; @@ -145,11 +147,19 @@ public class QueryStructUtils { sqlFilterUtils.getFiltersCol(queryStructCmd.getOriginalFilter()).stream().forEach(col -> resNameEnSet.add(col)); return resNameEnSet; } - + public Set getResNameEn(QueryDslReq queryDslReq) { + Set resNameEnSet = SqlParserSelectHelper.getAllFields(queryDslReq.getSql()) + .stream().collect(Collectors.toSet()); + return resNameEnSet; + } public Set getResNameEnExceptInternalCol(QueryStructReq queryStructCmd) { Set resNameEnSet = getResNameEn(queryStructCmd); return resNameEnSet.stream().filter(res -> !internalCols.contains(res)).collect(Collectors.toSet()); } + public Set getResNameEnExceptInternalCol(QueryDslReq queryDslReq) { + Set resNameEnSet = getResNameEn(queryDslReq); + return resNameEnSet.stream().filter(res -> !internalCols.contains(res)).collect(Collectors.toSet()); + } public Set getFilterResNameEn(QueryStructReq queryStructCmd) { Set resNameEnSet = new HashSet<>(); @@ -162,6 +172,12 @@ public class QueryStructUtils { return resNameEnSet.stream().filter(res -> !internalCols.contains(res)).collect(Collectors.toSet()); } + public Set getFilterResNameEnExceptInternalCol(QueryDslReq queryDslReq) { + String sql = queryDslReq.getSql(); + Set resNameEnSet = SqlParserSelectHelper.getWhereFields(sql).stream().collect(Collectors.toSet()); + return resNameEnSet.stream().filter(res -> !internalCols.contains(res)).collect(Collectors.toSet()); + } + public String generateInternalMetricName(Long modelId, List groups) { String internalMetricNamePrefix = ""; if (CollectionUtils.isEmpty(groups)) {