Merge branch 'feature/showcase' into feature/lxw

# Conflicts:
#	chat/core/src/main/java/com/tencent/supersonic/chat/service/impl/QueryServiceImpl.java
#	chat/core/src/main/resources/mapper/ChatQueryDOMapper.xml

.gitignore
@@ -16,4 +16,5 @@ assembly/runtime/*
 /runtime
 **/.flattened-pom.xml
 chm_db/
 __pycache__/
+/dict

@@ -67,4 +67,4 @@ moveToRuntime standalone
 setEnvToWeb chat
 setEnvToWeb semantic
 
 rm -fr ${buildDir}/webapp
@@ -192,4 +192,4 @@ case "$command" in
     *)
         echo "Use command {start|stop|restart} to run."
         exit 1
 esac

@@ -10,7 +10,7 @@ public interface AuthService {
 
     List<AuthGroup> queryAuthGroups(String domainId, Integer groupId);
 
-    void updateAuthGroup(AuthGroup group);
+    void addOrUpdateAuthGroup(AuthGroup group);
 
     void removeAuthGroup(AuthGroup group);
 

@@ -53,7 +53,7 @@ public class AuthServiceImpl implements AuthService {
     }
 
     @Override
-    public void updateAuthGroup(AuthGroup group) {
+    public void addOrUpdateAuthGroup(AuthGroup group) {
         Gson g = new Gson();
         if (group.getGroupId() == null) {
             int nextGroupId = 1;

@@ -40,7 +40,7 @@ public class AuthController {
     @PostMapping("/createGroup")
     public void newAuthGroup(@RequestBody AuthGroup group) {
         group.setGroupId(null);
-        authService.updateAuthGroup(group);
+        authService.addOrUpdateAuthGroup(group);
     }
 
     @PostMapping("/removeGroup")
@@ -58,7 +58,7 @@ public class AuthController {
         if (group.getGroupId() == null || group.getGroupId() == 0) {
             throw new RuntimeException("groupId is empty");
         }
-        authService.updateAuthGroup(group);
+        authService.addOrUpdateAuthGroup(group);
     }
 
     /**

@@ -7,6 +7,7 @@ import java.util.Map;
 import java.util.stream.Collectors;
 
 public class SemanticSchema implements Serializable {
+
     private List<ModelSchema> modelSchemaList;
 
     public SemanticSchema(List<ModelSchema> modelSchemaList) {
@@ -34,12 +35,28 @@ public class SemanticSchema implements Serializable {
         return dimensions;
     }
 
+    public List<SchemaElement> getDimensions(Long modelId) {
+        List<SchemaElement> dimensions = getDimensions();
+        return getElementsByModelId(modelId, dimensions);
+    }
+
     public List<SchemaElement> getMetrics() {
         List<SchemaElement> metrics = new ArrayList<>();
         modelSchemaList.stream().forEach(d -> metrics.addAll(d.getMetrics()));
         return metrics;
     }
 
+    public List<SchemaElement> getMetrics(Long modelId) {
+        List<SchemaElement> metrics = getMetrics();
+        return getElementsByModelId(modelId, metrics);
+    }
+
+    private List<SchemaElement> getElementsByModelId(Long modelId, List<SchemaElement> elements) {
+        return elements.stream()
+                .filter(schemaElement -> modelId.equals(schemaElement.getModel()))
+                .collect(Collectors.toList());
+    }
+
     public List<SchemaElement> getModels() {
         List<SchemaElement> models = new ArrayList<>();
         modelSchemaList.stream().forEach(d -> models.add(d.getModel()));

@@ -1,25 +1,20 @@
 package com.tencent.supersonic.chat.api.pojo.request;
 
 
+import com.tencent.supersonic.auth.api.authentication.pojo.User;
 import com.tencent.supersonic.chat.api.pojo.SchemaElement;
 import com.tencent.supersonic.common.pojo.DateConf;
-import com.tencent.supersonic.common.pojo.Order;
-import com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum;
 import java.util.HashSet;
 import java.util.Set;
 import lombok.Data;
 
 @Data
 public class QueryDataReq {
-    String queryMode;
-    SchemaElement model;
-    Set<SchemaElement> metrics = new HashSet<>();
-    Set<SchemaElement> dimensions = new HashSet<>();
-    Set<QueryFilter> dimensionFilters = new HashSet<>();
-    Set<QueryFilter> metricFilters = new HashSet<>();
-    private AggregateTypeEnum aggType = AggregateTypeEnum.NONE;
-    private Set<Order> orders = new HashSet<>();
+    private User user;
+    private Set<SchemaElement> metrics = new HashSet<>();
+    private Set<SchemaElement> dimensions = new HashSet<>();
+    private Set<QueryFilter> dimensionFilters = new HashSet<>();
     private DateConf dateInfo;
-    private Long limit;
-    private Boolean nativeQuery = false;
+    private Long queryId = 7L;
+    private Integer parseId = 2;
 }

@@ -1,27 +0,0 @@
-package com.tencent.supersonic.chat.corrector;
-
-import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
-import com.tencent.supersonic.chat.parser.llm.dsl.DSLDateHelper;
-import com.tencent.supersonic.common.util.jsqlparser.SqlParserSelectHelper;
-import com.tencent.supersonic.common.util.jsqlparser.SqlParserUpdateHelper;
-import java.util.List;
-import lombok.extern.slf4j.Slf4j;
-import org.springframework.util.CollectionUtils;
-
-@Slf4j
-public class DateFieldCorrector extends BaseSemanticCorrector {
-
-    @Override
-    public void correct(SemanticCorrectInfo semanticCorrectInfo) {
-
-        String sql = semanticCorrectInfo.getSql();
-        List<String> whereFields = SqlParserSelectHelper.getWhereFields(sql);
-        if (CollectionUtils.isEmpty(whereFields) || !whereFields.contains(DATE_FIELD)) {
-            String currentDate = DSLDateHelper.getReferenceDate(semanticCorrectInfo.getParseInfo().getModelId());
-            sql = SqlParserUpdateHelper.addWhere(sql, DATE_FIELD, currentDate);
-        }
-        semanticCorrectInfo.setPreSql(semanticCorrectInfo.getSql());
-        semanticCorrectInfo.setSql(sql);
-    }
-
-}

@@ -1,18 +0,0 @@
-package com.tencent.supersonic.chat.corrector;
-
-import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
-import com.tencent.supersonic.common.util.jsqlparser.SqlParserUpdateHelper;
-import lombok.extern.slf4j.Slf4j;
-
-@Slf4j
-public class FieldCorrector extends BaseSemanticCorrector {
-
-    @Override
-    public void correct(SemanticCorrectInfo semanticCorrectInfo) {
-        String preSql = semanticCorrectInfo.getSql();
-        semanticCorrectInfo.setPreSql(preSql);
-        String sql = SqlParserUpdateHelper.replaceFields(preSql,
-                getFieldToBizName(semanticCorrectInfo.getParseInfo().getModelId()));
-        semanticCorrectInfo.setSql(sql);
-    }
-}

@@ -1,16 +0,0 @@
-package com.tencent.supersonic.chat.corrector;
-
-import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
-import com.tencent.supersonic.common.util.jsqlparser.SqlParserUpdateHelper;
-import lombok.extern.slf4j.Slf4j;
-
-@Slf4j
-public class FunctionAliasCorrector extends BaseSemanticCorrector {
-
-    @Override
-    public void correct(SemanticCorrectInfo semanticCorrectInfo) {
-        String replaceAlias = SqlParserUpdateHelper.replaceAlias(semanticCorrectInfo.getSql());
-        semanticCorrectInfo.setSql(replaceAlias);
-    }
-
-}

@@ -1,17 +0,0 @@
-package com.tencent.supersonic.chat.corrector;
-
-import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
-import com.tencent.supersonic.common.util.jsqlparser.SqlParserUpdateHelper;
-import lombok.extern.slf4j.Slf4j;
-
-@Slf4j
-public class FunctionCorrector extends BaseSemanticCorrector {
-
-    @Override
-    public void correct(SemanticCorrectInfo semanticCorrectInfo) {
-        String preSql = semanticCorrectInfo.getSql();
-        semanticCorrectInfo.setPreSql(preSql);
-        String sql = SqlParserUpdateHelper.replaceFunction(preSql);
-        semanticCorrectInfo.setSql(sql);
-    }
-}

@@ -16,11 +16,39 @@ import lombok.extern.slf4j.Slf4j;
 import org.springframework.util.CollectionUtils;
 
 @Slf4j
-public class FieldNameCorrector extends BaseSemanticCorrector {
+public class GlobalCorrector extends BaseSemanticCorrector {
 
     @Override
     public void correct(SemanticCorrectInfo semanticCorrectInfo) {
 
+        replaceAlias(semanticCorrectInfo);
+
+        updateFieldNameByLinkingValue(semanticCorrectInfo);
+
+        updateFieldNameByBizName(semanticCorrectInfo);
+
+        addAggregateToMetric(semanticCorrectInfo);
+    }
+
+    private void addAggregateToMetric(SemanticCorrectInfo semanticCorrectInfo) {
+
+    }
+
+    private void replaceAlias(SemanticCorrectInfo semanticCorrectInfo) {
+        String replaceAlias = SqlParserUpdateHelper.replaceAlias(semanticCorrectInfo.getSql());
+        semanticCorrectInfo.setSql(replaceAlias);
+    }
+
+    private void updateFieldNameByBizName(SemanticCorrectInfo semanticCorrectInfo) {
+
+        Map<String, String> fieldToBizName = getFieldToBizName(semanticCorrectInfo.getParseInfo().getModelId());
+
+        String sql = SqlParserUpdateHelper.replaceFields(semanticCorrectInfo.getSql(), fieldToBizName);
+
+        semanticCorrectInfo.setSql(sql);
+    }
+
+    private void updateFieldNameByLinkingValue(SemanticCorrectInfo semanticCorrectInfo) {
         Object context = semanticCorrectInfo.getParseInfo().getProperties().get(Constants.CONTEXT);
         if (Objects.isNull(context)) {
             return;
@@ -45,5 +73,4 @@ public class FieldNameCorrector extends BaseSemanticCorrector {
         String sql = SqlParserUpdateHelper.replaceFieldNameByValue(preSql, fieldValueToFieldNames);
         semanticCorrectInfo.setSql(sql);
     }
-
 }

@@ -0,0 +1,15 @@
+package com.tencent.supersonic.chat.corrector;
+
+import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+public class GroupByCorrector extends BaseSemanticCorrector {
+
+    @Override
+    public void correct(SemanticCorrectInfo semanticCorrectInfo) {
+
+
+
+    }
+}

@@ -0,0 +1,14 @@
+package com.tencent.supersonic.chat.corrector;
+
+import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+public class HavingCorrector extends BaseSemanticCorrector {
+
+    @Override
+    public void correct(SemanticCorrectInfo semanticCorrectInfo) {
+
+    }
+
+}

@@ -1,48 +0,0 @@
-package com.tencent.supersonic.chat.corrector;
-
-import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
-import com.tencent.supersonic.chat.api.pojo.request.QueryFilters;
-import com.tencent.supersonic.common.pojo.Constants;
-import com.tencent.supersonic.common.util.StringUtil;
-import com.tencent.supersonic.common.util.jsqlparser.SqlParserUpdateHelper;
-import java.util.Objects;
-import java.util.stream.Collectors;
-import lombok.extern.slf4j.Slf4j;
-import net.sf.jsqlparser.JSQLParserException;
-import net.sf.jsqlparser.expression.Expression;
-import net.sf.jsqlparser.parser.CCJSqlParserUtil;
-import org.apache.commons.collections.CollectionUtils;
-import org.apache.commons.lang3.StringUtils;
-
-@Slf4j
-public class QueryFilterAppend extends BaseSemanticCorrector {
-
-    @Override
-    public void correct(SemanticCorrectInfo semanticCorrectInfo) throws JSQLParserException {
-        String queryFilter = getQueryFilter(semanticCorrectInfo.getQueryFilters());
-        String preSql = semanticCorrectInfo.getSql();
-
-        if (StringUtils.isNotEmpty(queryFilter)) {
-            log.info("add queryFilter to preSql :{}", queryFilter);
-            Expression expression = CCJSqlParserUtil.parseCondExpression(queryFilter);
-            String sql = SqlParserUpdateHelper.addWhere(preSql, expression);
-            semanticCorrectInfo.setPreSql(preSql);
-            semanticCorrectInfo.setSql(sql);
-        }
-    }
-
-    private String getQueryFilter(QueryFilters queryFilters) {
-        if (Objects.isNull(queryFilters) || CollectionUtils.isEmpty(queryFilters.getFilters())) {
-            return null;
-        }
-        return queryFilters.getFilters().stream()
-                .map(filter -> {
-                    String bizNameWrap = StringUtil.getSpaceWrap(filter.getBizName());
-                    String operatorWrap = StringUtil.getSpaceWrap(filter.getOperator().getValue());
-                    String valueWrap = StringUtil.getCommaWrap(filter.getValue().toString());
-                    return bizNameWrap + operatorWrap + valueWrap;
-                })
-                .collect(Collectors.joining(Constants.AND_UPPER));
-    }
-
-}

@@ -13,11 +13,12 @@ import net.sf.jsqlparser.expression.Expression;
 import org.springframework.util.CollectionUtils;
 
 @Slf4j
-public class SelectFieldAppendCorrector extends BaseSemanticCorrector {
+public class SelectCorrector extends BaseSemanticCorrector {
 
     @Override
     public void correct(SemanticCorrectInfo semanticCorrectInfo) {
         String preSql = semanticCorrectInfo.getSql();
+
         if (SqlParserSelectHelper.hasAggregateFunction(preSql)) {
             Expression havingExpression = SqlParserSelectHelper.getHavingExpression(preSql);
             if (Objects.nonNull(havingExpression)) {

@@ -5,7 +5,7 @@ import com.tencent.supersonic.common.util.jsqlparser.SqlParserUpdateHelper;
 import lombok.extern.slf4j.Slf4j;
 
 @Slf4j
-public class TableNameCorrector extends BaseSemanticCorrector {
+public class TableCorrector extends BaseSemanticCorrector {
 
     public static final String TABLE_PREFIX = "t_";
 

@@ -1,26 +1,92 @@
 package com.tencent.supersonic.chat.corrector;
 
-import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
 import com.tencent.supersonic.chat.api.pojo.SchemaElement;
 import com.tencent.supersonic.chat.api.pojo.SchemaValueMap;
+import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
 import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
+import com.tencent.supersonic.chat.api.pojo.request.QueryFilters;
+import com.tencent.supersonic.chat.parser.llm.dsl.DSLDateHelper;
+import com.tencent.supersonic.common.pojo.Constants;
 import com.tencent.supersonic.common.util.ContextUtils;
+import com.tencent.supersonic.common.util.StringUtil;
+import com.tencent.supersonic.common.util.jsqlparser.SqlParserSelectHelper;
 import com.tencent.supersonic.common.util.jsqlparser.SqlParserUpdateHelper;
 import com.tencent.supersonic.knowledge.service.SchemaService;
+import com.tencent.supersonic.semantic.api.model.enums.TimeDimensionEnum;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 import java.util.stream.Collectors;
 import lombok.extern.slf4j.Slf4j;
+import net.sf.jsqlparser.JSQLParserException;
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.parser.CCJSqlParserUtil;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.logging.log4j.util.Strings;
 import org.springframework.util.CollectionUtils;
 
 @Slf4j
-public class FieldValueCorrector extends BaseSemanticCorrector {
+public class WhereCorrector extends BaseSemanticCorrector {
 
     @Override
-    public void correct(SemanticCorrectInfo semanticCorrectInfo) {
+    public void correct(SemanticCorrectInfo semanticCorrectInfo) throws JSQLParserException {
+
+        addDateIfNotExist(semanticCorrectInfo);
+
+        parserDateDiffFunction(semanticCorrectInfo);
+
+        addQueryFilter(semanticCorrectInfo);
+
+        updateFieldValueByTechName(semanticCorrectInfo);
+    }
+
+    private void addQueryFilter(SemanticCorrectInfo semanticCorrectInfo) throws JSQLParserException {
+        String queryFilter = getQueryFilter(semanticCorrectInfo.getQueryFilters());
+
+        String preSql = semanticCorrectInfo.getSql();
+
+        if (StringUtils.isNotEmpty(queryFilter)) {
+            log.info("add queryFilter to preSql :{}", queryFilter);
+            Expression expression = CCJSqlParserUtil.parseCondExpression(queryFilter);
+            String sql = SqlParserUpdateHelper.addWhere(preSql, expression);
+            semanticCorrectInfo.setPreSql(preSql);
+            semanticCorrectInfo.setSql(sql);
+        }
+    }
+
+    private void parserDateDiffFunction(SemanticCorrectInfo semanticCorrectInfo) {
+        String preSql = semanticCorrectInfo.getSql();
+        semanticCorrectInfo.setPreSql(preSql);
+        String sql = SqlParserUpdateHelper.replaceFunction(preSql);
+        semanticCorrectInfo.setSql(sql);
+    }
+
+    private void addDateIfNotExist(SemanticCorrectInfo semanticCorrectInfo) {
+        String sql = semanticCorrectInfo.getSql();
+        List<String> whereFields = SqlParserSelectHelper.getWhereFields(sql);
+        if (CollectionUtils.isEmpty(whereFields) || !whereFields.contains(TimeDimensionEnum.DAY.getName())) {
+            String currentDate = DSLDateHelper.getReferenceDate(semanticCorrectInfo.getParseInfo().getModelId());
+            sql = SqlParserUpdateHelper.addWhere(sql, TimeDimensionEnum.DAY.getName(), currentDate);
+        }
+        semanticCorrectInfo.setSql(sql);
+    }
+
+    private String getQueryFilter(QueryFilters queryFilters) {
+        if (Objects.isNull(queryFilters) || CollectionUtils.isEmpty(queryFilters.getFilters())) {
+            return null;
+        }
+        return queryFilters.getFilters().stream()
+                .map(filter -> {
+                    String bizNameWrap = StringUtil.getSpaceWrap(filter.getBizName());
+                    String operatorWrap = StringUtil.getSpaceWrap(filter.getOperator().getValue());
+                    String valueWrap = StringUtil.getCommaWrap(filter.getValue().toString());
+                    return bizNameWrap + operatorWrap + valueWrap;
+                })
+                .collect(Collectors.joining(Constants.AND_UPPER));
+    }
+
+    private void updateFieldValueByTechName(SemanticCorrectInfo semanticCorrectInfo) {
         SemanticSchema semanticSchema = ContextUtils.getBean(SchemaService.class).getSemanticSchema();
         Long modelId = semanticCorrectInfo.getParseInfo().getModel().getId();
         List<SchemaElement> dimensions = semanticSchema.getDimensions().stream()
@@ -39,7 +105,6 @@ public class FieldValueCorrector extends BaseSemanticCorrector {
             return;
         }
 
-
     private Map<String, Map<String, String>> getAliasAndBizNameToTechName(List<SchemaElement> dimensions) {
         if (CollectionUtils.isEmpty(dimensions)) {
             return new HashMap<>();

@@ -408,27 +408,20 @@ public class LLMDslParser implements SemanticParser {
 
     protected List<String> getFieldNameList(QueryContext queryCtx, Long modelId, SemanticSchema semanticSchema,
             LLMParserConfig llmParserConfig) {
 
+        Set<String> results = getTopNFieldNames(modelId, semanticSchema, llmParserConfig);
+
+        Set<String> fieldNameList = getMatchedFieldNames(queryCtx, modelId, semanticSchema);
+
+        results.addAll(fieldNameList);
+        return new ArrayList<>(results);
+    }
+
+    protected Set<String> getMatchedFieldNames(QueryContext queryCtx, Long modelId, SemanticSchema semanticSchema) {
         Map<Long, String> itemIdToName = getItemIdToName(modelId, semanticSchema);
 
-        Set<String> results = semanticSchema.getDimensions().stream()
-                .filter(schemaElement -> modelId.equals(schemaElement.getModel()))
-                .sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed())
-                .limit(llmParserConfig.getDimensionTopN())
-                .map(entry -> entry.getName())
-                .collect(Collectors.toSet());
-
-        Set<String> metrics = semanticSchema.getMetrics().stream()
-                .filter(schemaElement -> modelId.equals(schemaElement.getModel()))
-                .sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed())
-                .limit(llmParserConfig.getMetricTopN())
-                .map(entry -> entry.getName())
-                .collect(Collectors.toSet());
-
-        results.addAll(metrics);
-
         List<SchemaElementMatch> matchedElements = queryCtx.getMapInfo().getMatchedElements(modelId);
         if (CollectionUtils.isEmpty(matchedElements)) {
-            return new ArrayList<>(results);
+            return new HashSet<>();
         }
         Set<String> fieldNameList = matchedElements.stream()
                 .filter(schemaElementMatch -> {
@@ -447,13 +440,29 @@ public class LLMDslParser implements SemanticParser {
                 })
                 .filter(name -> StringUtils.isNotEmpty(name) && !name.contains("%"))
                 .collect(Collectors.toSet());
-        results.addAll(fieldNameList);
-        return new ArrayList<>(results);
+        return fieldNameList;
+    }
+
+    private Set<String> getTopNFieldNames(Long modelId, SemanticSchema semanticSchema,
+            LLMParserConfig llmParserConfig) {
+        Set<String> results = semanticSchema.getDimensions(modelId).stream()
+                .sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed())
+                .limit(llmParserConfig.getDimensionTopN())
+                .map(entry -> entry.getName())
+                .collect(Collectors.toSet());
+
+        Set<String> metrics = semanticSchema.getMetrics(modelId).stream()
+                .sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed())
+                .limit(llmParserConfig.getMetricTopN())
+                .map(entry -> entry.getName())
+                .collect(Collectors.toSet());
+
+        results.addAll(metrics);
+        return results;
     }
 
     protected Map<Long, String> getItemIdToName(Long modelId, SemanticSchema semanticSchema) {
-        return semanticSchema.getDimensions().stream()
-                .filter(entry -> modelId.equals(entry.getModel()))
+        return semanticSchema.getDimensions(modelId).stream()
                 .collect(Collectors.toMap(SchemaElement::getId, SchemaElement::getName, (value1, value2) -> value2));
     }
 

@@ -72,6 +72,7 @@ public class ChatQueryController {
     public Object queryData(@RequestBody QueryDataReq queryData,
                             HttpServletRequest request, HttpServletResponse response)
             throws Exception {
+        queryData.setUser(UserHolder.findUser(request, response));
         return queryService.executeDirectQuery(queryData, UserHolder.findUser(request, response));
     }
 

@@ -3,11 +3,14 @@ package com.tencent.supersonic.chat.service.impl;
 
 import com.tencent.supersonic.auth.api.authentication.pojo.User;
 import com.tencent.supersonic.chat.api.component.SchemaMapper;
+import com.tencent.supersonic.chat.api.component.SemanticLayer;
 import com.tencent.supersonic.chat.api.component.SemanticQuery;
 import com.tencent.supersonic.chat.api.component.SemanticParser;
 import com.tencent.supersonic.chat.api.pojo.ChatContext;
 import com.tencent.supersonic.chat.api.pojo.QueryContext;
 import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
+import com.tencent.supersonic.chat.api.pojo.request.QueryDataReq;
+import com.tencent.supersonic.chat.api.pojo.request.QueryFilter;
 import com.tencent.supersonic.chat.api.pojo.request.DimensionValueReq;
 import com.tencent.supersonic.chat.api.pojo.request.ExecuteQueryReq;
 import com.tencent.supersonic.chat.api.pojo.request.QueryReq;
@@ -15,13 +18,15 @@ import com.tencent.supersonic.chat.api.pojo.response.EntityInfo;
 import com.tencent.supersonic.chat.api.pojo.response.ParseResp;
 import com.tencent.supersonic.chat.api.pojo.response.QueryResult;
 import com.tencent.supersonic.chat.api.pojo.response.QueryState;
+import com.tencent.supersonic.chat.parser.llm.dsl.DSLParseResult;
 import com.tencent.supersonic.chat.api.pojo.response.SolvedQueryRecallResp;
 import com.tencent.supersonic.chat.persistence.dataobject.ChatParseDO;
 import com.tencent.supersonic.chat.persistence.dataobject.CostType;
 import com.tencent.supersonic.chat.persistence.dataobject.StatisticsDO;
 import com.tencent.supersonic.chat.query.QuerySelector;
-import com.tencent.supersonic.chat.api.pojo.request.QueryDataReq;
 import com.tencent.supersonic.chat.query.QueryManager;
+import com.tencent.supersonic.chat.query.llm.dsl.DslQuery;
+import com.tencent.supersonic.chat.query.llm.dsl.LLMResp;
 import com.tencent.supersonic.chat.queryresponder.QueryResponder;
 import com.tencent.supersonic.chat.service.ChatService;
 import com.tencent.supersonic.chat.service.QueryService;
@@ -29,25 +34,29 @@ import com.tencent.supersonic.chat.service.SemanticService;
 import com.tencent.supersonic.chat.service.StatisticsService;
 import com.tencent.supersonic.chat.utils.ComponentFactory;
 
+import java.util.Map;
 import com.tencent.supersonic.semantic.api.model.response.ExplainResp;
 import java.util.List;
 import java.util.ArrayList;
 import java.util.Set;
 import java.util.HashSet;
+import java.util.HashMap;
 import java.util.Comparator;
 import java.util.Objects;
 import java.util.stream.Collectors;
 
+import com.tencent.supersonic.common.pojo.Constants;
 import com.tencent.supersonic.common.pojo.DateConf;
 import com.tencent.supersonic.common.util.ContextUtils;
 import com.tencent.supersonic.common.util.JsonUtil;
+import com.tencent.supersonic.common.util.jsqlparser.SqlParserUpdateHelper;
 import com.tencent.supersonic.semantic.api.model.response.QueryResultWithSchemaResp;
 import com.tencent.supersonic.semantic.api.query.enums.FilterOperatorEnum;
 import com.tencent.supersonic.semantic.api.query.pojo.Filter;
 import com.tencent.supersonic.semantic.api.query.request.QueryStructReq;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.calcite.sql.parser.SqlParseException;
-import org.springframework.beans.BeanUtils;
+import org.apache.commons.collections.CollectionUtils;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.context.annotation.Primary;
@@ -175,34 +184,26 @@ public class QueryServiceImpl implements QueryService {
         ChatContext chatCtx = chatService.getOrCreateContext(queryReq.getChatId());
         chatCtx.setAgentId(queryReq.getAgentId());
         Long startTime = System.currentTimeMillis();
-        QueryResult queryResult = null;
-        try {
-            queryResult = semanticQuery.execute(queryReq.getUser());
-        } catch (Exception e) {
-            log.error("query execute failed, queryText:{}", queryReq.getQueryText(), e);
-            queryResult = new QueryResult();
-            queryResult.setQueryState(QueryState.INVALID);
-        }
-        timeCostDOList.add(StatisticsDO.builder().cost((int) (System.currentTimeMillis() - startTime))
-                .interfaceName(semanticQuery.getClass().getSimpleName()).type(CostType.QUERY.getType()).build());
-        saveInfo(timeCostDOList, queryReq.getQueryText(), queryReq.getQueryId(),
-                queryReq.getUser().getName(), queryReq.getChatId().longValue());
-        queryResult.setChatContext(parseInfo);
-        // update chat context after a successful semantic query
-        if (queryReq.isSaveAnswer() && QueryState.SUCCESS.equals(queryResult.getQueryState())) {
-            chatCtx.setParseInfo(parseInfo);
-            chatService.updateContext(chatCtx);
-            queryResponder.saveSolvedQuery(queryReq.getQueryText(), queryReq.getQueryId(), queryReq.getParseId());
-        }
-        chatCtx.setQueryText(queryReq.getQueryText());
-        chatCtx.setUser(queryReq.getUser().getName());
-        chatService.updateQuery(queryReq.getQueryId(), queryResult, chatCtx);
-        if (!QueryState.SUCCESS.equals(queryResult.getQueryState())) {
-            List<SolvedQueryRecallResp> solvedQueryRecallResps =
-                    queryResponder.recallSolvedQuery(queryReq.getQueryText());
-            queryResult.setSimilarSolvedQuery(solvedQueryRecallResps);
-        }
+        QueryResult queryResult = semanticQuery.execute(queryReq.getUser());
+        if (queryResult != null) {
+            timeCostDOList.add(StatisticsDO.builder().cost((int) (System.currentTimeMillis() - startTime))
+                    .interfaceName(semanticQuery.getClass().getSimpleName()).type(CostType.QUERY.getType()).build());
+            saveInfo(timeCostDOList, queryReq.getQueryText(), queryReq.getQueryId(),
+                    queryReq.getUser().getName(), queryReq.getChatId().longValue());
+            queryResult.setChatContext(parseInfo);
+            // update chat context after a successful semantic query
+            if (queryReq.isSaveAnswer() && QueryState.SUCCESS.equals(queryResult.getQueryState())) {
+                chatCtx.setParseInfo(parseInfo);
+                chatService.updateContext(chatCtx);
+            }
+            chatCtx.setQueryText(queryReq.getQueryText());
+            chatCtx.setUser(queryReq.getUser().getName());
+            chatService.updateQuery(queryReq.getQueryId(), queryResult, chatCtx);
+        } else {
+            chatService.deleteChatQuery(queryReq.getQueryId());
+        }
 
         return queryResult;
     }
 
@@ -273,8 +274,52 @@ public class QueryServiceImpl implements QueryService {
 
     @Override
     public QueryResult executeDirectQuery(QueryDataReq queryData, User user) throws SqlParseException {
-        SemanticQuery semanticQuery = QueryManager.createRuleQuery(queryData.getQueryMode());
-        BeanUtils.copyProperties(queryData, semanticQuery.getParseInfo());
+        ChatParseDO chatParseDO = chatService.getParseInfo(queryData.getQueryId(),
+                queryData.getUser().getName(), queryData.getParseId());
+        SemanticParseInfo parseInfo = JsonUtil.toObject(chatParseDO.getParseInfo(), SemanticParseInfo.class);
+        if (!parseInfo.getQueryMode().equals(DslQuery.QUERY_MODE)) {
+            if (CollectionUtils.isNotEmpty(queryData.getDimensions())) {
+                parseInfo.setDimensions(queryData.getDimensions());
+            }
+            if (CollectionUtils.isNotEmpty(queryData.getMetrics())) {
+                parseInfo.setMetrics(queryData.getMetrics());
+            }
+            if (CollectionUtils.isNotEmpty(queryData.getDimensionFilters())) {
+                parseInfo.setDimensionFilters(queryData.getDimensionFilters());
+            }
+        }
+        if (Objects.nonNull(queryData.getDateInfo())) {
+            parseInfo.setDateInfo(queryData.getDateInfo());
+        }
+        if (parseInfo.getQueryMode().equals(DslQuery.QUERY_MODE)
+                && CollectionUtils.isNotEmpty(queryData.getDimensionFilters())) {
+            Map<String, Map<String, String>> filedNameToValueMap = new HashMap<>();
+            String json = JsonUtil.toString(parseInfo.getProperties().get(Constants.CONTEXT));
+            DSLParseResult dslParseResult = JsonUtil.toObject(json, DSLParseResult.class);
+            LLMResp llmResp = dslParseResult.getLlmResp();
+            String correctorSql = llmResp.getCorrectorSql();
+            log.info("correctorSql before replacing:{}", correctorSql);
+            for (QueryFilter dslQueryFilter : queryData.getDimensionFilters()) {
+                for (QueryFilter queryFilter : parseInfo.getDimensionFilters()) {
+                    if (dslQueryFilter.getBizName().equals(queryFilter.getBizName())) {
+                        Map<String, String> map = new HashMap<>();
+                        map.put(queryFilter.getValue().toString(), dslQueryFilter.getValue().toString());
+                        filedNameToValueMap.put(dslQueryFilter.getBizName(), map);
+                        break;
+                    }
+                }
+            }
+            log.info("filedNameToValueMap:{}", filedNameToValueMap);
+            correctorSql = SqlParserUpdateHelper.replaceValue(correctorSql, filedNameToValueMap);
+            log.info("correctorSql after replacing:{}", correctorSql);
+            llmResp.setCorrectorSql(correctorSql);
+            dslParseResult.setLlmResp(llmResp);
+            Map<String, Object> properties = new HashMap<>();
+            properties.put(Constants.CONTEXT, dslParseResult);
+            parseInfo.setProperties(properties);
+        }
+        SemanticQuery semanticQuery = QueryManager.createQuery(parseInfo.getQueryMode());
+        semanticQuery.setParseInfo(parseInfo);
         QueryResult queryResult = semanticQuery.execute(user);
         queryResult.setChatContext(semanticQuery.getParseInfo());
         return queryResult;
@@ -282,8 +327,6 @@ public class QueryServiceImpl implements QueryService {
 
     @Override
     public Object queryDimensionValue(DimensionValueReq dimensionValueReq, User user) throws Exception {
-        com.tencent.supersonic.semantic.query.service.QueryService queryService =
-                ContextUtils.getBean(com.tencent.supersonic.semantic.query.service.QueryService.class);
         QueryStructReq queryStructReq = new QueryStructReq();
 
         DateConf dateConf = new DateConf();
@@ -307,7 +350,8 @@ public class QueryServiceImpl implements QueryService {
             dimensionFilters.add(dimensionFilter);
             queryStructReq.setDimensionFilters(dimensionFilters);
         }
-        QueryResultWithSchemaResp queryResultWithSchemaResp = queryService.queryByStructWithAuth(queryStructReq, user);
+        SemanticLayer semanticLayer = ComponentFactory.getSemanticLayer();
+        QueryResultWithSchemaResp queryResultWithSchemaResp = semanticLayer.queryByStruct(queryStructReq, user);
         Set<String> dimensionValues = new HashSet<>();
         queryResultWithSchemaResp.getResultList().removeIf(o -> {
             if (dimensionValues.contains(o.get(dimensionValueReq.getBizName()))) {

@@ -1,348 +1,371 @@
-examplars= [
-{ "current_date":"2020-12-01",
-"table_name":"内容库产品",
-"fields_list":"""["部门", "模块", "用户名", "访问次数", "访问人数", "访问时长", "数据日期"]""",
-"question":"比较jackjchen和robinlee在内容库的访问次数",
-"prior_schema_links":"""['jackjchen'->用户名, 'robinlee'->用户名]""",
+examplars = [
+{
+"current_date": "2020-12-01",
+"table_name": "内容库产品",
+"fields_list": """["部门", "模块", "用户名", "访问次数", "访问人数", "访问时长", "数据日期"]""",
+"question": "比较jackjchen和robinlee在内容库的访问次数",
+"prior_schema_links": """['jackjchen'->用户名, 'robinlee'->用户名]""",
 "analysis": """让我们一步一步地思考。在问题“比较jackjchen和robinlee在内容库的访问次数“中,我们被问:
 “比较jackjchen和robinlee”,所以我们需要column=[用户名]
 ”内容库的访问次数“,所以我们需要column=[访问次数]
 基于table和columns,可能的cell values 是 = ['jackjchen', 'robinlee']。""",
-"schema_links":"""["用户名", "访问次数", "'jackjchen'", "'robinlee'"]""",
-"sql":"""select 用户名, 访问次数 from 内容库产品 where 用户名 in ('jackjchen', 'robinlee') and 数据日期 = '2020-12-01' """
+"schema_links": """["用户名", "访问次数", "'jackjchen'", "'robinlee'"]""",
+"sql": """select 用户名, 访问次数 from 内容库产品 where 用户名 in ('jackjchen', 'robinlee') and 数据日期 = '2020-12-01' """,
 },
-{ "current_date":"2022-11-06",
-"table_name":"内容库产品",
-"fields_list":"""["部门", "模块", "用户名", "访问次数", "访问人数", "访问时长", "数据日期"]""",
-"question":"内容库近12个月访问人数 按部门",
-"prior_schema_links":"""[]""",
+{
+"current_date": "2022-11-06",
+"table_name": "内容库产品",
+"fields_list": """["部门", "模块", "用户名", "访问次数", "访问人数", "访问时长", "数据日期"]""",
+"question": "内容库近12个月访问人数 按部门",
+"prior_schema_links": """[]""",
 "analysis": """让我们一步一步地思考。在问题“内容库近12个月访问人数 按部门“中,我们被问:
 ”内容库近12个月“,所以我们需要column=[数据日期]
 “访问人数”,所以我们需要column=[访问人数]
 ”按部门“,所以我们需要column=[部门]
 基于table和columns,可能的cell values 是 = [12]。""",
-"schema_links":"""["访问人数", "部门", "数据日期", 12]""",
-"sql":"""select 部门, 数据日期, 访问人数 from 内容库产品 where datediff('month', 数据日期, '2022-11-06') <= 12 """
+"schema_links": """["访问人数", "部门", "数据日期", 12]""",
+"sql": """select 部门, 数据日期, 访问人数 from 内容库产品 where datediff('month', 数据日期, '2022-11-06') <= 12 """,
 },
-{ "current_date":"2023-04-21",
-"table_name":"内容库产品",
-"fields_list":"""["部门", "模块", "用户名", "访问次数", "访问人数", "访问时长", "数据日期"]""",
-"question":"内容库美术部、技术研发部的访问时长",
-"prior_schema_links":"""['美术部'->部门, '技术研发部'->部门]""",
+{
+"current_date": "2023-04-21",
+"table_name": "内容库产品",
+"fields_list": """["部门", "模块", "用户名", "访问次数", "访问人数", "访问时长", "数据日期"]""",
+"question": "内容库美术部、技术研发部的访问时长",
+"prior_schema_links": """['美术部'->部门, '技术研发部'->部门]""",
 "analysis": """让我们一步一步地思考。在问题“内容库美术部、技术研发部的访问时长“中,我们被问:
 “访问时长”,所以我们需要column=[访问时长]
 ”内容库美术部、技术研发部“,所以我们需要column=[部门]
 基于table和columns,可能的cell values 是 = ['美术部', '技术研发部']。""",
-"schema_links":"""["访问时长", "部门", "'美术部'", "'技术研发部'"]""",
-"sql":"""select 部门, 访问时长 from 内容库产品 where 部门 in ('美术部', '技术研发部') and 数据日期 = '2023-04-21' """
+"schema_links": """["访问时长", "部门", "'美术部'", "'技术研发部'"]""",
+"sql": """select 部门, 访问时长 from 内容库产品 where 部门 in ('美术部', '技术研发部') and 数据日期 = '2023-04-21' """,
 },
-{ "current_date":"2023-08-21",
-"table_name":"严选",
-"fields_list":"""["严选版权归属系", "付费模式", "结算播放份额", "付费用户结算播放份额", "数据日期"]""",
-"question":"近3天海田飞系MPPM结算播放份额",
-"prior_schema_links":"""['海田飞系'->严选版权归属系]""",
+{
+"current_date": "2023-08-21",
+"table_name": "严选",
+"fields_list": """["严选版权归属系", "付费模式", "结算播放份额", "付费用户结算播放份额", "数据日期"]""",
+"question": "近3天海田飞系MPPM结算播放份额",
+"prior_schema_links": """['海田飞系'->严选版权归属系]""",
 "analysis": """让我们一步一步地思考。在问题“近3天海田飞系MPPM结算播放份额“中,我们被问:
 “MPPM结算播放份额”,所以我们需要column=[结算播放份额]
 ”海田飞系“,所以我们需要column=[严选版权归属系]
 ”近3天“,所以我们需要column=[数据日期]
 基于table和columns,可能的cell values 是 = ['海田飞系', 3]。""",
-"schema_links":"""["结算播放份额", "严选版权归属系", "数据日期", "'海田飞系'", 3]""",
-"sql":"""select 严选版权归属系, 结算播放份额 from 严选 where 严选版权归属系 = '海田飞系' and datediff('day', 数据日期, '2023-08-21') <= 3 """
+"schema_links": """["结算播放份额", "严选版权归属系", "数据日期", "'海田飞系'", 3]""",
+"sql": """select 严选版权归属系, 结算播放份额 from 严选 where 严选版权归属系 = '海田飞系' and datediff('day', 数据日期, '2023-08-21') <= 3 """,
 },
-{ "current_date":"2023-05-22",
-"table_name":"歌曲库",
-"fields_list":"""["是否潮流人歌曲", "C音歌曲ID", "C音歌曲MID", "歌曲名", "歌曲版本", "语种", "歌曲类型", "翻唱类型", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "结算播放量", "运营播放量", "付费用户结算播放量", "历史累计结算播放量", "运营搜播量", "结算搜播量", "运营完播量", "运营推播量", "近7日复播率", "日均搜播量", "数据日期"]""",
-"question":"对比近7天翻唱版和纯音乐的歌曲播放量",
-"prior_schema_links":"""['纯音乐'->语种, '翻唱版'->歌曲版本]""",
+{
+"current_date": "2023-05-22",
+"table_name": "歌曲库",
+"fields_list": """["是否潮流人歌曲", "C音歌曲ID", "C音歌曲MID", "歌曲名", "歌曲版本", "语种", "歌曲类型", "翻唱类型", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "结算播放量", "运营播放量", "付费用户结算播放量", "历史累计结算播放量", "运营搜播量", "结算搜播量", "运营完播量", "运营推播量", "近7日复播率", "日均搜播量", "数据日期"]""",
+"question": "对比近7天翻唱版和纯音乐的歌曲播放量",
+"prior_schema_links": """['纯音乐'->语种, '翻唱版'->歌曲版本]""",
 "analysis": """让我们一步一步地思考。在问题“对比近3天翻唱版和纯音乐的歌曲播放量“中,我们被问:
 “歌曲播放量”,所以我们需要column=[结算播放量]
 ”翻唱版“,所以我们需要column=[歌曲版本]
 ”和纯音乐的歌曲“,所以我们需要column=[语种]
 ”近7天“,所以我们需要column=[数据日期]
 基于table和columns,可能的cell values 是 = ['翻唱版', '纯音乐', 7]。""",
-"schema_links":"""["结算播放量", "歌曲版本", "语种", "数据日期", "'翻唱版'", "'纯音乐'", 7]""",
-"sql":"""select 歌曲版本, 语种, 结算播放量 from 歌曲库 where 歌曲版本 = '翻唱版' and 语种 = '纯音乐' and datediff('day', 数据日期, '2023-05-22') <= 7 """
+"schema_links": """["结算播放量", "歌曲版本", "语种", "数据日期", "'翻唱版'", "'纯音乐'", 7]""",
+"sql": """select 歌曲版本, 语种, 结算播放量 from 歌曲库 where 歌曲版本 = '翻唱版' and 语种 = '纯音乐' and datediff('day', 数据日期, '2023-05-22') <= 7 """,
 },
-{ "current_date":"2023-05-31",
-"table_name":"艺人库",
-"fields_list":"""["上下架状态", "歌手名", "歌手等级", "歌手类型", "歌手来源", "MPPM潮流人等级", "活跃区域", "年龄", "歌手才能", "歌手风格", "粉丝数", "潮音粉丝数", "超声波粉丝数", "推博粉丝数", "超声波歌曲数", "在架歌曲数", "超声波分享数", "独占歌曲数", "超声波在架歌曲评论数", "有播放量歌曲数", "数据日期"]""",
-"question":"对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数",
-"prior_schema_links":"""['1527896'->MPPM歌手ID, '1565463'->MPPM歌手ID, '2141459'->MPPM歌手ID]""",
+{
+"current_date": "2023-05-31",
+"table_name": "艺人库",
+"fields_list": """["上下架状态", "歌手名", "歌手等级", "歌手类型", "歌手来源", "MPPM潮流人等级", "活跃区域", "年龄", "歌手才能", "歌手风格", "粉丝数", "潮音粉丝数", "超声波粉丝数", "推博粉丝数", "超声波歌曲数", "在架歌曲数", "超声波分享数", "独占歌曲数", "超声波在架歌曲评论数", "有播放量歌曲数", "数据日期"]""",
+"question": "对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数",
+"prior_schema_links": """['1527896'->MPPM歌手ID, '1565463'->MPPM歌手ID, '2141459'->MPPM歌手ID]""",
 "analysis": """让我们一步一步地思考。在问题“对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数“中,我们被问:
 “粉丝数”,所以我们需要column=[粉丝数]
 ”陈拙悬、孟梅琦、赖媚韵“,所以我们需要column=[歌手名]
 基于table和columns,可能的cell values 是 = ['陈拙悬', '孟梅琦', '赖媚韵']。""",
-"schema_links":"""["粉丝数", "歌手名", "'陈拙悬'", "'孟梅琦'", "'赖媚韵'"]""",
-"sql":"""select 歌手名, 粉丝数 from 艺人库 where 歌手名 in ('陈拙悬', '孟梅琦', '赖媚韵') and 数据日期 = '2023-05-31' """
+"schema_links": """["粉丝数", "歌手名", "'陈拙悬'", "'孟梅琦'", "'赖媚韵'"]""",
+"sql": """select 歌手名, 粉丝数 from 艺人库 where 歌手名 in ('陈拙悬', '孟梅琦', '赖媚韵') and 数据日期 = '2023-05-31' """,
 },
-{ "current_date":"2023-07-31",
-"table_name":"歌曲库",
-"fields_list":"""["歌曲名", "歌曲版本", "歌曲类型", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
-"question":"播放量大于1万的歌曲有多少",
-"prior_schema_links":"""[]""",
+{
+"current_date": "2023-07-31",
+"table_name": "歌曲库",
+"fields_list": """["歌曲名", "歌曲版本", "歌曲类型", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
+"question": "播放量大于1万的歌曲有多少",
+"prior_schema_links": """[]""",
 "analysis": """让我们一步一步地思考。在问题“播放量大于1万的歌曲有多少“中,我们被问:
 “歌曲有多少”,所以我们需要column=[歌曲名]
 ”播放量大于1万的“,所以我们需要column=[结算播放量]
 基于table和columns,可能的cell values 是 = [10000]。""",
-"schema_links":"""["歌曲名", "结算播放量", 10000]""",
-"sql":"""select 歌曲名 from 歌曲库 where 结算播放量 > 10000 and 数据日期 = '2023-07-31' """
+"schema_links": """["歌曲名", "结算播放量", 10000]""",
+"sql": """select 歌曲名 from 歌曲库 where 结算播放量 > 10000 and 数据日期 = '2023-07-31' """,
 },
-{ "current_date":"2023-07-31",
-"table_name":"内容库产品",
-"fields_list":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""",
-"question":"内容库访问时长小于1小时,且来自美术部的用户是哪些",
-"prior_schema_links":"""['美术部'->部门]""",
+{
+"current_date": "2023-07-31",
+"table_name": "内容库产品",
+"fields_list": """["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""",
+"question": "内容库访问时长小于1小时,且来自美术部的用户是哪些",
+"prior_schema_links": """['美术部'->部门]""",
 "analysis": """让我们一步一步地思考。在问题“内容库访问时长小于1小时,且来自美术部的用户是哪些“中,我们被问:
 “用户是哪些”,所以我们需要column=[用户名]
 ”美术部的“,所以我们需要column=[部门]
 ”访问时长小于1小时“,所以我们需要column=[访问时长]
 基于table和columns,可能的cell values 是 = ['美术部', 1]。""",
-"schema_links":"""["用户名", "部门", "访问时长", "'美术部'", 1]""",
-"sql":"""select 用户名 from 内容库产品 where 部门 = '美术部' and 访问时长 < 1 and 数据日期 = '2023-07-31' """
+"schema_links": """["用户名", "部门", "访问时长", "'美术部'", 1]""",
+"sql": """select 用户名 from 内容库产品 where 部门 = '美术部' and 访问时长 < 1 and 数据日期 = '2023-07-31' """,
 },
-{ "current_date":"2023-08-31",
-"table_name":"内容库产品",
-"fields_list":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""",
-"question":"内容库pv最高的用户有哪些",
-"prior_schema_links":"""[]""",
+{
+"current_date": "2023-08-31",
+"table_name": "内容库产品",
+"fields_list": """["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""",
+"question": "内容库pv最高的用户有哪些",
+"prior_schema_links": """[]""",
 "analysis": """让我们一步一步地思考。在问题“内容库pv最高的用户有哪些“中,我们被问:
 “用户有哪些”,所以我们需要column=[用户名]
 ”pv最高的“,所以我们需要column=[访问次数]
 基于table和columns,可能的cell values 是 = []。""",
-"schema_links":"""["用户名", "访问次数"]""",
-"sql":"""select 用户名 from 内容库产品 where 数据日期 = '2023-08-31' order by 访问次数 desc limit 10 """
+"schema_links": """["用户名", "访问次数"]""",
+"sql": """select 用户名 from 内容库产品 where 数据日期 = '2023-08-31' order by 访问次数 desc limit 10 """,
 },
-{ "current_date":"2023-08-31",
-"table_name":"艺人库",
-"fields_list":"""["播放量层级", "播放量单调性", "播放量方差", "播放量突增类型", "播放量集中度", "歌手名", "歌手等级", "歌手类型", "歌手来源", "MPPM潮流人等级", "结算播放量", "运营播放量", "历史累计结算播放量", "有播放量歌曲数", "历史累计运营播放量", "付费用户结算播放量", "结算播放量占比", "运营播放份额", "免费用户结算播放占比", "完播量", "数据日期"]""",
-"question":"近90天袁亚伟播放量平均值是多少",
-"prior_schema_links":"""['152789226'->MPPM歌手ID]""",
+{
+"current_date": "2023-08-31",
+"table_name": "艺人库",
+"fields_list": """["播放量层级", "播放量单调性", "播放量方差", "播放量突增类型", "播放量集中度", "歌手名", "歌手等级", "歌手类型", "歌手来源", "MPPM潮流人等级", "结算播放量", "运营播放量", "历史累计结算播放量", "有播放量歌曲数", "历史累计运营播放量", "付费用户结算播放量", "结算播放量占比", "运营播放份额", "免费用户结算播放占比", "完播量", "数据日期"]""",
+"question": "近90天袁亚伟播放量平均值是多少",
+"prior_schema_links": """['152789226'->MPPM歌手ID]""",
 "analysis": """让我们一步一步地思考。在问题“近90天袁亚伟播放量平均值是多少“中,我们被问:
 “播放量平均值是多少”,所以我们需要column=[结算播放量]
 ”袁亚伟“,所以我们需要column=[歌手名]
 ”近90天“,所以我们需要column=[数据日期]
 基于table和columns,可能的cell values 是 = ['袁亚伟', 90]。""",
-"schema_links":"""["结算播放量", "歌手名", "数据日期", "'袁亚伟'", 90]""",
-"sql":"""select avg(结算播放量) from 艺人库 where 歌手名 = '袁亚伟' and datediff('day', 数据日期, '2023-08-31') <= 90 """
+"schema_links": """["结算播放量", "歌手名", "数据日期", "'袁亚伟'", 90]""",
+"sql": """select avg(结算播放量) from 艺人库 where 歌手名 = '袁亚伟' and datediff('day', 数据日期, '2023-08-31') <= 90 """,
 },
-{ "current_date":"2023-08-31",
-"table_name":"艺人库",
-"fields_list":"""["播放量层级", "播放量单调性", "播放量方差", "播放量突增类型", "播放量集中度", "歌手名", "歌手等级", "歌手类型", "歌手来源", "MPPM潮流人等级", "结算播放量", "运营播放量", "历史累计结算播放量", "有播放量歌曲数", "历史累计运营播放量", "付费用户结算播放量", "结算播放量占比", "运营播放份额", "免费用户结算播放占比", "完播量", "数据日期"]""",
-"question":"周倩倩近7天结算播放量总和是多少",
-"prior_schema_links":"""['199509'->MPPM歌手ID]""",
+{
+"current_date": "2023-08-31",
+"table_name": "艺人库",
+"fields_list": """["播放量层级", "播放量单调性", "播放量方差", "播放量突增类型", "播放量集中度", "歌手名", "歌手等级", "歌手类型", "歌手来源", "MPPM潮流人等级", "结算播放量", "运营播放量", "历史累计结算播放量", "有播放量歌曲数", "历史累计运营播放量", "付费用户结算播放量", "结算播放量占比", "运营播放份额", "免费用户结算播放占比", "完播量", "数据日期"]""",
+"question": "周倩倩近7天结算播放量总和是多少",
+"prior_schema_links": """['199509'->MPPM歌手ID]""",
 "analysis": """让我们一步一步地思考。在问题“周倩倩近7天结算播放量总和是多少“中,我们被问:
 “结算播放量总和是多少”,所以我们需要column=[结算播放量]
 ”周倩倩“,所以我们需要column=[歌手名]
 ”近7天“,所以我们需要column=[数据日期]
 基于table和columns,可能的cell values 是 = ['周倩倩', 7]。""",
-"schema_links":"""["结算播放量", "歌手名", "数据日期", "'周倩倩'", 7]""",
-"sql":"""select sum(结算播放量) from 艺人库 where 歌手名 = '周倩倩' and datediff('day', 数据日期, '2023-08-31') <= 7 """
+"schema_links": """["结算播放量", "歌手名", "数据日期", "'周倩倩'", 7]""",
+"sql": """select sum(结算播放量) from 艺人库 where 歌手名 = '周倩倩' and datediff('day', 数据日期, '2023-08-31') <= 7 """,
 },
-{ "current_date":"2023-09-14",
-"table_name":"内容库产品",
-"fields_list":"""["部门", "模块", "用户名", "访问次数", "访问人数", "访问时长", "数据日期"]""",
-"question":"内容库访问次数大于1k的部门是哪些",
-"prior_schema_links":"""[]""",
+{
+"current_date": "2023-09-14",
+"table_name": "内容库产品",
+"fields_list": """["部门", "模块", "用户名", "访问次数", "访问人数", "访问时长", "数据日期"]""",
+"question": "内容库访问次数大于1k的部门是哪些",
+"prior_schema_links": """[]""",
 "analysis": """让我们一步一步地思考。在问题“内容库访问次数大于1k的部门是哪些“中,我们被问:
 “部门是哪些”,所以我们需要column=[部门]
 ”访问次数大于1k的“,所以我们需要column=[访问次数]
|
||||||
基于table和columns,可能的cell values 是 = [1000]。""",
|
基于table和columns,可能的cell values 是 = [1000]。""",
|
||||||
"schema_links":"""["部门", "访问次数", 1000]""",
|
"schema_links": """["部门", "访问次数", 1000]""",
|
||||||
"sql":"""select 部门 from 内容库产品 where 访问次数 > 1000 and 数据日期 = '2023-09-14' """
|
"sql": """select 部门 from 内容库产品 where 访问次数 > 1000 and 数据日期 = '2023-09-14' """,
|
||||||
},
|
},
|
||||||
{ "current_date":"2023-09-18",
|
{
|
||||||
"table_name":"歌曲库",
|
"current_date": "2023-09-18",
|
||||||
"fields_list":"""["歌曲名", "MPPM歌手ID", "歌曲版本", "歌曲类型", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
"table_name": "歌曲库",
|
||||||
"question":"陈亿训唱的所有的播放量大于20k的孤勇者有哪些",
|
"fields_list": """["歌曲名", "MPPM歌手ID", "歌曲版本", "歌曲类型", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
||||||
"prior_schema_links":"""['199509'->MPPM歌手ID, '1527123'->MPPM歌曲ID]""",
|
"question": "陈亿训唱的所有的播放量大于20k的孤勇者有哪些",
|
||||||
|
"prior_schema_links": """['199509'->MPPM歌手ID, '1527123'->MPPM歌曲ID]""",
|
||||||
"analysis": """让我们一步一步地思考。在问题“陈亿训唱的所有的播放量大于20k的孤勇者有哪些“中,我们被问:
|
"analysis": """让我们一步一步地思考。在问题“陈亿训唱的所有的播放量大于20k的孤勇者有哪些“中,我们被问:
|
||||||
“孤勇者有哪些”,所以我们需要column=[歌曲名]
|
“孤勇者有哪些”,所以我们需要column=[歌曲名]
|
||||||
”播放量大于20k的“,所以我们需要column=[结算播放量]
|
”播放量大于20k的“,所以我们需要column=[结算播放量]
|
||||||
”陈亿训唱的“,所以我们需要column=[歌手名]
|
”陈亿训唱的“,所以我们需要column=[歌手名]
|
||||||
基于table和columns,可能的cell values 是 = [20000, '陈亿训', '孤勇者']。""",
|
基于table和columns,可能的cell values 是 = [20000, '陈亿训', '孤勇者']。""",
|
||||||
"schema_links":"""["歌曲名", "结算播放量", "歌手名", 20000, "'陈亿训'", "'孤勇者'"]""",
|
"schema_links": """["歌曲名", "结算播放量", "歌手名", 20000, "'陈亿训'", "'孤勇者'"]""",
|
||||||
"sql":"""select 歌曲名 from 歌曲库 where 结算播放量 > 20000 and 歌手名 = '陈亿训' and 歌曲名 = '孤勇者' and 数据日期 = '2023-09-18' """
|
"sql": """select 歌曲名 from 歌曲库 where 结算播放量 > 20000 and 歌手名 = '陈亿训' and 歌曲名 = '孤勇者' and 数据日期 = '2023-09-18' """,
|
||||||
},
|
},
|
||||||
{ "current_date":"2023-09-18",
|
{
|
||||||
"table_name":"歌曲库",
|
"current_date": "2023-09-18",
|
||||||
"fields_list":"""["歌曲名", "歌曲版本", "歌手名", "歌曲类型", "发布时间", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
"table_name": "歌曲库",
|
||||||
"question":"周洁轮去年发布的歌曲有哪些",
|
"fields_list": """["歌曲名", "歌曲版本", "歌手名", "歌曲类型", "发布时间", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
||||||
"prior_schema_links":"""['23109'->MPPM歌手ID]""",
|
"question": "周洁轮去年发布的歌曲有哪些",
|
||||||
|
"prior_schema_links": """['23109'->MPPM歌手ID]""",
|
||||||
"analysis": """让我们一步一步地思考。在问题“周洁轮去年发布的歌曲有哪些“中,我们被问:
|
"analysis": """让我们一步一步地思考。在问题“周洁轮去年发布的歌曲有哪些“中,我们被问:
|
||||||
“歌曲有哪些”,所以我们需要column=[歌曲名]
|
“歌曲有哪些”,所以我们需要column=[歌曲名]
|
||||||
”去年发布的“,所以我们需要column=[发布时间]
|
”去年发布的“,所以我们需要column=[发布时间]
|
||||||
”周洁轮“,所以我们需要column=[歌手名]
|
”周洁轮“,所以我们需要column=[歌手名]
|
||||||
基于table和columns,可能的cell values 是 = ['周洁轮', 1]。""",
|
基于table和columns,可能的cell values 是 = ['周洁轮', 1]。""",
|
||||||
"schema_links":"""["歌曲名", "发布时间", "歌手名", 1, "'周洁轮'"]""",
|
"schema_links": """["歌曲名", "发布时间", "歌手名", 1, "'周洁轮'"]""",
|
||||||
"sql":"""select 歌曲名 from 歌曲库 where datediff('year', 发布时间, '2023-09-18') <= 1 and 歌手名 = '周洁轮' and 数据日期 = '2023-09-18' """
|
"sql": """select 歌曲名 from 歌曲库 where datediff('year', 发布时间, '2023-09-18') <= 1 and 歌手名 = '周洁轮' and 数据日期 = '2023-09-18' """,
|
||||||
},
|
},
|
||||||
{ "current_date":"2023-09-11",
|
{
|
||||||
"table_name":"艺人库",
|
"current_date": "2023-09-11",
|
||||||
"fields_list":"""["播放量层级", "播放量单调性", "播放量方差", "播放量突增类型", "播放量集中度", "歌手名", "歌手等级", "歌手类型", "歌手来源", "签约日期", "MPPM潮流人等级", "结算播放量", "运营播放量", "历史累计结算播放量", "有播放量歌曲数", "历史累计运营播放量", "付费用户结算播放量", "结算播放量占比", "运营播放份额", "免费用户结算播放占比", "完播量", "数据日期"]""",
|
"table_name": "艺人库",
|
||||||
"question":"我想要近半年签约的播放量前十的歌手有哪些",
|
"fields_list": """["播放量层级", "播放量单调性", "播放量方差", "播放量突增类型", "播放量集中度", "歌手名", "歌手等级", "歌手类型", "歌手来源", "签约日期", "MPPM潮流人等级", "结算播放量", "运营播放量", "历史累计结算播放量", "有播放量歌曲数", "历史累计运营播放量", "付费用户结算播放量", "结算播放量占比", "运营播放份额", "免费用户结算播放占比", "完播量", "数据日期"]""",
|
||||||
"prior_schema_links":"""[]""",
|
"question": "我想要近半年签约的播放量前十的歌手有哪些",
|
||||||
|
"prior_schema_links": """[]""",
|
||||||
"analysis": """让我们一步一步地思考。在问题“我想要近半年签约的播放量前十的歌手“中,我们被问:
|
"analysis": """让我们一步一步地思考。在问题“我想要近半年签约的播放量前十的歌手“中,我们被问:
|
||||||
“歌手有哪些”,所以我们需要column=[歌手名]
|
“歌手有哪些”,所以我们需要column=[歌手名]
|
||||||
”播放量前十的“,所以我们需要column=[结算播放量]
|
”播放量前十的“,所以我们需要column=[结算播放量]
|
||||||
”近半年签约的“,所以我们需要column=[签约日期]
|
”近半年签约的“,所以我们需要column=[签约日期]
|
||||||
基于table和columns,可能的cell values 是 = [0.5, 10]。""",
|
基于table和columns,可能的cell values 是 = [0.5, 10]。""",
|
||||||
"schema_links":"""["歌手名", "结算播放量", "签约日期", 0.5, 10]""",
|
"schema_links": """["歌手名", "结算播放量", "签约日期", 0.5, 10]""",
|
||||||
"sql":"""select 歌手名 from 艺人库 where datediff('year', 签约日期, '2023-09-11') <= 0.5 and 数据日期 = '2023-09-11' order by 结算播放量 desc limit 10"""
|
"sql": """select 歌手名 from 艺人库 where datediff('year', 签约日期, '2023-09-11') <= 0.5 and 数据日期 = '2023-09-11' order by 结算播放量 desc limit 10""",
|
||||||
},
|
},
|
||||||
{ "current_date":"2023-08-12",
|
{
|
||||||
"table_name":"歌曲库",
|
"current_date": "2023-08-12",
|
||||||
|
"table_name": "歌曲库",
|
||||||
"fields_list": """["发行日期", "歌曲语言", "歌曲来源", "歌曲流派", "歌曲名", "歌曲版本", "歌曲类型", "发行时间", "数据日期"]""",
|
"fields_list": """["发行日期", "歌曲语言", "歌曲来源", "歌曲流派", "歌曲名", "歌曲版本", "歌曲类型", "发行时间", "数据日期"]""",
|
||||||
"question":"最近一年发行的歌曲中,有哪些在近7天播放超过一千万的",
|
"question": "最近一年发行的歌曲中,有哪些在近7天播放超过一千万的",
|
||||||
"prior_schema_links":"""[]""",
|
"prior_schema_links": """[]""",
|
||||||
"analysis": """让我们一步一步地思考。在问题“最近一年发行的歌曲中,有哪些在近7天播放超过一千万的“中,我们被问:
|
"analysis": """让我们一步一步地思考。在问题“最近一年发行的歌曲中,有哪些在近7天播放超过一千万的“中,我们被问:
|
||||||
“发行的歌曲中,有哪些”,所以我们需要column=[歌曲名]
|
“发行的歌曲中,有哪些”,所以我们需要column=[歌曲名]
|
||||||
”最近一年发行的“,所以我们需要column=[发行日期]
|
”最近一年发行的“,所以我们需要column=[发行日期]
|
||||||
”在近7天播放超过一千万的“,所以我们需要column=[数据日期, 结算播放量]
|
”在近7天播放超过一千万的“,所以我们需要column=[数据日期, 结算播放量]
|
||||||
基于table和columns,可能的cell values 是 = [1, 10000000]""",
|
基于table和columns,可能的cell values 是 = [1, 10000000]""",
|
||||||
"schema_links":"""["歌曲名", "发行日期", "数据日期", "结算播放量", 1, 10000000]""",
|
"schema_links": """["歌曲名", "发行日期", "数据日期", "结算播放量", 1, 10000000]""",
|
||||||
"sql":"""select 歌曲名 from 歌曲库 where datediff('year', 发行日期, '2023-08-12') <= 1 and datediff('day', 数据日期, '2023-08-12') <= 7 and 结算播放量 > 10000000"""
|
"sql": """select 歌曲名 from 歌曲库 where datediff('year', 发行日期, '2023-08-12') <= 1 and datediff('day', 数据日期, '2023-08-12') <= 7 and 结算播放量 > 10000000""",
|
||||||
},
|
},
|
||||||
{ "current_date":"2023-08-12",
|
{
|
||||||
"table_name":"歌曲库",
|
"current_date": "2023-08-12",
|
||||||
|
"table_name": "歌曲库",
|
||||||
"fields_list": """["发行日期", "歌曲语言", "歌曲来源", "歌曲流派", "歌曲名", "歌曲版本", "歌曲类型", "发行时间", "数据日期"]""",
|
"fields_list": """["发行日期", "歌曲语言", "歌曲来源", "歌曲流派", "歌曲名", "歌曲版本", "歌曲类型", "发行时间", "数据日期"]""",
|
||||||
"question":"今年以来发行的歌曲中,有哪些在近7天播放超过一千万的",
|
"question": "今年以来发行的歌曲中,有哪些在近7天播放超过一千万的",
|
||||||
"prior_schema_links":"""[]""",
|
"prior_schema_links": """[]""",
|
||||||
"analysis": """让我们一步一步地思考。在问题“今年以来发行的歌曲中,有哪些在近7天播放超过一千万的“中,我们被问:
|
"analysis": """让我们一步一步地思考。在问题“今年以来发行的歌曲中,有哪些在近7天播放超过一千万的“中,我们被问:
|
||||||
“发行的歌曲中,有哪些”,所以我们需要column=[歌曲名]
|
“发行的歌曲中,有哪些”,所以我们需要column=[歌曲名]
|
||||||
”今年以来发行的“,所以我们需要column=[发行日期]
|
”今年以来发行的“,所以我们需要column=[发行日期]
|
||||||
”在近7天播放超过一千万的“,所以我们需要column=[数据日期, 结算播放量]
|
”在近7天播放超过一千万的“,所以我们需要column=[数据日期, 结算播放量]
|
||||||
基于table和columns,可能的cell values 是 = [0, 7, 10000000]""",
|
基于table和columns,可能的cell values 是 = [0, 7, 10000000]""",
|
||||||
"schema_links":"""["歌曲名", "发行日期", "数据日期", "结算播放量", 0, 7, 10000000]""",
|
"schema_links": """["歌曲名", "发行日期", "数据日期", "结算播放量", 0, 7, 10000000]""",
|
||||||
"sql":"""select 歌曲名 from 歌曲库 where datediff('year', 发行日期, '2023-08-12') <= 0 and datediff('day', 数据日期, '2023-08-12') <= 7 and 结算播放量 > 10000000"""
|
"sql": """select 歌曲名 from 歌曲库 where datediff('year', 发行日期, '2023-08-12') <= 0 and datediff('day', 数据日期, '2023-08-12') <= 7 and 结算播放量 > 10000000""",
|
||||||
},
|
},
|
||||||
{ "current_date":"2023-08-12",
|
{
|
||||||
"table_name":"歌曲库",
|
"current_date": "2023-08-12",
|
||||||
|
"table_name": "歌曲库",
|
||||||
"fields_list": """["发行日期", "歌曲语言", "歌曲来源", "歌曲流派", "歌曲名", "歌曲版本", "歌曲类型", "发行时间", "数据日期"]""",
|
"fields_list": """["发行日期", "歌曲语言", "歌曲来源", "歌曲流派", "歌曲名", "歌曲版本", "歌曲类型", "发行时间", "数据日期"]""",
|
||||||
"question":"2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的",
|
"question": "2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的",
|
||||||
"prior_schema_links":"""['514129144'->MPPM歌曲ID]""",
|
"prior_schema_links": """['514129144'->MPPM歌曲ID]""",
|
||||||
"analysis": """让我们一步一步地思考。在问题“2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的“中,我们被问:
|
"analysis": """让我们一步一步地思考。在问题“2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的“中,我们被问:
|
||||||
“发行的歌曲中,有哪些”,所以我们需要column=[歌曲名]
|
“发行的歌曲中,有哪些”,所以我们需要column=[歌曲名]
|
||||||
”2023年以来发行的“,所以我们需要column=[发行日期]
|
”2023年以来发行的“,所以我们需要column=[发行日期]
|
||||||
”在近7天播放超过一千万的“,所以我们需要column=[数据日期, 结算播放量]
|
”在近7天播放超过一千万的“,所以我们需要column=[数据日期, 结算播放量]
|
||||||
基于table和columns,可能的cell values 是 = [2023, 7, 10000000]""",
|
基于table和columns,可能的cell values 是 = [2023, 7, 10000000]""",
|
||||||
"schema_links":"""["歌曲名", "发行日期", "数据日期", "结算播放量", 2023, 7, 10000000]""",
|
"schema_links": """["歌曲名", "发行日期", "数据日期", "结算播放量", 2023, 7, 10000000]""",
|
||||||
"sql":"""select 歌曲名 from 歌曲库 where YEAR(发行日期) >= 2023 and datediff('day', 数据日期, '2023-08-12') <= 7 and 结算播放量 > 10000000"""
|
"sql": """select 歌曲名 from 歌曲库 where YEAR(发行日期) >= 2023 and datediff('day', 数据日期, '2023-08-12') <= 7 and 结算播放量 > 10000000""",
|
||||||
},
|
},
|
||||||
{ "current_date":"2023-08-01",
|
{
|
||||||
"table_name":"歌曲库",
|
"current_date": "2023-08-01",
|
||||||
"fields_list":"""["歌曲名", "歌曲版本", "歌手名", "歌曲类型", "发布时间", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
"table_name": "歌曲库",
|
||||||
"question":"周洁轮2023年6月之后发布的歌曲有哪些",
|
"fields_list": """["歌曲名", "歌曲版本", "歌手名", "歌曲类型", "发布时间", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
||||||
"prior_schema_links":"""['23109'->MPPM歌手ID]""",
|
"question": "周洁轮2023年6月之后发布的歌曲有哪些",
|
||||||
|
"prior_schema_links": """['23109'->MPPM歌手ID]""",
|
||||||
"analysis": """让我们一步一步地思考。在问题“周洁轮2023年6月之后发布的歌曲有哪些“中,我们被问:
|
"analysis": """让我们一步一步地思考。在问题“周洁轮2023年6月之后发布的歌曲有哪些“中,我们被问:
|
||||||
“歌曲有哪些”,所以我们需要column=[歌曲名]
|
“歌曲有哪些”,所以我们需要column=[歌曲名]
|
||||||
”2023年6月之后发布的“,所以我们需要column=[发布时间]
|
”2023年6月之后发布的“,所以我们需要column=[发布时间]
|
||||||
”周洁轮“,所以我们需要column=[歌手名]
|
”周洁轮“,所以我们需要column=[歌手名]
|
||||||
基于table和columns,可能的cell values 是 = ['周洁轮', 2023, 6]。""",
|
基于table和columns,可能的cell values 是 = ['周洁轮', 2023, 6]。""",
|
||||||
"schema_links":"""["歌曲名", "发布时间", "歌手名", "周洁轮", 2023, 6]""",
|
"schema_links": """["歌曲名", "发布时间", "歌手名", "周洁轮", 2023, 6]""",
|
||||||
"sql":"""select 歌曲名 from 歌曲库 where YEAR(发布时间) >= 2023 and MONTH(发布时间) >= 6 and 歌手名 = '周洁轮' and 数据日期 = '2023-08-01' """
|
"sql": """select 歌曲名 from 歌曲库 where YEAR(发布时间) >= 2023 and MONTH(发布时间) >= 6 and 歌手名 = '周洁轮' and 数据日期 = '2023-08-01' """,
|
||||||
},
|
},
|
||||||
{ "current_date":"2023-08-01",
|
{
|
||||||
"table_name":"歌曲库",
|
"current_date": "2023-08-01",
|
||||||
"fields_list":"""["歌曲名", "歌曲版本", "歌手名", "歌曲类型", "发布时间", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
"table_name": "歌曲库",
|
||||||
"question":"邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的?",
|
"fields_list": """["歌曲名", "歌曲版本", "歌手名", "歌曲类型", "发布时间", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
||||||
"prior_schema_links":"""['2312311'->MPPM歌手ID]""",
|
"question": "邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的?",
|
||||||
|
"prior_schema_links": """['2312311'->MPPM歌手ID]""",
|
||||||
"analysis": """让我们一步一步地思考。在问题“邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的?“中,我们被问:
|
"analysis": """让我们一步一步地思考。在问题“邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的?“中,我们被问:
|
||||||
“播放量大于500W的”,所以我们需要column=[结算播放量]
|
“播放量大于500W的”,所以我们需要column=[结算播放量]
|
||||||
”邓梓琦在2023年1月5日之后发布的“,所以我们需要column=[发布时间]
|
”邓梓琦在2023年1月5日之后发布的“,所以我们需要column=[发布时间]
|
||||||
”邓梓琦“,所以我们需要column=[歌手名]
|
”邓梓琦“,所以我们需要column=[歌手名]
|
||||||
基于table和columns,可能的cell values 是 = ['邓梓琦', 2023, 1, 5, 5000000]。""",
|
基于table和columns,可能的cell values 是 = ['邓梓琦', 2023, 1, 5, 5000000]。""",
|
||||||
"schema_links":"""["结算播放量", "发布时间", "歌手名", "邓梓琦", 2023, 1, 5, 5000000]""",
|
"schema_links": """["结算播放量", "发布时间", "歌手名", "邓梓琦", 2023, 1, 5, 5000000]""",
|
||||||
"sql":"""select 歌曲名 from 歌曲库 where YEAR(发布时间) >= 2023 and MONTH(发布时间) >= 1 and DAY(发布时间) >= 5 and 歌手名 = '邓梓琦' and 结算播放量 > 5000000 and 数据日期 = '2023-08-01'"""
|
"sql": """select 歌曲名 from 歌曲库 where YEAR(发布时间) >= 2023 and MONTH(发布时间) >= 1 and DAY(发布时间) >= 5 and 歌手名 = '邓梓琦' and 结算播放量 > 5000000 and 数据日期 = '2023-08-01'""",
|
||||||
},
|
},
|
||||||
{ "current_date":"2023-09-17",
|
{
|
||||||
"table_name":"歌曲库",
|
"current_date": "2023-09-17",
|
||||||
"fields_list":"""["歌曲名", "歌曲版本", "歌手名", "歌曲类型", "发布时间", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
"table_name": "歌曲库",
|
||||||
"question":"2023年6月以后,张亮英播放量大于200万的歌曲有哪些?",
|
"fields_list": """["歌曲名", "歌曲版本", "歌手名", "歌曲类型", "发布时间", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
||||||
"prior_schema_links":"""['45453'->MPPM歌手ID]""",
|
"question": "2023年6月以后,张亮英播放量大于200万的歌曲有哪些?",
|
||||||
|
"prior_schema_links": """['45453'->MPPM歌手ID]""",
|
||||||
"analysis": """让我们一步一步地思考。在问题“2023年6月以后,张亮英播放量大于200万的歌曲有哪些?“中,我们被问:
|
"analysis": """让我们一步一步地思考。在问题“2023年6月以后,张亮英播放量大于200万的歌曲有哪些?“中,我们被问:
|
||||||
“播放量大于200万的”,所以我们需要column=[结算播放量]
|
“播放量大于200万的”,所以我们需要column=[结算播放量]
|
||||||
”2023年6月以后,张亮英“,所以我们需要column=[数据日期, 歌手名]
|
”2023年6月以后,张亮英“,所以我们需要column=[数据日期, 歌手名]
|
||||||
”歌曲有哪些“,所以我们需要column=[歌曲名]
|
”歌曲有哪些“,所以我们需要column=[歌曲名]
|
||||||
基于table和columns,可能的cell values 是 = ['张亮英', 2023, 6, 2000000]。""",
|
基于table和columns,可能的cell values 是 = ['张亮英', 2023, 6, 2000000]。""",
|
||||||
"schema_links":"""["结算播放量", "数据日期", "歌手名", "张亮英", 2023, 6, 2000000]""",
|
"schema_links": """["结算播放量", "数据日期", "歌手名", "张亮英", 2023, 6, 2000000]""",
|
||||||
"sql":"""select 歌曲名 from 歌曲库 where YEAR(数据日期) >= 2023 and MONTH(数据日期) >= 6 and 歌手名 = '张亮英' and 结算播放量 > 2000000 """
|
"sql": """select 歌曲名 from 歌曲库 where YEAR(数据日期) >= 2023 and MONTH(数据日期) >= 6 and 歌手名 = '张亮英' and 结算播放量 > 2000000 """,
|
||||||
},
|
},
|
||||||
{ "current_date":"2023-08-16",
|
{
|
||||||
"table_name":"歌曲库",
|
"current_date": "2023-08-16",
|
||||||
"fields_list":"""["歌曲名", "歌曲版本", "歌手名", "歌曲类型", "发布时间", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
"table_name": "歌曲库",
|
||||||
"question":"2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些",
|
"fields_list": """["歌曲名", "歌曲版本", "歌手名", "歌曲类型", "发布时间", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
||||||
"prior_schema_links":"""['23109'->MPPM歌手ID]""",
|
"question": "2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些",
|
||||||
|
"prior_schema_links": """['23109'->MPPM歌手ID]""",
|
||||||
"analysis": """让我们一步一步地思考。在问题“2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些“中,我们被问:
|
"analysis": """让我们一步一步地思考。在问题“2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些“中,我们被问:
|
||||||
“播放量大于20万的”,所以我们需要column=[结算播放量]
|
“播放量大于20万的”,所以我们需要column=[结算播放量]
|
||||||
”2021年6月以后发布的“,所以我们需要column=[发布时间]
|
”2021年6月以后发布的“,所以我们需要column=[发布时间]
|
||||||
”李雨纯“,所以我们需要column=[歌手名]
|
”李雨纯“,所以我们需要column=[歌手名]
|
||||||
基于table和columns,可能的cell values 是 = ['李雨纯', 2021, 6, 200000]。""",
|
基于table和columns,可能的cell values 是 = ['李雨纯', 2021, 6, 200000]。""",
|
||||||
"schema_links":"""["结算播放量", "发布时间", "歌手名", "李雨纯", 2021, 6, 200000]""",
|
"schema_links": """["结算播放量", "发布时间", "歌手名", "李雨纯", 2021, 6, 200000]""",
|
||||||
"sql":"""select 歌曲名 from 歌曲库 where YEAR(发布时间) >= 2021 and MONTH(发布时间) >= 6 and 歌手名 = '李雨纯' and 结算播放量 > 200000 and 数据日期 = '2023-08-16'"""
|
"sql": """select 歌曲名 from 歌曲库 where YEAR(发布时间) >= 2021 and MONTH(发布时间) >= 6 and 歌手名 = '李雨纯' and 结算播放量 > 200000 and 数据日期 = '2023-08-16'""",
|
||||||
},
|
},
|
||||||
{ "current_date":"2023-08-16",
|
{
|
||||||
"table_name":"歌曲库",
|
"current_date": "2023-08-16",
|
||||||
"fields_list":"""["歌曲名", "歌曲版本", "歌手名", "歌曲类型", "发布时间", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
"table_name": "歌曲库",
|
||||||
"question":"刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些",
|
"fields_list": """["歌曲名", "歌曲版本", "歌手名", "歌曲类型", "发布时间", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
||||||
"prior_schema_links":"""['4234234'->MPPM歌手ID]""",
|
"question": "刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些",
|
||||||
|
"prior_schema_links": """['4234234'->MPPM歌手ID]""",
|
||||||
"analysis": """让我们一步一步地思考。在问题“刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些“中,我们被问:
|
"analysis": """让我们一步一步地思考。在问题“刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些“中,我们被问:
|
||||||
“播放量大于20万的”,所以我们需要column=[结算播放量]
|
“播放量大于20万的”,所以我们需要column=[结算播放量]
|
||||||
”1992年4月2日到2020年5月2日之间发布的“,所以我们需要column=[发布时间]
|
”1992年4月2日到2020年5月2日之间发布的“,所以我们需要column=[发布时间]
|
||||||
”刘锝桦“,所以我们需要column=[歌手名]
|
”刘锝桦“,所以我们需要column=[歌手名]
|
||||||
基于table和columns,可能的cell values 是 = ['刘锝桦', 1992, 4, 2, 2020, 5, 2, 200000]。""",
|
基于table和columns,可能的cell values 是 = ['刘锝桦', 1992, 4, 2, 2020, 5, 2, 200000]。""",
|
||||||
"schema_links":"""["结算播放量", "发布时间", "歌手名", "刘锝桦", 1992, 4, 2, 2020, 5, 2, 200000]""",
|
"schema_links": """["结算播放量", "发布时间", "歌手名", "刘锝桦", 1992, 4, 2, 2020, 5, 2, 200000]""",
|
||||||
"sql":"""select 歌曲名 from 歌曲库 where YEAR(发布时间) >= 1992 and MONTH(发布时间) >= 4 and DAY(发布时间) >= 2 and YEAR(发布时间) <= 2020 and MONTH(发布时间) <= 5 and DAY(发布时间) <= 2 and 歌手名 = '刘锝桦' and 结算播放量 > 200000 and 数据日期 = '2023-08-16'"""
|
"sql": """select 歌曲名 from 歌曲库 where YEAR(发布时间) >= 1992 and MONTH(发布时间) >= 4 and DAY(发布时间) >= 2 and YEAR(发布时间) <= 2020 and MONTH(发布时间) <= 5 and DAY(发布时间) <= 2 and 歌手名 = '刘锝桦' and 结算播放量 > 200000 and 数据日期 = '2023-08-16'""",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"current_date":"2023-09-04",
|
"current_date": "2023-09-04",
|
||||||
"table_name":"内容库产品",
|
"table_name": "内容库产品",
|
||||||
"fields_list":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""",
|
"fields_list": """["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""",
|
||||||
"question":"内容库近30天访问次数的平均数",
|
"question": "内容库近30天访问次数的平均数",
|
||||||
"prior_schema_links":"""[]""",
|
"prior_schema_links": """[]""",
|
||||||
"analysis": """让我们一步一步地思考。在问题“内容库近30天访问次数的平均数“中,我们被问:
|
"analysis": """让我们一步一步地思考。在问题“内容库近30天访问次数的平均数“中,我们被问:
|
||||||
“访问次数的平均数”,所以我们需要column=[访问次数]
|
“访问次数的平均数”,所以我们需要column=[访问次数]
|
||||||
”内容库近30天“,所以我们需要column=[数据日期]
|
”内容库近30天“,所以我们需要column=[数据日期]
|
||||||
基于table和columns,可能的cell values 是 = [30]。""",
|
基于table和columns,可能的cell values 是 = [30]。""",
|
||||||
"schema_links":"""["访问次数", "数据日期", 30]""",
|
"schema_links": """["访问次数", "数据日期", 30]""",
|
||||||
"sql":"""select avg(访问次数) from 内容库产品 where datediff('day', 数据日期, '2023-09-04') <= 30 """
|
"sql": """select avg(访问次数) from 内容库产品 where datediff('day', 数据日期, '2023-09-04') <= 30 """,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"current_date":"2023-09-04",
|
"current_date": "2023-09-04",
|
||||||
"table_name":"内容库产品",
|
"table_name": "内容库产品",
|
||||||
"fields_list":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""",
|
"fields_list": """["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""",
|
||||||
"question":"内容库近半年哪个月的访问次数汇总最高",
|
"question": "内容库近半年哪个月的访问次数汇总最高",
|
||||||
"prior_schema_links":"""[]""",
|
"prior_schema_links": """[]""",
|
||||||
"analysis": """让我们一步一步地思考。在问题“内容库近半年哪个月的访问次数汇总最高“中,我们被问:
|
"analysis": """让我们一步一步地思考。在问题“内容库近半年哪个月的访问次数汇总最高“中,我们被问:
|
||||||
“访问次数汇总最高”,所以我们需要column=[访问次数]
|
“访问次数汇总最高”,所以我们需要column=[访问次数]
|
||||||
”内容库近半年“,所以我们需要column=[数据日期]
|
”内容库近半年“,所以我们需要column=[数据日期]
|
||||||
基于table和columns,可能的cell values 是 = [0.5]。""",
|
基于table和columns,可能的cell values 是 = [0.5]。""",
|
||||||
"schema_links":"""["访问次数", "数据日期", 0.5]""",
|
"schema_links": """["访问次数", "数据日期", 0.5]""",
|
||||||
"sql":"""select MONTH(数据日期), sum(访问次数) from 内容库产品 where datediff('year', 数据日期, '2023-09-04') <= 0.5 group by MONTH(数据日期) order by sum(访问次数) desc limit 1 """
|
"sql": """select MONTH(数据日期), sum(访问次数) from 内容库产品 where datediff('year', 数据日期, '2023-09-04') <= 0.5 group by MONTH(数据日期) order by sum(访问次数) desc limit 1 """,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"current_date":"2023-09-04",
|
"current_date": "2023-09-04",
|
||||||
"table_name":"内容库产品",
|
"table_name": "内容库产品",
|
||||||
"fields_list":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""",
|
"fields_list": """["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""",
|
||||||
"question":"内容库近半年每个月的平均访问次数",
|
"question": "内容库近半年每个月的平均访问次数",
|
||||||
"prior_schema_links":"""[]""",
|
"prior_schema_links": """[]""",
|
||||||
"analysis": """让我们一步一步地思考。在问题“内容库近半年每个月的平均访问次数“中,我们被问:
|
"analysis": """让我们一步一步地思考。在问题“内容库近半年每个月的平均访问次数“中,我们被问:
|
||||||
“每个月的平均访问次数”,所以我们需要column=[访问次数]
|
“每个月的平均访问次数”,所以我们需要column=[访问次数]
|
||||||
”内容库近半年“,所以我们需要column=[数据日期]
|
”内容库近半年“,所以我们需要column=[数据日期]
|
||||||
基于table和columns,可能的cell values 是 = [0.5]。""",
|
基于table和columns,可能的cell values 是 = [0.5]。""",
|
||||||
"schema_links":"""["访问次数", "数据日期", 0.5]""",
|
"schema_links": """["访问次数", "数据日期", 0.5]""",
|
||||||
"sql":"""select MONTH(数据日期), avg(访问次数) from 内容库产品 where datediff('year', 数据日期, '2023-09-04') <= 0.5 group by MONTH(数据日期) """
|
"sql": """select MONTH(数据日期), avg(访问次数) from 内容库产品 where datediff('year', 数据日期, '2023-09-04') <= 0.5 group by MONTH(数据日期) """,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"current_date":"2023-09-10",
|
"current_date": "2023-09-10",
|
||||||
"table_name":"内容库产品",
|
"table_name": "内容库产品",
|
||||||
"fields_list":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""",
|
"fields_list": """["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""",
|
||||||
"question":"内容库 按部门统计访问次数 top10 的部门",
|
"question": "内容库 按部门统计访问次数 top10 的部门",
|
||||||
"prior_schema_links":"""[]""",
|
"prior_schema_links": """[]""",
|
||||||
"analysis": """让我们一步一步地思考。在问题“内容库 按部门统计访问次数 top10 的部门“中,我们被问:
|
"analysis": """让我们一步一步地思考。在问题“内容库 按部门统计访问次数 top10 的部门“中,我们被问:
|
||||||
“访问次数 top10 的部门”,所以我们需要column=[访问次数]
|
“访问次数 top10 的部门”,所以我们需要column=[访问次数]
|
||||||
”内容库 按部门统计“,所以我们需要column=[部门]
|
”内容库 按部门统计“,所以我们需要column=[部门]
|
||||||
基于table和columns,可能的cell values 是 = [10]。""",
|
基于table和columns,可能的cell values 是 = [10]。""",
|
||||||
"schema_links":"""["访问次数", "部门", 10]""",
|
"schema_links": """["访问次数", "部门", 10]""",
|
||||||
"sql":"""select 部门, sum(访问次数) from 内容库产品 group by 部门 order by sum(访问次数) desc limit 10 """
|
"sql": """select 部门, sum(访问次数) from 内容库产品 group by 部门 order by sum(访问次数) desc limit 10 """,
|
||||||
}
|
},
|
||||||
]
|
]
|
||||||
|
|||||||
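Each examplar above must expose exactly the keys the example selector below reads ("table_name", "fields_list", "prior_schema_links", "question", "analysis", "schema_links", "current_date", "sql"). A minimal sanity check, as a sketch, assuming the list is importable as `examplars` from few_shot_example.sql_exampler:

# Sketch: verify every few-shot examplar carries the keys the selector expects.
REQUIRED_KEYS = {
    "current_date", "table_name", "fields_list", "question",
    "prior_schema_links", "analysis", "schema_links", "sql",
}

def validate_examplars(examplars):
    for idx, example in enumerate(examplars):
        missing = REQUIRED_KEYS - example.keys()
        if missing:
            raise ValueError(f"examplar {idx} is missing keys: {missing}")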
@@ -14,7 +14,7 @@ def construct_plugin_prompt(tool_config):
    tool_name = tool_config["name"]
    tool_description = tool_config["description"]
    tool_examples = tool_config["examples"]

    prompt = "【工具名称】\n" + tool_name + "\n"
    prompt += "【工具描述】\n" + tool_description + "\n"

@@ -23,6 +23,7 @@ def construct_plugin_prompt(tool_config):
        prompt += example + "\n"
    return prompt


def construct_plugin_pool_prompt(tool_config_list):
    tool_explain_list = []
    for tool_config in tool_config_list:
@@ -35,15 +36,20 @@ def construct_plugin_pool_prompt(tool_config_list):


def construct_task_prompt(query_text, tool_explain_list_str):
    instruction = """问题为:{query_text}\n请根据问题和工具的描述,选择对应的工具,完成任务。请注意,只能选择1个工具。请一步一步地分析选择工具的原因(每个工具的【工具适用问题示例】是选择的重要参考依据),并给出最终选择,输出格式为json,key为’分析过程‘, ’选择工具‘""".format(
        query_text=query_text
    )

    prompt = "工具选择如下:\n\n{tool_explain_list_str}\n\n【任务说明】\n{instruction}".format(
        instruction=instruction, tool_explain_list_str=tool_explain_list_str
    )

    return prompt


def plugin_selection_output_parse(llm_output: str) -> Union[Mapping[str, str], None]:
    try:
        pattern = r"\{[^{}]+\}"
        find_result = re.findall(pattern, llm_output)
        result = find_result[0].strip()
@@ -52,20 +58,24 @@ def plugin_selection_output_parse(llm_output: str)-> Union[Mapping[str, str], No
        result_dict = json.loads(result)
        print("result_dict: ", result_dict)

        key_mapping = {"分析过程": "analysis", "选择工具": "toolSelection"}

        converted_result_dict = {
            key_mapping[key]: value
            for key, value in result_dict.items()
            if key in key_mapping
        }

    except Exception as e:
        print(e)
        converted_result_dict = None

    return converted_result_dict


def plugins_config_format_convert(
    plugin_config_list: List[Mapping[str, Any]]
) -> List[Mapping[str, Any]]:
    plugin_config_list_new = []
    for plugin_config in plugin_config_list:
        plugin_config_new = dict()
@@ -75,7 +85,9 @@ def plugins_config_format_convert(plugin_config_list: List[Mapping[str, Any]]) -
        parameters = plugin_config["parameters"]

        examples_str = "\n".join(examples)
        description_new = """{plugin_desc}\n\n例如能够处理如下问题:\n{examples_str}""".format(
            plugin_desc=description, examples_str=examples_str
        )

        plugin_config_new["name"] = name
        plugin_config_new["description"] = description_new
@@ -84,4 +96,3 @@ def plugins_config_format_convert(plugin_config_list: List[Mapping[str, Any]]) -
        plugin_config_list_new.append(plugin_config_new)

    return plugin_config_list_new

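For reference, a usage sketch of plugin_selection_output_parse with a hypothetical LLM response (the tool name is a placeholder, not a real plugin):

# The parser extracts the first {...} block, parses it as JSON, and renames
# the Chinese keys via key_mapping; it returns None if anything fails.
llm_output = '分析如下:{"分析过程": "该问题与内容库访问统计相关", "选择工具": "示例工具"}'
parsed = plugin_selection_output_parse(llm_output)
# parsed == {"analysis": "该问题与内容库访问统计相关", "toolSelection": "示例工具"}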
@@ -10,12 +10,19 @@ import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from plugin_call.prompt_construct import (
    construct_plugin_pool_prompt,
    construct_task_prompt,
    plugin_selection_output_parse,
    plugins_config_format_convert,
)
from util.llm_instance import llm


def plugin_selection_run(
    query_text: str, plugin_configs: List[Mapping[str, Any]]
) -> Union[Mapping[str, str], None]:
    tools_prompt = construct_plugin_pool_prompt(plugin_configs)

    task_prompt = construct_task_prompt(query_text, tools_prompt)
@@ -23,4 +30,3 @@ def plugin_selection_run(query_text: str, plugin_configs: List[Mapping[str, Any]
    parsed_output = plugin_selection_output_parse(llm_output)

    return parsed_output

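A usage sketch for plugin_selection_run; the config below is hypothetical and already in the shape construct_plugin_prompt reads (name, description, examples):

# Sketch only: "示例工具" and its example questions are placeholders.
plugin_configs = [
    {
        "name": "示例工具",
        "description": "回答内容库访问统计类问题",
        "examples": ["内容库近30天访问次数", "内容库pv最高的用户有哪些"],
    }
]
result = plugin_selection_run("内容库近半年每个月的平均访问次数", plugin_configs)
# result is {"analysis": ..., "toolSelection": ...}, or None if parsing failed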
@@ -11,7 +11,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(os.path.dirname(os.path.abspath(__file__)))


def get_ids(documents: List[str]) -> List[str]:
    ids = []
    for doc in documents:
        ids.append(str(uuid.uuid5(uuid.NAMESPACE_URL, doc)))
@@ -19,25 +19,23 @@ def get_ids(documents:List[str]) -> List[str]:
    return ids


def add2preset_query_collection(
    collection: Collection, preset_queries: List[str], preset_query_ids: List[str]
) -> None:
    collection.add(documents=preset_queries, ids=preset_query_ids)


def update_preset_query_collection(
    collection: Collection, preset_queries: List[str], preset_query_ids: List[str]
) -> None:
    collection.update(documents=preset_queries, ids=preset_query_ids)


def query2preset_query_collection(
    collection: Collection, query_texts: List[str], n_results: int = 10
):
    collection_cnt = collection.count()
    min_n_results = 10
    min_n_results = min(collection_cnt, min_n_results)
@@ -56,12 +54,13 @@ def query2preset_query_collection(collection:Collection, query_texts:List[str],

    return res


def parse_retrieval_preset_query(res: List[Mapping[str, Any]]):
    parsed_res = [[] for _ in range(0, len(res["ids"]))]

    retrieval_ids = res["ids"]
    retrieval_distances = res["distances"]
    retrieval_sentences = res["documents"]

    for query_idx in range(0, len(retrieval_ids)):
        id_ls = retrieval_ids[query_idx]
@@ -73,43 +72,41 @@ def parse_retrieval_preset_query(res:List[Mapping[str, Any]]):
            distance = distance_ls[idx]
            sentence = sentence_ls[idx]

            parsed_res[query_idx].append(
                {"id": id, "distance": distance, "presetQuery": sentence}
            )

    return parsed_res


def preset_query_retrieval_format(
    query_list: List[str], retrieval_list: List[Mapping[str, Any]]
):
    res = []
    for query_idx in range(0, len(query_list)):
        query = query_list[query_idx]
        retrieval = retrieval_list[query_idx]

        res.append({"query": query, "retrieval": retrieval})

    return res


def empty_preset_query_collection(collection: Collection) -> None:
    collection.delete()


def delete_preset_query_by_ids(
    collection: Collection, preset_query_ids: List[str]
) -> None:
    collection.delete(ids=preset_query_ids)


def get_preset_query_by_ids(collection: Collection, preset_query_ids: List[str]):
    res = collection.get(ids=preset_query_ids)

    return res


def preset_query_collection_size(collection: Collection) -> int:
    return collection.count()

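The chromadb query result that parse_retrieval_preset_query consumes is grouped per input query text; a sketch of the expected shape with illustrative values:

# One inner list per query text; ids come from get_ids (uuid5 over the
# document text, so the same preset query always maps to the same id).
res = {
    "ids": [["id-1", "id-2"]],
    "distances": [[0.12, 0.34]],
    "documents": [["近90天袁亚伟播放量平均值是多少", "周倩倩近7天结算播放量总和是多少"]],
}
# parse_retrieval_preset_query(res)[0][0]
# -> {"id": "id-1", "distance": 0.12, "presetQuery": "近90天袁亚伟播放量平均值是多少"}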
@@ -13,34 +13,45 @@ from chromadb.api import Collection, Documents, Embeddings

from langchain.llms import OpenAI

from preset_query_db import (
    get_ids,
    add2preset_query_collection,
    query2preset_query_collection,
    parse_retrieval_preset_query,
    preset_query_retrieval_format,
    empty_preset_query_collection,
    preset_query_collection_size,
)

from util.text2vec import Text2VecEmbeddingFunction

from run_config import CHROMA_DB_PERSIST_PATH, PRESET_QUERY_COLLECTION_NAME
from util.chromadb_instance import client


emb_func = Text2VecEmbeddingFunction()

collection = client.get_or_create_collection(
    name=PRESET_QUERY_COLLECTION_NAME,
    embedding_function=emb_func,
    metadata={"hnsw:space": "cosine"},
)  # Get a collection object from an existing collection, by name. If it doesn't exist, create it.

print("init_preset_query_collection_size: ", preset_query_collection_size(collection))


def preset_query_retrieval_run(
    collection: Collection, query_texts_list: List[str], n_results: int = 5
):
    retrieval_res = query2preset_query_collection(
        collection=collection, query_texts=query_texts_list, n_results=n_results
    )

    parsed_retrieval_res = parse_retrieval_preset_query(retrieval_res)
    parsed_retrieval_res_format = preset_query_retrieval_format(
        query_texts_list, parsed_retrieval_res
    )

    print("parsed_retrieval_res_format: ", parsed_retrieval_res_format)

    return parsed_retrieval_res_format
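A usage sketch, reusing the module-level collection created above:

# Retrieve the closest preset queries for one user question (sketch).
matches = preset_query_retrieval_run(collection, ["内容库近30天访问次数"], n_results=5)
# matches == [{"query": "内容库近30天访问次数", "retrieval": [...]}]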
@@ -11,7 +11,7 @@ OPENAI_API_KEY = "YOUR_API_KEY"

TEMPERATURE = 0.0

CHROMA_DB_PERSIST_DIR = "chm_db"
PRESET_QUERY_COLLECTION_NAME = "preset_query_collection"
TEXT2DSL_COLLECTION_NAME = "text2dsl_collection"
TEXT2DSL_FEW_SHOTS_EXAMPLE_NUM = 15
@@ -21,9 +21,9 @@ CHROMA_DB_PERSIST_PATH = os.path.join(PROJECT_DIR_PATH, CHROMA_DB_PERSIST_DIR)

HF_TEXT2VEC_MODEL_NAME = "GanymedeNil/text2vec-large-chinese"

if __name__ == "__main__":
    print("PROJECT_DIR_PATH: ", PROJECT_DIR_PATH)
    print("EMB_MODEL_PATH: ", HF_TEXT2VEC_MODEL_NAME)
    print("CHROMA_DB_PERSIST_PATH: ", CHROMA_DB_PERSIST_PATH)
    print("LLMPARSER_HOST: ", LLMPARSER_HOST)
    print("LLMPARSER_PORT: ", LLMPARSER_PORT)
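Running the config module directly executes the __main__ block above and prints the resolved settings, a quick way to confirm the chromadb persist path and parser endpoint before starting the services (paths depend on where the project is checked out):

python run_config.py
# PROJECT_DIR_PATH:  <project root>
# CHROMA_DB_PERSIST_PATH:  <project root>/chm_db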
@@ -22,20 +22,34 @@ from util.text2vec import Text2VecEmbeddingFunction, hg_embedding
from util.chromadb_instance import client as chromadb_client, empty_chroma_collection_2
from run_config import TEXT2DSL_COLLECTION_NAME, TEXT2DSL_FEW_SHOTS_EXAMPLE_NUM


def reload_sql_example_collection(
    vectorstore: Chroma,
    sql_examplars: List[Mapping[str, str]],
    sql_example_selector: SemanticSimilarityExampleSelector,
    example_nums: int,
):
    print("original sql_examples_collection size:", vectorstore._collection.count())
    new_collection = empty_chroma_collection_2(collection=vectorstore._collection)
    vectorstore._collection = new_collection

    print("emptied sql_examples_collection size:", vectorstore._collection.count())

    sql_example_selector = SemanticSimilarityExampleSelector(
        vectorstore=sql_examples_vectorstore,
        k=example_nums,
        input_keys=["question"],
        example_keys=[
            "table_name",
            "fields_list",
            "prior_schema_links",
            "question",
            "analysis",
            "schema_links",
            "current_date",
            "sql",
        ],
    )

    for example in sql_examplars:
        sql_example_selector.add_example(example)
@@ -45,20 +59,36 @@ def reload_sql_example_collection(vectorstore:Chroma,
    return vectorstore, sql_example_selector


sql_examples_vectorstore = Chroma(
    collection_name=TEXT2DSL_COLLECTION_NAME,
    embedding_function=hg_embedding,
    client=chromadb_client,
)

example_nums = TEXT2DSL_FEW_SHOTS_EXAMPLE_NUM

sql_example_selector = SemanticSimilarityExampleSelector(
    vectorstore=sql_examples_vectorstore,
    k=example_nums,
    input_keys=["question"],
    example_keys=[
        "table_name",
        "fields_list",
        "prior_schema_links",
        "question",
        "analysis",
        "schema_links",
        "current_date",
        "sql",
    ],
)

if sql_examples_vectorstore._collection.count() > 0:
    print("examples already in sql_vectorstore")
    print("init sql_vectorstore size:", sql_examples_vectorstore._collection.count())

print("sql_examplars size:", len(sql_examplars))
sql_examples_vectorstore, sql_example_selector = reload_sql_example_collection(
    sql_examples_vectorstore, sql_examplars, sql_example_selector, example_nums
)
print("added sql_vectorstore size:", sql_examples_vectorstore._collection.count())
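Reloading with an updated examplar list mirrors the module-level bootstrap above; a sketch, using the objects defined in this file:

# Rebuild the few-shot store from a fresh examplar list (sketch; the empty
# collection is repopulated so stale examples cannot survive a reload).
new_examplars = sql_examplars  # e.g. an updated list pushed from the admin side
sql_examples_vectorstore, sql_example_selector = reload_sql_example_collection(
    sql_examples_vectorstore, new_examplars, sql_example_selector, example_nums
)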
@@ -13,17 +13,31 @@ from few_shot_example.sql_exampler import examplars as sql_examplars
from run_config import LLMPARSER_HOST, LLMPARSER_PORT


def text2dsl_setting_update(
    llm_parser_host: str,
    llm_parser_port: str,
    sql_examplars: List[Mapping[str, str]],
    example_nums: int,
    is_shortcut: bool,
):
    url = f"http://{llm_parser_host}:{llm_parser_port}/query2sql_setting_update/"
    print("url: ", url)
    payload = {
        "sqlExamplars": sql_examplars,
        "exampleNums": example_nums,
        "isShortcut": is_shortcut,
    }
    headers = {"content-type": "application/json"}
    response = requests.post(url, data=json.dumps(payload), headers=headers)
    print(response.text)


if __name__ == "__main__":
    text2dsl_setting_update(
        LLMPARSER_HOST,
        LLMPARSER_PORT,
        sql_examplars,
        TEXT2DSL_FEW_SHOTS_EXAMPLE_NUM,
        TEXT2DSL_IS_SHORTCUT,
    )
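A direct invocation against a locally running parser service, as a sketch (host and port are whatever run_config resolves them to; values here are illustrative):

text2dsl_setting_update("localhost", "9092", sql_examplars, 15, False)
# -> POSTs {"sqlExamplars": [...], "exampleNums": 15, "isShortcut": false}
#    to http://localhost:9092/query2sql_setting_update/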
@@ -1,21 +1,25 @@
# -*- coding:utf-8 -*-
import re


def schema_link_parse(schema_link_output):
    try:
        schema_link_output = schema_link_output.strip()
        pattern = r"Schema_links:(.*)"
        schema_link_output = re.findall(pattern, schema_link_output, re.DOTALL)[
            0
        ].strip()
    except Exception as e:
        print(e)
        schema_link_output = None

    return schema_link_output


def combo_schema_link_parse(schema_linking_sql_combo_output: str):
    try:
        schema_linking_sql_combo_output = schema_linking_sql_combo_output.strip()
        pattern = r"Schema_links:(\[.*?\])"
        schema_links_match = re.search(pattern, schema_linking_sql_combo_output)

        if schema_links_match:
@@ -28,10 +32,11 @@ def combo_schema_link_parse(schema_linking_sql_combo_output: str):

        return schema_links


def combo_sql_parse(schema_linking_sql_combo_output: str):
    try:
        schema_linking_sql_combo_output = schema_linking_sql_combo_output.strip()
        pattern = r"SQL:(.*)"
        sql_match = re.search(pattern, schema_linking_sql_combo_output)

        if sql_match:
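Roughly, the two combo parsers pick apart a response in the prompt format defined below; a sketch with an illustrative response string:

output = '分析:... 所以Schema_links是:\nSchema_links:["歌手名", "结算播放量"]\nSQL:select 歌手名 from 艺人库'
combo_schema_link_parse(output)  # -> '["歌手名", "结算播放量"]'
combo_sql_parse(output)          # -> 'select 歌手名 from 艺人库'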
@@ -11,17 +11,31 @@ from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector


def schema_linking_exampler(
    user_query: str,
    domain_name: str,
    fields_list: List[str],
    prior_schema_links: Mapping[str, str],
    example_selector: SemanticSimilarityExampleSelector,
) -> str:
    prior_schema_links_str = (
        "["
        + ",".join(["""'{}'->{}""".format(k, v) for k, v in prior_schema_links.items()])
        + "]"
    )

    example_prompt_template = PromptTemplate(
        input_variables=[
            "table_name",
            "fields_list",
            "prior_schema_links",
            "question",
            "analysis",
            "schema_links",
        ],
        template="Table {table_name}, columns = {fields_list}, prior_schema_links = {prior_schema_links}\n问题:{question}\n分析:{analysis} 所以Schema_links是:\nSchema_links:{schema_links}",
    )

    instruction = "# 根据数据库的表结构,参考先验信息,找出为每个问题生成SQL查询语句的schema_links"
@@ -30,81 +44,121 @@ def schema_linking_exampler(user_query: str,
    schema_linking_example_prompt_template = FewShotPromptTemplate(
        example_selector=example_selector,
        example_prompt=example_prompt_template,
        example_separator="\n\n",
        prefix=instruction,
        input_variables=["table_name", "fields_list", "prior_schema_links", "question"],
        suffix=schema_linking_prompt,
    )

    schema_linking_example_prompt = schema_linking_example_prompt_template.format(
        table_name=domain_name,
        fields_list=fields_list,
        prior_schema_links=prior_schema_links_str,
        question=user_query,
    )

    return schema_linking_example_prompt


def sql_exampler(
    user_query: str,
    domain_name: str,
    schema_link_str: str,
    data_date: str,
    example_selector: SemanticSimilarityExampleSelector,
) -> str:
    instruction = "# 根据schema_links为每个问题生成SQL查询语句"

    sql_example_prompt_template = PromptTemplate(
        input_variables=[
            "question",
            "current_date",
            "table_name",
            "schema_links",
            "sql",
        ],
        template="问题:{question}\nCurrent_date:{current_date}\nTable {table_name}\nSchema_links:{schema_links}\nSQL:{sql}",
    )

    sql_prompt = "问题:{question}\nCurrent_date:{current_date}\nTable {table_name}\nSchema_links:{schema_links}\nSQL:"

    sql_example_prompt_template = FewShotPromptTemplate(
        example_selector=example_selector,
        example_prompt=sql_example_prompt_template,
        example_separator="\n\n",
        prefix=instruction,
        input_variables=["question", "current_date", "table_name", "schema_links"],
        suffix=sql_prompt,
    )

    sql_example_prompt = sql_example_prompt_template.format(
        question=user_query,
        current_date=data_date,
        table_name=domain_name,
        schema_links=schema_link_str,
    )

    return sql_example_prompt


def schema_linking_sql_combo_examplar(
    user_query: str,
    domain_name: str,
    data_date: str,
    fields_list: List[str],
    prior_schema_links: Mapping[str, str],
    example_selector: SemanticSimilarityExampleSelector,
) -> str:
    prior_schema_links_str = (
        "["
        + ",".join(["""'{}'->{}""".format(k, v) for k, v in prior_schema_links.items()])
        + "]"
    )

    example_prompt_template = PromptTemplate(
        input_variables=[
            "table_name",
|
"fields_list",
|
||||||
|
"prior_schema_links",
|
||||||
|
"current_date",
|
||||||
|
"question",
|
||||||
|
"analysis",
|
||||||
|
"schema_links",
|
||||||
|
"sql",
|
||||||
|
],
|
||||||
|
template="Table {table_name}, columns = {fields_list}, prior_schema_links = {prior_schema_links}\nCurrent_date:{current_date}\n问题:{question}\n分析:{analysis} 所以Schema_links是:\nSchema_links:{schema_links}\nSQL:{sql}",
|
||||||
|
)
|
||||||
|
|
||||||
|
instruction = (
|
||||||
|
"# 根据数据库的表结构,参考先验信息,找出为每个问题生成SQL查询语句的schema_links,再根据schema_links为每个问题生成SQL查询语句"
|
||||||
|
)
|
||||||
|
|
||||||
schema_linking_sql_combo_prompt = "Table {table_name}, columns = {fields_list}, prior_schema_links = {prior_schema_links}\nCurrent_date:{current_date}\n问题:{question}\n分析: 让我们一步一步地思考。"
|
schema_linking_sql_combo_prompt = "Table {table_name}, columns = {fields_list}, prior_schema_links = {prior_schema_links}\nCurrent_date:{current_date}\n问题:{question}\n分析: 让我们一步一步地思考。"
|
||||||
|
|
||||||
schema_linking_sql_combo_example_prompt_template = FewShotPromptTemplate(
|
schema_linking_sql_combo_example_prompt_template = FewShotPromptTemplate(
|
||||||
example_selector=example_selector,
|
example_selector=example_selector,
|
||||||
example_prompt=example_prompt_template,
|
example_prompt=example_prompt_template,
|
||||||
example_separator="\n\n",
|
example_separator="\n\n",
|
||||||
prefix=instruction,
|
prefix=instruction,
|
||||||
input_variables=["table_name", "fields_list", "prior_schema_links", "current_date", "question"],
|
input_variables=[
|
||||||
suffix=schema_linking_sql_combo_prompt
|
"table_name",
|
||||||
|
"fields_list",
|
||||||
|
"prior_schema_links",
|
||||||
|
"current_date",
|
||||||
|
"question",
|
||||||
|
],
|
||||||
|
suffix=schema_linking_sql_combo_prompt,
|
||||||
|
)
|
||||||
|
|
||||||
|
schema_linking_sql_combo_example_prompt = (
|
||||||
|
schema_linking_sql_combo_example_prompt_template.format(
|
||||||
|
table_name=domain_name,
|
||||||
|
fields_list=fields_list,
|
||||||
|
prior_schema_links=prior_schema_links_str,
|
||||||
|
current_date=data_date,
|
||||||
|
question=user_query,
|
||||||
)
|
)
|
||||||
|
)
|
||||||
schema_linking_sql_combo_example_prompt = schema_linking_sql_combo_example_prompt_template.format(table_name=domain_name,
|
|
||||||
fields_list=fields_list,
|
|
||||||
prior_schema_links=prior_schema_links_str,
|
|
||||||
current_date=data_date,
|
|
||||||
question=user_query)
|
|
||||||
return schema_linking_sql_combo_example_prompt
|
return schema_linking_sql_combo_example_prompt
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
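As a quick illustration (an editor's sketch, not part of this changeset), the prior_schema_links string these examplers build renders a value-to-field mapping as follows; the mapping contents are toy data:

# Toy mapping of matched field values to field names, as the examplers expect.
prior_schema_links = {"周杰伦": "歌手名", "七里香": "歌曲名"}
prior_schema_links_str = (
    "["
    + ",".join("'{}'->{}".format(k, v) for k, v in prior_schema_links.items())
    + "]"
)
print(prior_schema_links_str)  # ['周杰伦'->歌手名,'七里香'->歌曲名]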
@@ -7,133 +7,182 @@ import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

-from sql.prompt_maker import schema_linking_exampler, sql_exampler, schema_linking_sql_combo_examplar
-from sql.constructor import sql_examples_vectorstore, sql_example_selector, reload_sql_example_collection
-from sql.output_parser import schema_link_parse, combo_schema_link_parse, combo_sql_parse
+from sql.prompt_maker import (
+    schema_linking_exampler,
+    sql_exampler,
+    schema_linking_sql_combo_examplar,
+)
+from sql.constructor import (
+    sql_examples_vectorstore,
+    sql_example_selector,
+    reload_sql_example_collection,
+)
+from sql.output_parser import (
+    schema_link_parse,
+    combo_schema_link_parse,
+    combo_sql_parse,
+)

from util.llm_instance import llm
from run_config import TEXT2DSL_IS_SHORTCUT


class Text2DSLAgent(object):
    def __init__(self):
        self.schema_linking_exampler = schema_linking_exampler
        self.sql_exampler = sql_exampler

        self.schema_linking_sql_combo_exampler = schema_linking_sql_combo_examplar

        self.sql_examples_vectorstore = sql_examples_vectorstore
        self.sql_example_selector = sql_example_selector

        self.schema_link_parse = schema_link_parse
        self.combo_schema_link_parse = combo_schema_link_parse
        self.combo_sql_parse = combo_sql_parse

        self.llm = llm

        self.is_shortcut = TEXT2DSL_IS_SHORTCUT

    def update_examples(self, sql_examples, example_nums, is_shortcut):
-        self.sql_examples_vectorstore, self.sql_example_selector = reload_sql_example_collection(self.sql_examples_vectorstore,
-                                                                                                 sql_examples,
-                                                                                                 self.sql_example_selector,
-                                                                                                 example_nums)
+        (
+            self.sql_examples_vectorstore,
+            self.sql_example_selector,
+        ) = reload_sql_example_collection(
+            self.sql_examples_vectorstore,
+            sql_examples,
+            self.sql_example_selector,
+            example_nums,
+        )
        self.is_shortcut = is_shortcut

-    def query2sql(self, query_text: str,
-                  schema : Union[dict, None] = None,
-                  current_date: str = None,
-                  linking: Union[List[Mapping[str, str]], None] = None
-                  ):
+    def query2sql(
+        self,
+        query_text: str,
+        schema: Union[dict, None] = None,
+        current_date: str = None,
+        linking: Union[List[Mapping[str, str]], None] = None,
+    ):

        print("query_text: ", query_text)
        print("schema: ", schema)
        print("current_date: ", current_date)
        print("prior_schema_links: ", linking)

        if linking is not None:
-            prior_schema_links = {item['fieldValue']:item['fieldName'] for item in linking}
+            prior_schema_links = {
+                item["fieldValue"]: item["fieldName"] for item in linking
+            }
        else:
            prior_schema_links = {}

-        model_name = schema['modelName']
-        fields_list = schema['fieldNameList']
+        model_name = schema["modelName"]
+        fields_list = schema["fieldNameList"]

-        schema_linking_prompt = self.schema_linking_exampler(query_text, model_name, fields_list, prior_schema_links, self.sql_example_selector)
+        schema_linking_prompt = self.schema_linking_exampler(
+            query_text,
+            model_name,
+            fields_list,
+            prior_schema_links,
+            self.sql_example_selector,
+        )
        print("schema_linking_prompt->", schema_linking_prompt)
        schema_link_output = self.llm(schema_linking_prompt)
        schema_link_str = self.schema_link_parse(schema_link_output)

-        sql_prompt = self.sql_exampler(query_text, model_name, schema_link_str, current_date, self.sql_example_selector)
+        sql_prompt = self.sql_exampler(
+            query_text,
+            model_name,
+            schema_link_str,
+            current_date,
+            self.sql_example_selector,
+        )
        print("sql_prompt->", sql_prompt)
        sql_output = self.llm(sql_prompt)

        resp = dict()
-        resp['query'] = query_text
-        resp['model'] = model_name
-        resp['fields'] = fields_list
-        resp['priorSchemaLinking'] = linking
-        resp['dataDate'] = current_date
+        resp["query"] = query_text
+        resp["model"] = model_name
+        resp["fields"] = fields_list
+        resp["priorSchemaLinking"] = linking
+        resp["dataDate"] = current_date

-        resp['analysisOutput'] = schema_link_output
-        resp['schemaLinkStr'] = schema_link_str
+        resp["analysisOutput"] = schema_link_output
+        resp["schemaLinkStr"] = schema_link_str

-        resp['sqlOutput'] = sql_output
+        resp["sqlOutput"] = sql_output

        print("resp: ", resp)

        return resp

-    def query2sqlcombo(self, query_text: str,
-                       schema : Union[dict, None] = None,
-                       current_date: str = None,
-                       linking: Union[List[Mapping[str, str]], None] = None
-                       ):
+    def query2sqlcombo(
+        self,
+        query_text: str,
+        schema: Union[dict, None] = None,
+        current_date: str = None,
+        linking: Union[List[Mapping[str, str]], None] = None,
+    ):

        print("query_text: ", query_text)
        print("schema: ", schema)
        print("current_date: ", current_date)
        print("prior_schema_links: ", linking)

        if linking is not None:
-            prior_schema_links = {item['fieldValue']:item['fieldName'] for item in linking}
+            prior_schema_links = {
+                item["fieldValue"]: item["fieldName"] for item in linking
+            }
        else:
            prior_schema_links = {}

-        model_name = schema['modelName']
-        fields_list = schema['fieldNameList']
+        model_name = schema["modelName"]
+        fields_list = schema["fieldNameList"]

-        schema_linking_sql_combo_prompt = self.schema_linking_sql_combo_exampler(query_text, model_name, current_date, fields_list,
-                                                                                 prior_schema_links, self.sql_example_selector)
+        schema_linking_sql_combo_prompt = self.schema_linking_sql_combo_exampler(
+            query_text,
+            model_name,
+            current_date,
+            fields_list,
+            prior_schema_links,
+            self.sql_example_selector,
+        )
        print("schema_linking_sql_combo_prompt->", schema_linking_sql_combo_prompt)
        schema_linking_sql_combo_output = self.llm(schema_linking_sql_combo_prompt)

-        schema_linking_str = self.combo_schema_link_parse(schema_linking_sql_combo_output)
+        schema_linking_str = self.combo_schema_link_parse(
+            schema_linking_sql_combo_output
+        )
        sql_str = self.combo_sql_parse(schema_linking_sql_combo_output)

        resp = dict()
-        resp['query'] = query_text
-        resp['model'] = model_name
-        resp['fields'] = fields_list
-        resp['priorSchemaLinking'] = prior_schema_links
-        resp['dataDate'] = current_date
+        resp["query"] = query_text
+        resp["model"] = model_name
+        resp["fields"] = fields_list
+        resp["priorSchemaLinking"] = prior_schema_links
+        resp["dataDate"] = current_date

-        resp['analysisOutput'] = schema_linking_sql_combo_output
-        resp['schemaLinkStr'] = schema_linking_str
-        resp['sqlOutput'] = sql_str
+        resp["analysisOutput"] = schema_linking_sql_combo_output
+        resp["schemaLinkStr"] = schema_linking_str
+        resp["sqlOutput"] = sql_str

        print("resp: ", resp)

        return resp

-    def query2sql_run(self, query_text: str,
-                      schema : Union[dict, None] = None,
-                      current_date: str = None,
-                      linking: Union[List[Mapping[str, str]], None] = None):
+    def query2sql_run(
+        self,
+        query_text: str,
+        schema: Union[dict, None] = None,
+        current_date: str = None,
+        linking: Union[List[Mapping[str, str]], None] = None,
+    ):

        if self.is_shortcut:
            return self.query2sqlcombo(query_text, schema, current_date, linking)
        else:
            return self.query2sql(query_text, schema, current_date, linking)


text2sql_agent = Text2DSLAgent()
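For reference, a hedged sketch (editor's addition, not part of this changeset) of the request shape query2sql_run consumes, inferred from the key accesses above; actually running it requires the LLM, vectorstore, and example selector to be configured:

# Keys mirror the FastAPI payload below: queryText / schema / currentDate / linking.
payload = {
    "queryText": "周杰伦播放量最高的歌曲是哪首",
    "schema": {"modelName": "歌曲库", "fieldNameList": ["歌曲名", "歌手名", "播放量"]},
    "currentDate": "2023-08-14",
    "linking": [{"fieldValue": "周杰伦", "fieldName": "歌手名"}],
}
resp = text2sql_agent.query2sql_run(
    query_text=payload["queryText"],
    schema=payload["schema"],
    current_date=payload["currentDate"],
    linking=payload["linking"],
)
print(resp["sqlOutput"])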
@@ -13,11 +13,19 @@ from fastapi import FastAPI, HTTPException

from sql.run import text2sql_agent

-from preset_retrieval.run import preset_query_retrieval_run, collection as preset_query_collection
-from preset_retrieval.preset_query_db import (add2preset_query_collection, update_preset_query_collection,
-                                              empty_preset_query_collection, delete_preset_query_by_ids,
-                                              update_preset_query_collection, get_preset_query_by_ids,
-                                              preset_query_collection_size)
+from preset_retrieval.run import (
+    preset_query_retrieval_run,
+    collection as preset_query_collection,
+)
+from preset_retrieval.preset_query_db import (
+    add2preset_query_collection,
+    update_preset_query_collection,
+    empty_preset_query_collection,
+    delete_preset_query_by_ids,
+    update_preset_query_collection,
+    get_preset_query_by_ids,
+    preset_query_collection_size,
+)

from plugin_call.run import plugin_selection_run

@@ -27,62 +35,64 @@ from run_config import LLMPARSER_PORT
app = FastAPI()


@app.post("/query2sql/")
async def din_query2sql(query_body: Mapping[str, Any]):
-    if 'queryText' not in query_body:
-        raise HTTPException(status_code=400,
-                            detail="query_text is not in query_body")
+    if "queryText" not in query_body:
+        raise HTTPException(status_code=400, detail="query_text is not in query_body")
    else:
-        query_text = query_body['queryText']
+        query_text = query_body["queryText"]

-    if 'schema' not in query_body:
+    if "schema" not in query_body:
        raise HTTPException(status_code=400, detail="schema is not in query_body")
    else:
-        schema = query_body['schema']
+        schema = query_body["schema"]

-    if 'currentDate' not in query_body:
+    if "currentDate" not in query_body:
        raise HTTPException(status_code=400, detail="currentDate is not in query_body")
    else:
-        current_date = query_body['currentDate']
+        current_date = query_body["currentDate"]

-    if 'linking' not in query_body:
+    if "linking" not in query_body:
        linking = None
    else:
-        linking = query_body['linking']
+        linking = query_body["linking"]

-    resp = text2sql_agent.query2sql_run(query_text=query_text,
-                                        schema=schema, current_date=current_date, linking=linking)
+    resp = text2sql_agent.query2sql_run(
+        query_text=query_text, schema=schema, current_date=current_date, linking=linking
+    )

    return resp


@app.post("/query2sql_setting_update/")
async def query2sql_setting_update(query_body: Mapping[str, Any]):
-    if 'sqlExamplars' not in query_body:
-        raise HTTPException(status_code=400,
-                            detail="sqlExamplars is not in query_body")
+    if "sqlExamplars" not in query_body:
+        raise HTTPException(status_code=400, detail="sqlExamplars is not in query_body")
    else:
-        sql_examplars = query_body['sqlExamplars']
+        sql_examplars = query_body["sqlExamplars"]

-    if 'exampleNums' not in query_body:
+    if "exampleNums" not in query_body:
        raise HTTPException(status_code=400, detail="exampleNums is not in query_body")
    else:
-        example_nums = query_body['exampleNums']
+        example_nums = query_body["exampleNums"]

-    if 'isShortcut' not in query_body:
+    if "isShortcut" not in query_body:
        raise HTTPException(status_code=400, detail="isShortcut is not in query_body")
    else:
-        is_shortcut = query_body['isShortcut']
+        is_shortcut = query_body["isShortcut"]

-    text2sql_agent.update_examples(sql_examples=sql_examplars, example_nums=example_nums, is_shortcut=is_shortcut)
+    text2sql_agent.update_examples(
+        sql_examples=sql_examplars, example_nums=example_nums, is_shortcut=is_shortcut
+    )

    return "success"


@app.post("/preset_query_retrival/")
async def preset_query_retrival(query_text_list: List[str], n_results: int = 5):
-    parsed_retrieval_res_format = preset_query_retrieval_run(preset_query_collection, query_text_list, n_results)
+    parsed_retrieval_res_format = preset_query_retrieval_run(
+        preset_query_collection, query_text_list, n_results
+    )

    return parsed_retrieval_res_format

@@ -93,27 +103,32 @@ async def preset_query_add(preset_info_list: List[Mapping[str, str]]):
    preset_query_ids = []

    for preset_info in preset_info_list:
-        preset_queries.append(preset_info['preset_query'])
-        preset_query_ids.append(preset_info['preset_query_id'])
+        preset_queries.append(preset_info["preset_query"])
+        preset_query_ids.append(preset_info["preset_query_id"])

-    add2preset_query_collection(collection=preset_query_collection,
-                                preset_queries=preset_queries,
-                                preset_query_ids=preset_query_ids)
+    add2preset_query_collection(
+        collection=preset_query_collection,
+        preset_queries=preset_queries,
+        preset_query_ids=preset_query_ids,
+    )

    return "success"


@app.post("/preset_query_update/")
async def preset_query_update(preset_info_list: List[Mapping[str, str]]):
    preset_queries = []
    preset_query_ids = []

    for preset_info in preset_info_list:
-        preset_queries.append(preset_info['preset_query'])
-        preset_query_ids.append(preset_info['preset_query_id'])
+        preset_queries.append(preset_info["preset_query"])
+        preset_query_ids.append(preset_info["preset_query_id"])

-    update_preset_query_collection(collection=preset_query_collection,
-                                   preset_queries=preset_queries,
-                                   preset_query_ids=preset_query_ids)
+    update_preset_query_collection(
+        collection=preset_query_collection,
+        preset_queries=preset_queries,
+        preset_query_ids=preset_query_ids,
+    )

    return "success"

@@ -124,39 +139,50 @@ async def preset_query_empty():

    return "success"


@app.post("/preset_delete_by_ids/")
async def preset_delete_by_ids(preset_query_ids: List[str]):
-    delete_preset_query_by_ids(collection=preset_query_collection, preset_query_ids=preset_query_ids)
+    delete_preset_query_by_ids(
+        collection=preset_query_collection, preset_query_ids=preset_query_ids
+    )

    return "success"


@app.post("/preset_get_by_ids/")
async def preset_get_by_ids(preset_query_ids: List[str]):
-    preset_queries = get_preset_query_by_ids(collection=preset_query_collection, preset_query_ids=preset_query_ids)
+    preset_queries = get_preset_query_by_ids(
+        collection=preset_query_collection, preset_query_ids=preset_query_ids
+    )

    return preset_queries


@app.get("/preset_query_size/")
async def preset_query_size():
    size = preset_query_collection_size(collection=preset_query_collection)

    return size


@app.post("/plugin_selection/")
async def tool_selection(query_body: Mapping[str, Any]):
-    if 'queryText' not in query_body:
+    if "queryText" not in query_body:
        raise HTTPException(status_code=400, detail="query_text is not in query_body")
    else:
-        query_text = query_body['queryText']
+        query_text = query_body["queryText"]

-    if 'pluginConfigs' not in query_body:
-        raise HTTPException(status_code=400, detail="pluginConfigs is not in query_body")
+    if "pluginConfigs" not in query_body:
+        raise HTTPException(
+            status_code=400, detail="pluginConfigs is not in query_body"
+        )
    else:
-        plugin_configs = query_body['pluginConfigs']
+        plugin_configs = query_body["pluginConfigs"]

    resp = plugin_selection_run(query_text=query_text, plugin_configs=plugin_configs)

    return resp


if __name__ == "__main__":
    uvicorn.run(app, host=LLMPARSER_HOST, port=LLMPARSER_PORT)
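A client-side sketch (editor's addition): calling the /query2sql/ endpoint above; the host and port are placeholders standing in for whatever LLMPARSER_HOST and LLMPARSER_PORT resolve to in run_config:

import requests

body = {
    "queryText": "播放量前十的歌曲",
    "schema": {"modelName": "歌曲库", "fieldNameList": ["歌曲名", "播放量"]},
    "currentDate": "2023-08-14",
}
# Missing keys other than "linking" yield HTTP 400, per the checks above.
resp = requests.post("http://localhost:9092/query2sql/", json=body)
print(resp.json())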
@@ -7,13 +7,15 @@ from chromadb.config import Settings

from run_config import CHROMA_DB_PERSIST_PATH

-client = chromadb.Client(Settings(
-    chroma_db_impl="duckdb+parquet",
-    persist_directory=CHROMA_DB_PERSIST_PATH # Optional, defaults to .chromadb/ in the current directory
-))
+client = chromadb.Client(
+    Settings(
+        chroma_db_impl="duckdb+parquet",
+        persist_directory=CHROMA_DB_PERSIST_PATH,  # Optional, defaults to .chromadb/ in the current directory
+    )
+)


-def empty_chroma_collection_2(collection:Collection):
+def empty_chroma_collection_2(collection: Collection):
    collection_name = collection.name
    client = collection._client
    metadata = collection.metadata

@@ -21,17 +23,18 @@ def empty_chroma_collection_2(collection:Collection):

    client.delete_collection(collection_name)

-    new_collection = client.get_or_create_collection(name=collection_name,
-                                                     metadata=metadata,
-                                                     embedding_function=embedding_function)
+    new_collection = client.get_or_create_collection(
+        name=collection_name, metadata=metadata, embedding_function=embedding_function
+    )

    size_of_new_collection = new_collection.count()

-    print(f'Collection {collection_name} emptied. Size of new collection: {size_of_new_collection}')
+    print(
+        f"Collection {collection_name} emptied. Size of new collection: {size_of_new_collection}"
+    )

    return new_collection


-def empty_chroma_collection(collection:Collection):
+def empty_chroma_collection(collection: Collection):
    collection.delete()
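Hypothetical usage of empty_chroma_collection_2 (editor's sketch; the collection name and embedding values are made up, and passing explicit embeddings sidesteps any default embedding function):

# Recreate a scratch collection, add one record, then empty it.
scratch = client.get_or_create_collection(name="scratch_examples")
scratch.add(documents=["问题示例"], embeddings=[[0.1, 0.2, 0.3]], ids=["1"])
scratch = empty_chroma_collection_2(scratch)
assert scratch.count() == 0  # same name and metadata, zero records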
@@ -4,5 +4,6 @@ from langchain.llms import OpenAI
from run_config import MODEL_NAME, OPENAI_API_KEY, TEMPERATURE


-llm = OpenAI(openai_api_key=OPENAI_API_KEY, model_name=MODEL_NAME,
-             temperature=TEMPERATURE)
+llm = OpenAI(
+    openai_api_key=OPENAI_API_KEY, model_name=MODEL_NAME, temperature=TEMPERATURE
+)
@@ -9,6 +9,7 @@ from run_config import HF_TEXT2VEC_MODEL_NAME

hg_embedding = HuggingFaceEmbeddings(model_name=HF_TEXT2VEC_MODEL_NAME)


class Text2VecEmbeddingFunction(EmbeddingFunction):
    def __call__(self, texts: Documents) -> Embeddings:

@@ -16,13 +17,8 @@ class Text2VecEmbeddingFunction(EmbeddingFunction):

        return embeddings

-def get_embeddings(documents:List[str]) -> List[List[float]]:
+
+def get_embeddings(documents: List[str]) -> List[List[float]]:
    embeddings = hg_embedding.embed_documents(documents)
    return embeddings
-
-
-
-
-
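A minimal sketch (editor's addition) of calling get_embeddings; the vector dimension depends on the model behind HF_TEXT2VEC_MODEL_NAME:

vectors = get_embeddings(["周杰伦", "七里香"])
print(len(vectors), len(vectors[0]))  # 2 vectors, model-defined dimension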
@@ -3,7 +3,7 @@
<mapper namespace="com.tencent.supersonic.chat.persistence.mapper.ChatQueryDOMapper">
  <resultMap id="BaseResultMap" type="com.tencent.supersonic.chat.persistence.dataobject.ChatQueryDO">
    <id column="question_id" jdbcType="BIGINT" property="questionId" />
-    <result column="agent_id" jdbcType="BIGINT" property="agentId" />
+    <result column="agent_id" jdbcType="INTEGER" property="agentId" />
    <result column="create_time" jdbcType="TIMESTAMP" property="createTime" />
    <result column="user_name" jdbcType="VARCHAR" property="userName" />
    <result column="query_state" jdbcType="INTEGER" property="queryState" />
@@ -77,7 +77,7 @@
      query_state, chat_id, score,
      feedback, query_text, query_result
      )
-    values (#{questionId,jdbcType=BIGINT}, #{agentId,jdbcType=BIGINT}, #{createTime,jdbcType=TIMESTAMP}, #{userName,jdbcType=VARCHAR},
+    values (#{questionId,jdbcType=BIGINT}, #{agentId,jdbcType=INTEGER}, #{createTime,jdbcType=TIMESTAMP}, #{userName,jdbcType=VARCHAR},
      #{queryState,jdbcType=INTEGER}, #{chatId,jdbcType=BIGINT}, #{score,jdbcType=INTEGER},
      #{feedback,jdbcType=VARCHAR}, #{queryText,jdbcType=LONGVARCHAR}, #{queryResult,jdbcType=LONGVARCHAR}
      )
@@ -98,9 +98,6 @@
    <if test="chatId != null">
      chat_id = #{chatId,jdbcType=BIGINT},
    </if>
-    <if test="agentId != null">
-      agent_id = #{agentId,jdbcType=INTEGER},
-    </if>
    <if test="score != null">
      score = #{score,jdbcType=INTEGER},
    </if>
@@ -116,5 +113,4 @@
    </set>
    where question_id = #{questionId,jdbcType=BIGINT}
  </update>

</mapper>

@@ -59,7 +59,7 @@
    join (
      select distinct chat_id
      from s2_chat_query
-      where query_state = 0 and agent_id = ${agentId}
+      where query_state = 1 and agent_id = ${agentId}
      order by chat_id desc
      limit #{start}, #{limit}
    ) q2
@@ -1,45 +0,0 @@
package com.tencent.supersonic.chat.corrector;

import static org.mockito.ArgumentMatchers.any;

import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.chat.parser.llm.dsl.DSLDateHelper;
import org.junit.Assert;
import org.junit.jupiter.api.Test;
import org.mockito.MockedStatic;
import org.mockito.Mockito;

class DateFieldCorrectorTest {

    @Test
    void corrector() {
        MockedStatic<DSLDateHelper> dslDateHelper = Mockito.mockStatic(DSLDateHelper.class);
        dslDateHelper.when(() -> DSLDateHelper.getReferenceDate(any())).thenReturn("2023-08-14");
        DateFieldCorrector dateFieldCorrector = new DateFieldCorrector();
        SemanticParseInfo parseInfo = new SemanticParseInfo();
        SchemaElement model = new SchemaElement();
        model.setId(2L);
        parseInfo.setModel(model);
        SemanticCorrectInfo semanticCorrectInfo = SemanticCorrectInfo.builder()
                .sql("select count(歌曲名) from 歌曲库 ")
                .parseInfo(parseInfo)
                .build();

        dateFieldCorrector.correct(semanticCorrectInfo);

        Assert.assertEquals("SELECT count(歌曲名) FROM 歌曲库 WHERE 数据日期 = '2023-08-14'", semanticCorrectInfo.getSql());

        semanticCorrectInfo = SemanticCorrectInfo.builder()
                .sql("select count(歌曲名) from 歌曲库 where 数据日期 = '2023-08-14'")
                .parseInfo(parseInfo)
                .build();

        dateFieldCorrector.correct(semanticCorrectInfo);

        Assert.assertEquals("select count(歌曲名) from 歌曲库 where 数据日期 = '2023-08-14'", semanticCorrectInfo.getSql());
    }
}
@@ -1,65 +0,0 @@
package com.tencent.supersonic.chat.corrector;

import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.chat.parser.llm.dsl.DSLParseResult;
import com.tencent.supersonic.chat.query.llm.dsl.LLMReq;
import com.tencent.supersonic.chat.query.llm.dsl.LLMReq.ElementValue;
import com.tencent.supersonic.common.pojo.Constants;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.junit.Assert;
import org.junit.jupiter.api.Test;

class FieldNameCorrectorTest {

    @Test
    void corrector() {

        FieldNameCorrector corrector = new FieldNameCorrector();
        SemanticCorrectInfo semanticCorrectInfo = SemanticCorrectInfo.builder()
                .sql("select 歌曲名 from 歌曲库 where 专辑照片 = '七里香' and 专辑名 = '流行' and 数据日期 = '2023-08-19'")
                .build();

        SemanticParseInfo parseInfo = new SemanticParseInfo();

        DSLParseResult dslParseResult = new DSLParseResult();
        LLMReq llmReq = new LLMReq();
        List<ElementValue> linking = new ArrayList<>();
        ElementValue elementValue = new ElementValue();
        elementValue.setFieldValue("流行");
        elementValue.setFieldName("歌曲风格");
        linking.add(elementValue);

        ElementValue elementValue2 = new ElementValue();
        elementValue2.setFieldValue("七里香");
        elementValue2.setFieldName("歌曲名");
        linking.add(elementValue2);

        ElementValue elementValue3 = new ElementValue();
        elementValue3.setFieldValue("周杰伦");
        elementValue3.setFieldName("歌手名");
        linking.add(elementValue3);

        ElementValue elementValue4 = new ElementValue();
        elementValue4.setFieldValue("流行");
        elementValue4.setFieldName("歌曲流派");
        linking.add(elementValue4);

        llmReq.setLinking(linking);
        dslParseResult.setLlmReq(llmReq);

        Map<String, Object> properties = new HashMap<>();
        properties.put(Constants.CONTEXT, dslParseResult);

        parseInfo.setProperties(properties);
        semanticCorrectInfo.setParseInfo(parseInfo);

        corrector.correct(semanticCorrectInfo);

        Assert.assertEquals("SELECT 歌曲名 FROM 歌曲库 WHERE 歌曲名 = '七里香' AND 歌曲流派 = '流行' AND 数据日期 = '2023-08-19'",
                semanticCorrectInfo.getSql());
    }
}
@@ -1,71 +0,0 @@
package com.tencent.supersonic.chat.corrector;

import static org.mockito.Mockito.when;

import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.chat.api.pojo.SchemaValueMap;
import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
import com.tencent.supersonic.common.util.ContextUtils;
import com.tencent.supersonic.knowledge.service.SchemaService;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.junit.Assert;
import org.junit.jupiter.api.Test;
import org.mockito.MockedStatic;
import org.mockito.Mockito;

class FieldValueCorrectorTest {


    @Test
    void corrector() {

        MockedStatic<ContextUtils> mockContextUtils = Mockito.mockStatic(ContextUtils.class);
        SchemaService mockSchemaService = Mockito.mock(SchemaService.class);
        SemanticSchema mockSemanticSchema = Mockito.mock(SemanticSchema.class);

        List<SchemaElement> dimensions = new ArrayList<>();
        List<SchemaValueMap> schemaValueMaps = new ArrayList<>();
        SchemaValueMap value1 = new SchemaValueMap();
        value1.setBizName("杰伦");
        value1.setTechName("周杰伦");
        value1.setAlias(Arrays.asList("周杰倫", "Jay Chou", "周董", "周先生"));
        schemaValueMaps.add(value1);

        SchemaElement schemaElement = SchemaElement.builder()
                .bizName("singer_name")
                .name("歌手名")
                .model(2L)
                .schemaValueMaps(schemaValueMaps)
                .build();
        dimensions.add(schemaElement);

        when(mockSemanticSchema.getDimensions()).thenReturn(dimensions);
        when(mockSchemaService.getSemanticSchema()).thenReturn(mockSemanticSchema);
        mockContextUtils.when(() -> ContextUtils.getBean(SchemaService.class)).thenReturn(mockSchemaService);

        SemanticParseInfo parseInfo = new SemanticParseInfo();
        SchemaElement model = new SchemaElement();
        model.setId(2L);
        parseInfo.setModel(model);
        SemanticCorrectInfo semanticCorrectInfo = SemanticCorrectInfo.builder()
                .sql("select count(song_name) from 歌曲库 where singer_name = '周先生'")
                .parseInfo(parseInfo)
                .build();

        FieldValueCorrector corrector = new FieldValueCorrector();
        corrector.correct(semanticCorrectInfo);

        Assert.assertEquals("SELECT count(song_name) FROM 歌曲库 WHERE singer_name = '周杰伦'", semanticCorrectInfo.getSql());

        semanticCorrectInfo.setSql("select count(song_name) from 歌曲库 where singer_name = '杰伦'");
        corrector.correct(semanticCorrectInfo);

        Assert.assertEquals("SELECT count(song_name) FROM 歌曲库 WHERE singer_name = '周杰伦'", semanticCorrectInfo.getSql());
    }
}
@@ -1,46 +0,0 @@
package com.tencent.supersonic.chat.corrector;

import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
import org.junit.Assert;
import org.junit.jupiter.api.Test;

class SelectFieldAppendCorrectorTest {

    @Test
    void corrector() {
        SelectFieldAppendCorrector corrector = new SelectFieldAppendCorrector();
        SemanticCorrectInfo semanticCorrectInfo = SemanticCorrectInfo.builder()
                .sql("select 歌曲名 from 歌曲库 where datediff('day', 发布日期, '2023-08-09') <= 1 and 歌手名 = '邓紫棋' "
                        + "and sys_imp_date = '2023-08-09' and 歌曲发布时 = '2023-08-01' order by 播放量 desc limit 11")
                .build();

        corrector.correct(semanticCorrectInfo);

        Assert.assertEquals(
                "SELECT 歌曲名, 歌手名, 播放量, 歌曲发布时, 发布日期 FROM 歌曲库 WHERE "
                        + "datediff('day', 发布日期, '2023-08-09') <= 1 AND 歌手名 = '邓紫棋' "
                        + "AND sys_imp_date = '2023-08-09' AND 歌曲发布时 = '2023-08-01'"
                        + " ORDER BY 播放量 DESC LIMIT 11", semanticCorrectInfo.getSql());

        semanticCorrectInfo.setSql("select 用户名 from 内容库产品 where datediff('day', 数据日期, '2023-09-14') <= 30"
                + " group by 用户名 having sum(访问次数) > 2000");

        corrector.correct(semanticCorrectInfo);

        Assert.assertEquals(
                "SELECT 用户名, sum(访问次数) FROM 内容库产品 WHERE "
                        + "datediff('day', 数据日期, '2023-09-14') <= 30 "
                        + "GROUP BY 用户名 HAVING sum(访问次数) > 2000", semanticCorrectInfo.getSql());

        semanticCorrectInfo.setSql("SELECT 用户名, sum(访问次数) FROM 内容库产品 WHERE "
                + "datediff('day', 数据日期, '2023-09-14') <= 30 "
                + "GROUP BY 用户名 HAVING sum(访问次数) > 2000");

        corrector.correct(semanticCorrectInfo);

        Assert.assertEquals(
                "SELECT 用户名, sum(访问次数) FROM 内容库产品 WHERE "
                        + "datediff('day', 数据日期, '2023-09-14') <= 30 "
                        + "GROUP BY 用户名 HAVING sum(访问次数) > 2000", semanticCorrectInfo.getSql());
    }
}
@@ -7,8 +7,8 @@ import com.tencent.supersonic.knowledge.service.WordService;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.boot.context.event.ApplicationStartedEvent;
-import org.springframework.context.ApplicationListener;
+import org.springframework.boot.CommandLineRunner;
+import org.springframework.core.annotation.Order;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;

@@ -17,7 +17,8 @@ import java.util.concurrent.CompletableFuture;

@Slf4j
@Component
-public class ApplicationStartedListener implements ApplicationListener<ApplicationStartedEvent> {
+@Order(5)
+public class ApplicationStartedListener implements CommandLineRunner {

    @Autowired
    private KnowledgeService knowledgeService;
@@ -27,7 +28,7 @@ public class ApplicationStartedListener implements ApplicationListener<ApplicationStartedEvent> {
    private SchemaService schemaService;

    @Override
-    public void onApplicationEvent(ApplicationStartedEvent event) {
+    public void run(String... args) {
        updateKnowledgeDimValue();
    }
@@ -4,18 +4,13 @@ import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.tencent.supersonic.chat.api.component.SemanticLayer;
import com.tencent.supersonic.chat.api.pojo.ModelSchema;
-import com.tencent.supersonic.common.pojo.ResultData;
import com.tencent.supersonic.semantic.api.model.response.ModelSchemaResp;
-import com.tencent.supersonic.semantic.api.model.response.QueryResultWithSchemaResp;

import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.TimeUnit;

import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
-import org.springframework.core.ParameterizedTypeReference;
import org.springframework.util.CollectionUtils;

@Slf4j
@@ -24,10 +19,6 @@ public abstract class BaseSemanticLayer implements SemanticLayer {
    protected final Cache<String, List<ModelSchemaResp>> modelSchemaCache =
            CacheBuilder.newBuilder().expireAfterWrite(10, TimeUnit.SECONDS).build();

-    protected ParameterizedTypeReference<ResultData<QueryResultWithSchemaResp>> structTypeRef =
-            new ParameterizedTypeReference<ResultData<QueryResultWithSchemaResp>>() {
-            };

    @SneakyThrows
    public List<ModelSchemaResp> fetchModelSchema(List<Long> ids, Boolean cacheEnable) {
        if (cacheEnable) {
@@ -57,17 +57,13 @@ public class LocalSemanticLayer extends BaseSemanticLayer {
    }

    @Override
+    @SneakyThrows
    public QueryResultWithSchemaResp queryByDsl(QueryDslReq queryDslReq, User user) {
-        try {
-            queryService = ContextUtils.getBean(QueryService.class);
-            Object object = queryService.queryBySql(queryDslReq, user);
-            QueryResultWithSchemaResp queryResultWithSchemaResp = JsonUtil.toObject(JsonUtil.toString(object),
-                    QueryResultWithSchemaResp.class);
-            return queryResultWithSchemaResp;
-        } catch (Exception e) {
-            log.info("queryByDsl has an exception:{}", e);
-        }
-        return null;
+        queryService = ContextUtils.getBean(QueryService.class);
+        Object object = queryService.queryBySql(queryDslReq, user);
+        QueryResultWithSchemaResp queryResultWithSchemaResp = JsonUtil.toObject(JsonUtil.toString(object),
+                QueryResultWithSchemaResp.class);
+        return queryResultWithSchemaResp;
    }

    @Override
@@ -10,7 +10,7 @@ public enum AggOperatorEnum {

    SUM("SUM"),

-    DISTINCT("DISTINCT"),
+    COUNT_DISTINCT("COUNT_DISTINCT"),

    TOPN("TOPN"),
32
dev/reformat
Executable file
@@ -0,0 +1,32 @@
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

set -x


PROFILES="-P "

# python style checks rely on `black` in path
if ! command -v black &> /dev/null
then
    echo "Skip Python lint since 'black' is not available. Please install 'black' by running 'pip install black==22.3.0'"
else
    PROFILES="${PROFILES} spotless-python"
fi

mvn spotless:apply $PROFILES
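Running dev/reformat from the repository root applies mvn spotless:apply; the Python profile is added only when black is found on the PATH, otherwise the script prints the install hint above and formats the Java sources alone.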
[Binary image changes: 24 image files removed; 1 image updated (182 KiB -> 155 KiB)]
@@ -1,26 +0,0 @@
# LLM Model Configuration

### Overview

The use of language models is a key part of SuperSonic. It significantly strengthens understanding of user questions and is one of the cornerstones of interacting with users conversationally. In this project, language-model capabilities are applied in two areas, LLM and Embedding. By default, the LLM is the closed-source model gpt-3.5-turbo-16k and the Embedding model is the open-source model GanymedeNil/text2vec-large-chinese. Users can change this configuration to suit their actual needs.

### Configuration

<div align="left" >
<img src=../images/nlp_config.png width="70%"/>
<p>Figure 1-1 LLM configuration file</p>
</div>

1. LLM-related settings are configured in supersonic/chat/core/src/main/python/llm/run_config.py.
2. The LLM uses OpenAI's closed-source model gpt-3.5-turbo-16k. An OpenAI API key is required to call it, configured via the OPENAI_API_KEY variable.
3. The Embedding model uses the open-source model GanymedeNil/text2vec-large-chinese, configured via the HF_TEXT2VEC_MODEL_NAME variable. For convenience it is sourced from HuggingFace, and the model files are downloaded automatically on first startup.

### FAQ

1. Can an open-source LLM replace OpenAI's GPT models?
   - Not yet. We have tested most mainstream open-source LLMs; in the logical-reasoning and code-generation scenarios this project requires of the LLM, they do not yet meet the bar.
   - We will keep tracking progress on open-source LLMs and, once one meets the requirements, integrate the ability to deploy an open-source LLM privately.
2. Which of GPT4, GPT3.5, and GPT3.5-16k should be used?
   - GPT3.5 and GPT3.5-16k both basically meet the requirements, though their output can be unstable. GPT3.5's token limit is 4k, which the current CoT strategy can easily exceed.
   - GPT4's output is more stable, but its cost is far above GPT3.5's; choose based on your actual usage.
3. Can a different Embedding model be used?
   - Yes. Use the leaderboard of the [text2vec](https://github.com/shibing624/text2vec) project as a reference, find the corresponding model card on HuggingFace, and change the value of HF_TEXT2VEC_MODEL_NAME.
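The variables this removed page refers to live in run_config.py; a hypothetical excerpt (editor's sketch, values are placeholders) consistent with the imports seen earlier in this changeset:

# llm/run_config.py (illustrative values only)
OPENAI_API_KEY = "sk-..."  # required to call the OpenAI model
MODEL_NAME = "gpt-3.5-turbo-16k"
TEMPERATURE = 0.0
HF_TEXT2VEC_MODEL_NAME = "GanymedeNil/text2vec-large-chinese"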
@@ -1,29 +0,0 @@
# text2sql Configuration

### Overview

The text2sql feature relies heavily on the LLM. When generating SQL, few-shot examples steer the LLM's in-context-learning ability through chain-of-thought prompting, which is essential for producing SQL that is stable and conforms to the downstream parser's grammar. Users can configure the example pool and the number of examples to better match their own business.

### Configuration

1. Example pool.
   - supersonic/chat/core/src/main/python/few_shot_example/sql_exampler.py is the example-pool file. Using the existing examples as a reference, users can add examples closer to their own business to better guide the LLM's SQL generation.
2. Number of examples.
   - Configured in supersonic/chat/core/src/main/python/run_config.py via the TEXT2DSL_FEW_SHOTS_EXAMPLE_NUM variable.
   - The default is 15, an empirical value that worked well in our internal practice. Too few examples leave the LLM under-guided and make the generated SQL less stable; too many increase generation time and token consumption (or exceed the LLM's token limit).
3. SQL generation mode.
   - Configured in supersonic/chat/core/src/main/python/run_config.py via the TEXT2DSL_IS_SHORTCUT variable.
   - The default is False. When False, the LLM is called twice to generate SQL; when True, only once. Compared with two LLM calls, one call cuts latency by 30-40% and token consumption by about 30%, but the accuracy of the generated SQL drops somewhat.

<div align="left" >
<img src=../images/text2sql_config.png width="70%"/>
<p>Figure 1-1 Configuration file</p>
</div>

### Updating the configuration at runtime

1. To tune the text2sql configuration after the project has started, modify the relevant files and apply them to the running service in either of two ways:
   - run supersonic-daemon.sh reload llmparser
   - run python examples_reload_run.py

### FAQ

1. Generating one SQL statement consumes too many LLM tokens; by OpenAI's pricing, each SQL is too expensive. Can fewer tokens be used?
   - Yes. As in configuration step 1, replace the pool with shorter examples; as in step 2, reduce the number of examples used; as in step 3, call the LLM only once per SQL.
   - Note that the choice and number of examples strongly affect SQL quality. Cutting input tokens too aggressively may degrade the generated SQL; balance this against your own business after testing.
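A hedged sketch (editor's addition) of what examples_reload_run.py plausibly does: pushing the example pool to the running service via the /query2sql_setting_update/ endpoint shown earlier. The sql_examplars export name and the port are assumptions:

import requests

from few_shot_example.sql_exampler import sql_examplars  # assumed export name

body = {"sqlExamplars": sql_examplars, "exampleNums": 15, "isShortcut": False}
resp = requests.post("http://localhost:9092/query2sql_setting_update/", json=body)
print(resp.text)  # "success" on reload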
@@ -31,12 +31,9 @@ com.tencent.supersonic.auth.api.authentication.adaptor.UserAdaptor=\

com.tencent.supersonic.chat.api.component.SemanticCorrector=\
-  com.tencent.supersonic.chat.corrector.DateFieldCorrector, \
-  com.tencent.supersonic.chat.corrector.FunctionAliasCorrector, \
-  com.tencent.supersonic.chat.corrector.FieldNameCorrector, \
-  com.tencent.supersonic.chat.corrector.FieldCorrector, \
-  com.tencent.supersonic.chat.corrector.FunctionCorrector, \
-  com.tencent.supersonic.chat.corrector.TableNameCorrector, \
-  com.tencent.supersonic.chat.corrector.QueryFilterAppend, \
-  com.tencent.supersonic.chat.corrector.SelectFieldAppendCorrector, \
-  com.tencent.supersonic.chat.corrector.FieldValueCorrector
+  com.tencent.supersonic.chat.corrector.GlobalCorrector, \
+  com.tencent.supersonic.chat.corrector.TableCorrector, \
+  com.tencent.supersonic.chat.corrector.GroupByCorrector, \
+  com.tencent.supersonic.chat.corrector.SelectCorrector, \
+  com.tencent.supersonic.chat.corrector.WhereCorrector, \
+  com.tencent.supersonic.chat.corrector.HavingCorrector
@@ -0,0 +1,199 @@
package com.tencent.supersonic;

import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.common.pojo.enums.AggOperatorEnum;
import com.tencent.supersonic.semantic.api.model.enums.DimensionTypeEnum;
import com.tencent.supersonic.semantic.api.model.enums.IdentifyTypeEnum;
import com.tencent.supersonic.semantic.api.model.pojo.Dim;
import com.tencent.supersonic.semantic.api.model.pojo.DimensionTimeTypeParams;
import com.tencent.supersonic.semantic.api.model.pojo.Identify;
import com.tencent.supersonic.semantic.api.model.pojo.Measure;
import com.tencent.supersonic.semantic.api.model.request.DatasourceReq;
import com.tencent.supersonic.semantic.api.model.request.DomainReq;
import com.tencent.supersonic.semantic.api.model.request.ModelReq;
import com.tencent.supersonic.semantic.model.domain.DatasourceService;
import com.tencent.supersonic.semantic.model.domain.DomainService;
import com.tencent.supersonic.semantic.model.domain.ModelService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.CommandLineRunner;
import org.springframework.core.annotation.Order;
import org.springframework.stereotype.Component;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * Seeds the benchmark demo data (music domain) on startup when spring.h2.demo.enabled=true.
 */
@Component
@Slf4j
@Order(2)
public class LoadBenchMarkDemo implements CommandLineRunner {

    private User user = User.getFakeUser();

    @Value("${spring.h2.demo.enabled:false}")
    private boolean demoEnable;

    @Autowired
    private DomainService domainService;
    @Autowired
    private ModelService modelService;
    @Autowired
    private DatasourceService datasourceService;

    @Override
    public void run(String... args) {
        if (!demoEnable) {
            return;
        }
        try {
            addDomain();
            addModel_1();
            addDatasource_1();
            addDatasource_2();
            addDatasource_3();
            addDatasource_4();
        } catch (Exception e) {
            log.error("Failed to add bench mark demo data", e);
        }
    }

    public void addDomain() {
        DomainReq domainReq = new DomainReq();
        domainReq.setName("测评数据-音乐");
        domainReq.setBizName("music");
        domainReq.setParentId(0L);
        domainReq.setViewers(Arrays.asList("admin", "tom", "jack"));
        domainReq.setViewOrgs(Collections.singletonList("admin"));
        domainReq.setAdmins(Collections.singletonList("admin"));
        domainReq.setAdminOrgs(Collections.emptyList());
        domainService.createDomain(domainReq, user);
    }

    public void addModel_1() {
        ModelReq modelReq = new ModelReq();
        modelReq.setName("测评数据-音乐");
        modelReq.setBizName("music");
        modelReq.setDomainId(2L);
        modelReq.setViewers(Arrays.asList("admin", "tom", "jack"));
        modelReq.setViewOrgs(Collections.singletonList("admin"));
        modelReq.setAdmins(Collections.singletonList("admin"));
        modelReq.setAdminOrgs(Collections.emptyList());
        modelService.createModel(modelReq, user);
    }

    public void addDatasource_1() throws Exception {
        DatasourceReq datasourceReq = new DatasourceReq();
        datasourceReq.setModelId(3L);
        datasourceReq.setName("艺术类型");
        datasourceReq.setBizName("genre");
        datasourceReq.setDescription("艺术类型");
        datasourceReq.setDatabaseId(1L);

        List<Dim> dimensions = new ArrayList<>();
        Dim dimension1 = new Dim("", "imp_date", DimensionTypeEnum.time.name(), 0);
        dimension1.setTypeParams(new DimensionTimeTypeParams());
        dimensions.add(dimension1);
        dimensions.add(new Dim("活跃区域", "most_popular_in", DimensionTypeEnum.categorical.name(), 1));
        datasourceReq.setDimensions(dimensions);

        List<Identify> identifiers = new ArrayList<>();
        identifiers.add(new Identify("音乐类型名称", IdentifyTypeEnum.primary.name(), "g_name"));
        datasourceReq.setIdentifiers(identifiers);

        List<Measure> measures = new ArrayList<>();
        Measure measure = new Measure("评分", "rating", AggOperatorEnum.SUM.name(), 0);
        measures.add(measure);
        datasourceReq.setMeasures(measures);

        datasourceReq.setQueryType("sql_query");
        datasourceReq.setSqlQuery("SELECT g_name, rating, most_popular_in FROM genre");
        datasourceService.createDatasource(datasourceReq, user);
    }

    public void addDatasource_2() throws Exception {
        DatasourceReq datasourceReq = new DatasourceReq();
        datasourceReq.setModelId(3L);
        datasourceReq.setName("艺术家");
        datasourceReq.setBizName("artist");
        datasourceReq.setDescription("艺术家");
        datasourceReq.setDatabaseId(1L);

        List<Dim> dimensions = new ArrayList<>();
        dimensions.add(new Dim("国籍", "country", DimensionTypeEnum.categorical.name(), 1));
        dimensions.add(new Dim("性别", "gender", DimensionTypeEnum.categorical.name(), 1));
        datasourceReq.setDimensions(dimensions);

        List<Identify> identifiers = new ArrayList<>();
        identifiers.add(new Identify("艺术家名称", IdentifyTypeEnum.primary.name(), "artist_name"));
        identifiers.add(new Identify("音乐类型名称", IdentifyTypeEnum.foreign.name(), "g_name"));
        datasourceReq.setIdentifiers(identifiers);

        datasourceReq.setMeasures(Collections.emptyList());

        datasourceReq.setQueryType("sql_query");
        datasourceReq.setSqlQuery("SELECT artist_name, country, gender, g_name FROM artist");
        datasourceService.createDatasource(datasourceReq, user);
    }

    public void addDatasource_3() throws Exception {
        DatasourceReq datasourceReq = new DatasourceReq();
        datasourceReq.setModelId(3L);
        datasourceReq.setName("文件");
        datasourceReq.setBizName("files");
        datasourceReq.setDescription("文件");
        datasourceReq.setDatabaseId(1L);

        List<Dim> dimensions = new ArrayList<>();
        dimensions.add(new Dim("持续时间", "duration", DimensionTypeEnum.categorical.name(), 1));
        dimensions.add(new Dim("文件格式", "formats", DimensionTypeEnum.categorical.name(), 1));
        datasourceReq.setDimensions(dimensions);

        List<Identify> identifiers = new ArrayList<>();
        identifiers.add(new Identify("歌曲ID", IdentifyTypeEnum.primary.name(), "f_id"));
        identifiers.add(new Identify("艺术家名称", IdentifyTypeEnum.foreign.name(), "artist_name"));
        datasourceReq.setIdentifiers(identifiers);

        datasourceReq.setMeasures(Collections.emptyList());

        datasourceReq.setQueryType("sql_query");
        datasourceReq.setSqlQuery("SELECT f_id, artist_name, file_size, duration, formats FROM files");
        datasourceService.createDatasource(datasourceReq, user);
    }

    public void addDatasource_4() throws Exception {
        DatasourceReq datasourceReq = new DatasourceReq();
        datasourceReq.setModelId(3L);
        datasourceReq.setName("歌曲");
        datasourceReq.setBizName("song");
        datasourceReq.setDescription("歌曲");
        datasourceReq.setDatabaseId(1L);

        List<Dim> dimensions = new ArrayList<>();
        Dim dimension1 = new Dim("", "imp_date", DimensionTypeEnum.time.name(), 0);
        dimension1.setTypeParams(new DimensionTimeTypeParams());
        dimensions.add(dimension1);
        dimensions.add(new Dim("国家", "country", DimensionTypeEnum.categorical.name(), 1));
        dimensions.add(new Dim("语种", "languages", DimensionTypeEnum.categorical.name(), 1));
        dimensions.add(new Dim("发行时间", "releasedate", DimensionTypeEnum.categorical.name(), 1));
        datasourceReq.setDimensions(dimensions);

        List<Identify> identifiers = new ArrayList<>();
        identifiers.add(new Identify("歌曲名称", IdentifyTypeEnum.primary.name(), "song_name"));
        identifiers.add(new Identify("歌曲ID", IdentifyTypeEnum.foreign.name(), "f_id"));
        datasourceReq.setIdentifiers(identifiers);

        List<Measure> measures = new ArrayList<>();
        measures.add(new Measure("分辨率", "resolution", AggOperatorEnum.SUM.name(), 1));
        measures.add(new Measure("评分", "rating", AggOperatorEnum.SUM.name(), 1));
        datasourceReq.setMeasures(measures);

        datasourceReq.setQueryType("sql_query");
        datasourceReq.setSqlQuery("SELECT imp_date, song_name, artist_name, country, f_id, g_name, "
                + " rating, languages, releasedate, resolution FROM song");
        datasourceService.createDatasource(datasourceReq, user);
    }

}
@@ -0,0 +1,334 @@
package com.tencent.supersonic;

import com.google.common.collect.Lists;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.auth.api.authorization.pojo.AuthGroup;
import com.tencent.supersonic.auth.api.authorization.pojo.AuthRule;
import com.tencent.supersonic.auth.api.authorization.service.AuthService;
import com.tencent.supersonic.common.pojo.enums.AggOperatorEnum;
import com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum;
import com.tencent.supersonic.common.pojo.enums.SensitiveLevelEnum;
import com.tencent.supersonic.semantic.api.model.enums.DimensionTypeEnum;
import com.tencent.supersonic.semantic.api.model.enums.IdentifyTypeEnum;
import com.tencent.supersonic.semantic.api.model.enums.SemanticTypeEnum;
import com.tencent.supersonic.semantic.api.model.pojo.Dim;
import com.tencent.supersonic.semantic.api.model.pojo.DimensionTimeTypeParams;
import com.tencent.supersonic.semantic.api.model.pojo.Entity;
import com.tencent.supersonic.semantic.api.model.pojo.Identify;
import com.tencent.supersonic.semantic.api.model.pojo.Measure;
import com.tencent.supersonic.semantic.api.model.pojo.MetricTypeParams;
import com.tencent.supersonic.semantic.api.model.request.DatabaseReq;
import com.tencent.supersonic.semantic.api.model.request.DatasourceReq;
import com.tencent.supersonic.semantic.api.model.request.DimensionReq;
import com.tencent.supersonic.semantic.api.model.request.DomainReq;
import com.tencent.supersonic.semantic.api.model.request.MetricReq;
import com.tencent.supersonic.semantic.api.model.request.ModelReq;
import com.tencent.supersonic.semantic.model.domain.DatabaseService;
import com.tencent.supersonic.semantic.model.domain.DatasourceService;
import com.tencent.supersonic.semantic.model.domain.DimensionService;
import com.tencent.supersonic.semantic.model.domain.DomainService;
import com.tencent.supersonic.semantic.model.domain.MetricService;
import com.tencent.supersonic.semantic.model.domain.ModelService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.CommandLineRunner;
import org.springframework.core.annotation.Order;
import org.springframework.stereotype.Component;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * Seeds the sample database, models, datasources, dimensions, metrics and
 * auth groups for the demo environment when spring.h2.demo.enabled=true.
 */
@Component
@Slf4j
@Order(1)
public class LoadModelDataDemo implements CommandLineRunner {

    private User user = User.getFakeUser();

    @Value("${spring.h2.demo.enabled:false}")
    private boolean demoEnable;

    @Autowired
    private DatabaseService databaseService;
    @Autowired
    private DomainService domainService;
    @Autowired
    private ModelService modelService;
    @Autowired
    private DatasourceService datasourceService;
    @Autowired
    private DimensionService dimensionService;
    @Autowired
    private MetricService metricService;
    @Autowired
    private AuthService authService;

    @Override
    public void run(String... args) {
        if (!demoEnable) {
            return;
        }
        try {
            addDatabase();
            addDomain();
            addModel_1();
            addDatasource_1();
            addDatasource_2();
            addDatasource_3();
            addModel_2();
            addDatasource_4();
            updateDimension();
            updateMetric();
            addAuthGroup_1();
            addAuthGroup_2();
        } catch (Exception e) {
            log.error("Failed to add model demo data", e);
        }
    }

    public void addDatabase() {
        DatabaseReq databaseReq = new DatabaseReq();
        databaseReq.setName("H2数据实例");
        databaseReq.setDescription("样例数据库实例");
        databaseReq.setType("h2");
        databaseReq.setUrl("jdbc:h2:mem:semantic;DATABASE_TO_UPPER=false");
        databaseReq.setUsername("root");
        databaseReq.setPassword("semantic");
        databaseService.createOrUpdateDatabase(databaseReq, user);
    }

    public void addDomain() {
        DomainReq domainReq = new DomainReq();
        domainReq.setName("超音数");
        domainReq.setBizName("supersonic");
        domainReq.setParentId(0L);
        domainReq.setViewers(Arrays.asList("admin", "tom", "jack"));
        domainReq.setViewOrgs(Collections.singletonList("admin"));
        domainReq.setAdmins(Collections.singletonList("admin"));
        domainReq.setAdminOrgs(Collections.emptyList());
        domainService.createDomain(domainReq, user);
    }

    public void addModel_1() {
        ModelReq modelReq = new ModelReq();
        modelReq.setName("超音数");
        modelReq.setBizName("supersonic");
        modelReq.setDomainId(1L);
        modelReq.setViewers(Arrays.asList("admin", "tom", "jack"));
        modelReq.setViewOrgs(Collections.singletonList("admin"));
        modelReq.setAdmins(Collections.singletonList("admin"));
        modelReq.setAdminOrgs(Collections.emptyList());
        modelService.createModel(modelReq, user);
    }

    public void addDatasource_1() throws Exception {
        DatasourceReq datasourceReq = new DatasourceReq();
        datasourceReq.setName("用户部门");
        datasourceReq.setBizName("user_department");
        datasourceReq.setDescription("用户部门");
        datasourceReq.setDatabaseId(1L);

        List<Identify> identifiers = new ArrayList<>();
        identifiers.add(new Identify("用户名", IdentifyTypeEnum.primary.name(), "user_name"));
        datasourceReq.setIdentifiers(identifiers);

        List<Dim> dimensions = new ArrayList<>();
        dimensions.add(new Dim("部门", "department",
                DimensionTypeEnum.categorical.name(), 1));
        datasourceReq.setDimensions(dimensions);

        datasourceReq.setMeasures(Collections.emptyList());
        datasourceReq.setQueryType("table_query");
        datasourceReq.setTableQuery("PUBLIC.s2_user_department");
        datasourceReq.setModelId(1L);
        datasourceService.createDatasource(datasourceReq, user);
    }

    public void addDatasource_2() throws Exception {
        DatasourceReq datasourceReq = new DatasourceReq();
        datasourceReq.setName("PVUV统计");
        datasourceReq.setBizName("s2_pv_uv_statis");
        datasourceReq.setDescription("PVUV统计");
        datasourceReq.setDatabaseId(1L);

        List<Identify> identifiers = new ArrayList<>();
        identifiers.add(new Identify("用户名", IdentifyTypeEnum.primary.name(), "user_name"));
        datasourceReq.setIdentifiers(identifiers);

        List<Dim> dimensions = new ArrayList<>();
        Dim dimension1 = new Dim("", "imp_date", DimensionTypeEnum.time.name(), 0);
        dimension1.setTypeParams(new DimensionTimeTypeParams());
        dimensions.add(dimension1);
        Dim dimension2 = new Dim("", "page", DimensionTypeEnum.categorical.name(), 0);
        dimensions.add(dimension2);
        datasourceReq.setDimensions(dimensions);

        List<Measure> measures = new ArrayList<>();
        Measure measure1 = new Measure("访问次数", "pv", AggOperatorEnum.SUM.name(), 1);
        measures.add(measure1);

        Measure measure2 = new Measure("访问人数", "uv", AggOperatorEnum.COUNT_DISTINCT.name(), 1);
        measures.add(measure2);

        datasourceReq.setMeasures(measures);
        datasourceReq.setSqlQuery("SELECT imp_date, user_name, page, 1 as pv, user_name as uv FROM s2_pv_uv_statis");
        datasourceReq.setQueryType("sql_query");
        datasourceReq.setModelId(1L);
        datasourceService.createDatasource(datasourceReq, user);
    }

    public void addDatasource_3() throws Exception {
        DatasourceReq datasourceReq = new DatasourceReq();
        datasourceReq.setName("停留时长统计");
        datasourceReq.setBizName("s2_stay_time_statis");
        datasourceReq.setDescription("停留时长统计");
        datasourceReq.setDatabaseId(1L);

        List<Identify> identifiers = new ArrayList<>();
        identifiers.add(new Identify("用户名", IdentifyTypeEnum.primary.name(), "user_name"));
        datasourceReq.setIdentifiers(identifiers);

        List<Dim> dimensions = new ArrayList<>();
        Dim dimension1 = new Dim("", "imp_date", DimensionTypeEnum.time.name(), 0);
        dimension1.setTypeParams(new DimensionTimeTypeParams());
        dimensions.add(dimension1);
        Dim dimension2 = new Dim("页面", "page", DimensionTypeEnum.categorical.name(), 1);
        dimensions.add(dimension2);
        datasourceReq.setDimensions(dimensions);

        List<Measure> measures = new ArrayList<>();
        Measure measure1 = new Measure("停留时长", "stay_hours", AggregateTypeEnum.SUM.name(), 1);
        measures.add(measure1);

        datasourceReq.setMeasures(measures);
        datasourceReq.setTableQuery("PUBLIC.s2_stay_time_statis");
        datasourceReq.setQueryType("table_query");
        datasourceReq.setModelId(1L);
        datasourceService.createDatasource(datasourceReq, user);
    }

    public void addModel_2() {
        ModelReq modelReq = new ModelReq();
        modelReq.setName("艺人库");
        modelReq.setBizName("singer");
        modelReq.setDomainId(1L);
        modelReq.setViewers(Arrays.asList("admin", "tom", "jack"));
        modelReq.setViewOrgs(Collections.singletonList("admin"));
        modelReq.setAdmins(Collections.singletonList("admin"));
        modelReq.setAdminOrgs(Collections.emptyList());
        modelReq.setEntity(new Entity(7L, Arrays.asList("歌手", "艺人")));
        modelService.createModel(modelReq, user);
    }

    public void addDatasource_4() throws Exception {
        DatasourceReq datasourceReq = new DatasourceReq();
        datasourceReq.setName("艺人库");
        datasourceReq.setBizName("singer");
        datasourceReq.setDescription("艺人库");
        datasourceReq.setDatabaseId(1L);

        List<Identify> identifiers = new ArrayList<>();
        identifiers.add(new Identify("歌手名", IdentifyTypeEnum.primary.name(), "singer_name"));
        datasourceReq.setIdentifiers(identifiers);

        List<Dim> dimensions = new ArrayList<>();
        Dim dimension1 = new Dim("", "imp_date", DimensionTypeEnum.time.name(), 0);
        dimension1.setTypeParams(new DimensionTimeTypeParams());
        dimensions.add(dimension1);
        dimensions.add(new Dim("活跃区域", "act_area",
                DimensionTypeEnum.categorical.name(), 1));
        dimensions.add(new Dim("代表作", "song_name",
                DimensionTypeEnum.categorical.name(), 1));
        dimensions.add(new Dim("风格", "genre",
                DimensionTypeEnum.categorical.name(), 1));
        datasourceReq.setDimensions(dimensions);

        Measure measure1 = new Measure("播放量", "js_play_cnt", "sum", 1);
        Measure measure2 = new Measure("下载量", "down_cnt", "sum", 1);
        Measure measure3 = new Measure("收藏量", "favor_cnt", "sum", 1);
        datasourceReq.setMeasures(Lists.newArrayList(measure1, measure2, measure3));
        datasourceReq.setQueryType("table_query");
        datasourceReq.setTableQuery("PUBLIC.singer");
        datasourceReq.setModelId(2L);
        datasourceService.createDatasource(datasourceReq, user);
    }

    public void updateDimension() throws Exception {
        DimensionReq dimensionReq = new DimensionReq();
        dimensionReq.setModelId(1L);
        dimensionReq.setType(DimensionTypeEnum.categorical.name());
        dimensionReq.setId(3L);
        dimensionReq.setName("页面");
        dimensionReq.setBizName("page");
        dimensionReq.setDatasourceId(3L);
        dimensionReq.setAlias("page");
        dimensionReq.setSemanticType(SemanticTypeEnum.CATEGORY.name());
        dimensionReq.setSensitiveLevel(2);
        dimensionReq.setDescription("页面");
        dimensionReq.setExpr("page");
        dimensionReq.setDimValueMaps(Collections.emptyList());
        dimensionService.updateDimension(dimensionReq, user);
    }

    public void updateMetric() throws Exception {
        MetricReq metricReq = new MetricReq();
        metricReq.setModelId(1L);
        metricReq.setId(3L);
        metricReq.setName("停留时长");
        metricReq.setBizName("stay_hours");
        metricReq.setSensitiveLevel(SensitiveLevelEnum.HIGH.getCode());
        metricReq.setDescription("停留时长");
        metricReq.setTags(Collections.singletonList("核心指标"));
        metricReq.setAlias("访问时长");
        MetricTypeParams metricTypeParams = new MetricTypeParams();
        metricTypeParams.setExpr("s2_stay_time_statis_stay_hours");
        List<Measure> measures = new ArrayList<>();
        Measure measure = new Measure("停留时长",
                "s2_stay_time_statis_stay_hours", AggOperatorEnum.SUM.getOperator(), 1);
        measure.setDatasourceId(3L);
        measures.add(measure);
        metricTypeParams.setMeasures(measures);
        metricReq.setTypeParams(metricTypeParams);
        metricService.updateExprMetric(metricReq, user);
    }

    public void addAuthGroup_1() {
        AuthGroup authGroupReq = new AuthGroup();
        authGroupReq.setModelId("1");
        authGroupReq.setName("admin-permission");

        List<AuthRule> authRules = new ArrayList<>();
        AuthRule authRule = new AuthRule();
        authRule.setMetrics(Collections.singletonList("stay_hours"));
        authRule.setDimensions(Collections.singletonList("page"));
        authRules.add(authRule);

        authGroupReq.setAuthRules(authRules);
        authGroupReq.setAuthorizedUsers(Collections.singletonList("jack"));
        authGroupReq.setAuthorizedDepartmentIds(Collections.emptyList());
        authService.addOrUpdateAuthGroup(authGroupReq);
    }

    public void addAuthGroup_2() {
        AuthGroup authGroupReq = new AuthGroup();
        authGroupReq.setModelId("1");
        authGroupReq.setName("tom_sales_permission");

        List<AuthRule> authRules = new ArrayList<>();
        AuthRule authRule = new AuthRule();
        authRule.setMetrics(Collections.singletonList("stay_hours"));
        authRule.setDimensions(Collections.singletonList("page"));
        authRules.add(authRule);

        authGroupReq.setAuthRules(authRules);
        authGroupReq.setDimensionFilters(Collections.singletonList("department in ('sales')"));
        authGroupReq.setDimensionFilterDescription("部门 in [sales]");
        authGroupReq.setAuthorizedUsers(Collections.singletonList("tom"));
        authGroupReq.setAuthorizedDepartmentIds(Collections.emptyList());
        authService.addOrUpdateAuthGroup(authGroupReq);
    }

}
@@ -31,12 +31,9 @@ com.tencent.supersonic.auth.api.authentication.adaptor.UserAdaptor=\
 com.tencent.supersonic.auth.authentication.adaptor.DefaultUserAdaptor
 
 com.tencent.supersonic.chat.api.component.SemanticCorrector=\
-com.tencent.supersonic.chat.corrector.DateFieldCorrector, \
-com.tencent.supersonic.chat.corrector.FunctionAliasCorrector, \
-com.tencent.supersonic.chat.corrector.FieldNameCorrector, \
-com.tencent.supersonic.chat.corrector.FieldCorrector, \
-com.tencent.supersonic.chat.corrector.FunctionCorrector, \
-com.tencent.supersonic.chat.corrector.TableNameCorrector, \
-com.tencent.supersonic.chat.corrector.QueryFilterAppend, \
-com.tencent.supersonic.chat.corrector.SelectFieldAppendCorrector, \
-com.tencent.supersonic.chat.corrector.FieldValueCorrector
+com.tencent.supersonic.chat.corrector.GlobalCorrector, \
+com.tencent.supersonic.chat.corrector.TableCorrector, \
+com.tencent.supersonic.chat.corrector.GroupByCorrector, \
+com.tencent.supersonic.chat.corrector.SelectCorrector, \
+com.tencent.supersonic.chat.corrector.WhereCorrector, \
+com.tencent.supersonic.chat.corrector.HavingCorrector
@@ -0,0 +1,31 @@
孟加拉国 _3_8 9000
锡尔赫特、吉大港、库斯蒂亚 _3_8 9000
加拿大 _3_8 9000
美国 _3_8 9000
tagore _3_9 9000
nazrul _3_9 9000
民间 _3_9 9000
现代 _3_9 9000
蓝调 _3_9 9000
流行 _3_9 9000
孟加拉国 _3_10 9000
印度 _3_10 9000
美国 _3_10 9000
英国 _3_10 9000
男性 _3_11 9000
女性 _3_11 9000
Shrikanta _3_12 9000
Prity _3_12 9000
Farida _3_12 9000
Topu _3_12 9000
Enrique _3_12 9000
Michel _3_12 9000
mp4 _3_14 9000
mp3 _3_14 9000
孟加拉语 _3_16 9000
英文 _3_16 9000
Tumi#长袍#尼罗布 _3_18 9000
舒克诺#帕塔尔#努普尔#帕埃 _3_18 9000
阿米·奥帕尔·霍伊 _3_18 9000
我的爱 _3_18 9000
打败它 _3_18 9000
@@ -5,32 +5,6 @@ insert into s2_user (id, `name`, password, display_name, email) values (3, 'tom'
insert into s2_user (id, `name`, password, display_name, email, is_admin) values (4, 'lucy','123456','lucy','lucy@xx.com', 1);
insert into s2_user (id, `name`, password, display_name, email) values (5, 'alice','123456','alice','alice@xx.com');
-- sample models
insert into s2_domain (id, `name`, biz_name, parent_id, status, created_at, created_by, updated_at, updated_by, `admin`, admin_org, viewer, view_org) VALUES(1, '超音数', 'supersonic', 0, 1, '2023-05-24 00:00:00', 'admin', '2023-05-24 00:00:00', 'admin', 'admin', '', 'admin,tom,jack', 'admin' );
insert into s2_model (id, `name`, biz_name, domain_id, created_at, created_by, updated_at, updated_by, `admin`, admin_org, is_open, viewer, view_org, entity) VALUES(1, '超音数', 'supersonic', 1, '2023-05-24 00:00:00', 'admin', '2023-05-24 00:00:00', 'admin', 'admin', '', 0, 'admin,tom,jack', 'admin','' );
insert into s2_model (id, `name`, biz_name, domain_id, created_at, created_by, updated_at, updated_by, `admin`, admin_org, is_open, viewer, view_org, entity) VALUES(2, '艺人库', 'singer', 1, '2023-05-24 00:00:00', 'admin', '2023-05-24 00:00:00', 'admin', 'admin', '', 0, 'admin,tom,jack', 'admin','{"entityId": 7, "names": ["歌手", "艺人"]}' );
insert into s2_database (id, `name`, description, `type` ,config ,created_at ,created_by ,updated_at ,updated_by, `admin`) VALUES(1, 'H2数据实例', '', 'h2', '{"password":"semantic","url":"jdbc:h2:mem:semantic;DATABASE_TO_UPPER=false","userName":"root"}', '2023-05-24 00:00:00', 'admin', '2023-05-24 00:00:00', 'admin', 'admin');
insert into s2_datasource (id , model_id, `name`, biz_name, description, database_id ,datasource_detail, created_at, created_by, updated_at, updated_by ) VALUES(1, 1, '停留时长统计', 's2_stay_time_statis', '停留时长统计', 1, '{"dimensions":[{"bizName":"imp_date","dateFormat":"yyyy-MM-dd","expr":"imp_date","isCreateDimension":0,"type":"time","typeParams":{"isPrimary":"true","timeGranularity":"day"}},{"bizName":"page","dateFormat":"yyyy-MM-dd","expr":"page","isCreateDimension":0,"type":"categorical"}],"identifiers":[{"bizName":"user_name","name":"用户名","type":"primary"}],"measures":[{"agg":"sum","bizName":"s2_stay_time_statis_stay_hours","expr":"stay_hours","isCreateMetric":1,"name":"停留时长"}],"queryType":"sql_query","sqlQuery":"SELECT imp_date, page,user_name,stay_hours FROM s2_stay_time_statis"}', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin');
insert into s2_datasource (id , model_id, `name`, biz_name, description, database_id ,datasource_detail, created_at, created_by, updated_at, updated_by ) VALUES(2, 1, 'PVUV统计', 's2_pv_uv_statis', 'PVUV统计', 1, '{"dimensions":[{"bizName":"imp_date","dateFormat":"yyyy-MM-dd","expr":"imp_date","isCreateDimension":0,"type":"time","typeParams":{"isPrimary":"true","timeGranularity":"day"}},{"bizName":"page","dateFormat":"yyyy-MM-dd","expr":"page","isCreateDimension":0,"type":"categorical"}],"identifiers":[{"bizName":"user_name","name":"用户名","type":"primary"}],"measures":[{"agg":"sum","bizName":"s2_pv_uv_statis_pv","expr":"pv","isCreateMetric":1,"name":"访问次数"},{"agg":"count_distinct","bizName":"s2_pv_uv_statis_uv","expr":"uv","isCreateMetric":1,"name":"访问人数"}],"queryType":"sql_query","sqlQuery":"SELECT imp_date, user_name,page,1 as pv, user_name as uv FROM s2_pv_uv_statis"}', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin');
insert into s2_datasource (id , model_id, `name`, biz_name, description, database_id ,datasource_detail, created_at, created_by, updated_at, updated_by ) VALUES(3, 1, '用户部门', 'user_department', '用户部门', 1, '{"dimensions":[{"bizName":"department","dateFormat":"yyyy-MM-dd","expr":"department","isCreateDimension":1,"name":"部门","type":"categorical"}],"identifiers":[{"bizName":"user_name","name":"用户名","type":"primary"}],"measures":[],"queryType":"sql_query","sqlQuery":"SELECT user_name,department FROM s2_user_department"}', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin');
insert into s2_datasource (id , model_id, `name`, biz_name, description, database_id ,datasource_detail, created_at, created_by, updated_at, updated_by ) VALUES(4, 2, '艺人库', 'singer', '艺人库', 1, '{"dimensions":[{"bizName":"imp_date","dateFormat":"yyyy-MM-dd","expr":"imp_date","isCreateDimension":0,"type":"time","typeParams":{"isPrimary":"true","timeGranularity":"day"}},{"bizName":"act_area","dateFormat":"yyyy-MM-dd","expr":"act_area","isCreateDimension":1,"name":"活跃区域","type":"categorical"},{"bizName":"song_name","dateFormat":"yyyy-MM-dd","expr":"song_name","isCreateDimension":1,"name":"代表作","type":"categorical"},{"bizName":"genre","dateFormat":"yyyy-MM-dd","expr":"genre","isCreateDimension":1,"name":"风格","type":"categorical"}],"identifiers":[{"bizName":"singer_name","name":"歌手名","type":"primary"}],"measures":[{"agg":"sum","bizName":"music_down_cnt","expr":"down_cnt","isCreateMetric":1,"name":"下载量"},{"agg":"sum","bizName":"music_js_play_cnt","expr":"js_play_cnt","isCreateMetric":1,"name":"播放量"},{"agg":"sum","bizName":"music_favor_cnt","expr":"favor_cnt","isCreateMetric":1,"name":"收藏量"}],"queryType":"sql_query","sqlQuery":"SELECT imp_date,singer_name,act_area,song_name,genre,js_play_cnt,down_cnt,favor_cnt FROM singer "}', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin');
insert into s2_datasource_rela (id , model_id, `datasource_from`, datasource_to, join_key, created_at, created_by, updated_at, updated_by ) VALUES(1, 1, 1, 2, 'user_name', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin');
insert into s2_datasource_rela (id , model_id, `datasource_from`, datasource_to, join_key, created_at, created_by, updated_at, updated_by ) VALUES(2, 1, 1, 3, 'user_name', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin');
insert into s2_datasource_rela (id , model_id, `datasource_from`, datasource_to, join_key, created_at, created_by, updated_at, updated_by ) VALUES(3, 1, 2, 3, 'user_name', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin');
insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type, dim_value_maps) VALUES(1, 1, 3, '部门', 'department', '部门', 1, 0, 'categorical', NULL, 'department', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY', '[{"alias":["人力资源","人力"],"bizName":"人力资源","techName":"HR"},{"alias":["营销","销售"],"bizName":"营销部门","techName":"sales"}]');
insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(2, 1, 1, '用户名', 'user_name', '用户名', 1, 0, 'primary', NULL, 'user_name', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY');
insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(3, 1, 2, '页面', 'page', '页面', 1, 2, 'categorical', NULL, 'page', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY');
insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(4, 2, 4, '活跃区域', 'act_area', '活跃区域', 1, 2, 'categorical', NULL, 'act_area', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY');
insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(5, 2, 4, '代表作', 'song_name', '代表作', 1, 2, 'categorical', NULL, 'song_name', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY');
insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(6, 2, 4, '风格', 'genre', '风格', 1, 2, 'categorical', NULL, 'genre', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY');
insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(7, 2, 4, '歌手名', 'singer_name', '歌手名', 1, 2, 'categorical', NULL, 'singer_name', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY');
insert into s2_metric (id, model_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, created_at, created_by, updated_at, updated_by, data_format_type, data_format) VALUES(1, 1, '停留时长', 'stay_hours', '停留时长', 1, 2, 'ATOMIC', '{"expr":"s2_stay_time_statis_stay_hours","measures":[{"agg":"sum","expr":"stay_hours","isCreateMetric":1,"datasourceId":1,"bizName":"s2_stay_time_statis_stay_hours","name":"s2_stay_time_statis_stay_hours"}]}' , '2023-05-24 17:00:00', 'admin', '2023-05-25 00:00:00', 'admin', NULL, NULL );
insert into s2_metric (id, model_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, created_at, created_by, updated_at, updated_by, data_format_type, data_format) VALUES(2, 1, '访问次数', 'pv', '访问次数', 1, 0, 'ATOMIC', ' {"expr":"s2_pv_uv_statis_pv","measures":[{"agg":"sum","bizName":"s2_pv_uv_statis_pv","datasourceId":2,"expr":"pv","isCreateMetric":1,"name":"s2_pv_uv_statis_pv"}]}' , '2023-05-24 17:00:00', 'admin', '2023-05-25 00:00:00', 'admin', NULL, NULL );
insert into s2_metric (id, model_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, created_at, created_by, updated_at, updated_by, data_format_type, data_format) VALUES(3, 1, '访问人数', 'uv', '访问人数', 1, 0, 'ATOMIC', ' {"expr":"s2_pv_uv_statis_uv","measures":[{"agg":"count_distinct","bizName":"s2_pv_uv_statis_uv","datasourceId":2,"expr":"uv","isCreateMetric":1,"name":"s2_pv_uv_statis_uv"}]}' , '2023-05-24 17:00:00', 'admin', '2023-05-25 00:00:00', 'admin', NULL, NULL );
insert into s2_metric (id, model_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, created_at, created_by, updated_at, updated_by, data_format_type, data_format) VALUES(4, 2, '播放量', 'js_play_cnt', '播放量', 1, 2, 'ATOMIC', '{"expr":"music_js_play_cnt","measures":[{"agg":"sum","expr":"js_play_cnt","isCreateMetric":1,"datasourceId":4,"bizName":"music_js_play_cnt","name":"music_js_play_cnt"}]}' , '2023-05-24 17:00:00', 'admin', '2023-05-25 00:00:00', 'admin', NULL, NULL );
insert into s2_metric (id, model_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, created_at, created_by, updated_at, updated_by, data_format_type, data_format) VALUES(5, 2, '下载量', 'down_cnt', '下载量', 1, 0, 'ATOMIC', ' {"expr":"music_down_cnt","measures":[{"agg":"sum","bizName":"music_down_cnt","datasourceId":4,"expr":"down_cnt","isCreateMetric":1,"name":"music_down_cnt"}]}' , '2023-05-24 17:00:00', 'admin', '2023-05-25 00:00:00', 'admin', NULL, NULL );
insert into s2_metric (id, model_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, created_at, created_by, updated_at, updated_by, data_format_type, data_format) VALUES(6, 2, '收藏量', 'favor_cnt', '收藏量', 1, 0, 'ATOMIC', ' {"expr":"music_favor_cnt","measures":[{"agg":"sum","bizName":"music_favor_cnt","datasourceId":4,"expr":"favor_cnt","isCreateMetric":1,"name":"music_favor_cnt"}]}' , '2023-05-24 17:00:00', 'admin', '2023-05-25 00:00:00', 'admin', NULL, NULL );
insert into s2_available_date_info(`item_id` ,`type` ,`date_format` ,`start_date` ,`end_date` ,`unavailable_date` ,`created_at` ,`created_by` ,`updated_at` ,`updated_by` )
values (1, 'dimension', 'yyyy-MM-dd', DATEADD('DAY', -28, CURRENT_DATE()), DATEADD('DAY', -1, CURRENT_DATE()), '[]', '2023-06-01', 'admin', '2023-06-01', 'admin');
insert into s2_available_date_info(`item_id` ,`type` ,`date_format` ,`start_date` ,`end_date` ,`unavailable_date` ,`created_at` ,`created_by` ,`updated_at` ,`updated_by` )
@@ -38,11 +12,6 @@ values (2, 'dimension', 'yyyy-MM-dd', DATEADD('DAY', -28, CURRENT_DATE()), DATEA
insert into s2_available_date_info(`item_id` ,`type` ,`date_format` ,`start_date` ,`end_date` ,`unavailable_date` ,`created_at` ,`created_by` ,`updated_at` ,`updated_by` )
values (3, 'dimension', 'yyyy-MM-dd', DATEADD('DAY', -28, CURRENT_DATE()), DATEADD('DAY', -1, CURRENT_DATE()), '[]', '2023-06-01', 'admin', '2023-06-01', 'admin');
insert into s2_auth_groups (group_id, config)
values (1, '{"modelId":"1","name":"admin-permission","groupId":1,"authRules":[{"metrics":["stay_hours"],"dimensions":["page"]}],"dimensionFilters":[""],"dimensionFilterDescription":"授权admin 页面和停留时长权限","authorizedUsers":["admin"],"authorizedDepartmentIds":[]}');
insert into s2_auth_groups (group_id, config)
values (2, '{"modelId":"1","name":"tom_sales_permission","groupId":2,"authRules":[{"metrics":["stay_hours"],"dimensions":["page"]}],"dimensionFilters":["department in (''sales'')"],"dimensionFilterDescription":"部门 in [sales]", "authorizedUsers":["tom"],"authorizedDepartmentIds":[]}');
-- sample data
INSERT INTO singer (imp_date,singer_name,act_area, song_name,genre,js_play_cnt,down_cnt,favor_cnt) VALUES (DATEADD('DAY', -1, CURRENT_DATE()), '周杰伦', '港台','青花瓷','国风',1000000,1000000,1000000);
INSERT INTO singer (imp_date,singer_name,act_area, song_name,genre,js_play_cnt,down_cnt,favor_cnt) VALUES (DATEADD('DAY', -5, CURRENT_DATE()), '周杰伦', '港台','青花瓷','国风',1000000,1000000,1000000);
@@ -1108,3 +1077,35 @@ INSERT INTO s2_stay_time_statis (imp_date, user_name, stay_hours, page) VALUES (
INSERT INTO s2_stay_time_statis (imp_date, user_name, stay_hours, page) VALUES (DATEADD('DAY', -19, CURRENT_DATE()), 'alice', '0.8131712486302015', 'p2');
INSERT INTO s2_stay_time_statis (imp_date, user_name, stay_hours, page) VALUES (DATEADD('DAY', -15, CURRENT_DATE()), 'lucy', '0.8124302447925607', 'p4');
INSERT INTO s2_stay_time_statis (imp_date, user_name, stay_hours, page) VALUES (DATEADD('DAY', -8, CURRENT_DATE()), 'lucy', '0.039935860913407284', 'p2');
insert into genre(g_name,rating,most_popular_in) VALUES ('tagore',8,'孟加拉国');
insert into genre(g_name,rating,most_popular_in) VALUES ('nazrul',7,'孟加拉国');
insert into genre(g_name,rating,most_popular_in) VALUES ('民间',9,'锡尔赫特、吉大港、库斯蒂亚');
insert into genre(g_name,rating,most_popular_in) VALUES ('现代',8,'孟加拉国');
insert into genre(g_name,rating,most_popular_in) VALUES ('蓝调',7,'加拿大');
insert into genre(g_name,rating,most_popular_in) VALUES ('流行',9,'美国');
insert into artist(artist_name,country,gender,g_name) VALUES ('Shrikanta','印度','男性','tagore');
insert into artist(artist_name,country,gender,g_name) VALUES ('Prity','孟加拉国','女性','nazrul');
insert into artist(artist_name,country,gender,g_name) VALUES ('Farida','孟加拉国','女性','民间');
insert into artist(artist_name,country,gender,g_name) VALUES ('Topu','印度','女性','现代');
insert into artist(artist_name,country,gender,g_name) VALUES ('Enrique','美国','男性','蓝调');
insert into artist(artist_name,country,gender,g_name) VALUES ('Michel','英国','男性','流行');
insert into files(f_id,artist_name,file_size,duration,formats) VALUES (1,'Shrikanta','3.78 MB','3:45','mp4');
insert into files(f_id,artist_name,file_size,duration,formats) VALUES (2,'Prity','4.12 MB','2:56','mp3');
insert into files(f_id,artist_name,file_size,duration,formats) VALUES (3,'Farida','3.69 MB','4:12','mp4');
insert into files(f_id,artist_name,file_size,duration,formats) VALUES (4,'Enrique','4.58 MB','5:23','mp4');
insert into files(f_id,artist_name,file_size,duration,formats) VALUES (5,'Michel','5.10 MB','4:34','mp3');
insert into files(f_id,artist_name,file_size,duration,formats) VALUES (6,'Topu','4.10 MB','4:30','mp4');
insert into song(imp_date,song_name,artist_name,country,f_id,g_name,rating,languages,releasedate,resolution) VALUES (DATEADD('DAY', 0, CURRENT_DATE()),'Tumi 长袍 尼罗布','Shrikanta','印度',1,'tagore',8,'孟加拉语','28-AUG-2011',1080);
insert into song(imp_date,song_name,artist_name,country,f_id,g_name,rating,languages,releasedate,resolution) VALUES (DATEADD('DAY', 0, CURRENT_DATE()),'舒克诺 帕塔尔 努普尔 帕埃','Prity','孟加拉国',2,'nazrul',5,'孟加拉语','21-SEP-1997',512);
insert into song(imp_date,song_name,artist_name,country,f_id,g_name,rating,languages,releasedate,resolution) VALUES (DATEADD('DAY', 0, CURRENT_DATE()),'阿米·奥帕尔·霍伊','Farida','孟加拉国',3,'民间',7,'孟加拉语','7-APR-2001',320);
insert into song(imp_date,song_name,artist_name,country,f_id,g_name,rating,languages,releasedate,resolution) VALUES (DATEADD('DAY', 0, CURRENT_DATE()),'我的爱','Enrique','美国',4,'蓝调',6,'英文','24-JAN-2007',1080);
insert into song(imp_date,song_name,artist_name,country,f_id,g_name,rating,languages,releasedate,resolution) VALUES (DATEADD('DAY', 0, CURRENT_DATE()),'打败它','Michel','英国',5,'流行',8,'英文','17-MAR-2002',720);
insert into song(imp_date,song_name,artist_name,country,f_id,g_name,rating,languages,releasedate,resolution) VALUES (DATEADD('DAY', 0, CURRENT_DATE()),'阿杰伊阿卡什','Topu','印度',6,'现代',10,'孟加拉语','27-MAR-2004',320);
-- benchmark
@@ -414,4 +414,47 @@ COMMENT ON TABLE s2_dictionary_task IS 'dictionary task information table';
-- benchmark
CREATE TABLE IF NOT EXISTS `genre` (
    `g_name` varchar(20) NOT NULL, -- genre name
    `rating` INT,
    `most_popular_in` varchar(50),
    PRIMARY KEY (`g_name`)
);
COMMENT ON TABLE genre IS 'genre';

CREATE TABLE IF NOT EXISTS `artist` (
    `artist_name` varchar(50) NOT NULL, -- artist name
    `country` varchar(20),
    `gender` varchar(20),
    `g_name` varchar(50)
);
COMMENT ON TABLE artist IS 'artist';

CREATE TABLE IF NOT EXISTS `files` (
    `f_id` BIGINT NOT NULL,
    `artist_name` varchar(50),
    `file_size` varchar(20),
    `duration` varchar(20),
    `formats` varchar(20),
    PRIMARY KEY (`f_id`)
);
COMMENT ON TABLE files IS 'files';

CREATE TABLE IF NOT EXISTS `song` (
    `imp_date` varchar(50),
    `song_name` varchar(50),
    `artist_name` varchar(50),
    `country` varchar(20),
    `f_id` BIGINT,
    `g_name` varchar(20),
    `rating` INT,
    `languages` varchar(20),
    `releasedate` varchar(50),
    `resolution` BIGINT NOT NULL
);
COMMENT ON TABLE song IS 'song';

-- benchmark
@@ -1,2 +1,2 @@
 root=.
-CustomDictionaryPath=data/dictionary/custom/DimValue_1_1.txt;data/dictionary/custom/DimValue_1_2.txt;data/dictionary/custom/DimValue_1_3.txt;
+CustomDictionaryPath=data/dictionary/custom/DimValue_1_1.txt;data/dictionary/custom/DimValue_1_2.txt;data/dictionary/custom/DimValue_1_3.txt;data/dictionary/custom/benchmark_cspider.txt;
@@ -0,0 +1,10 @@
package com.tencent.supersonic.benchmark;

import org.junit.Test;

public class CSpider {

    @Test
    public void case1() {
        // placeholder for CSpider benchmark cases; intentionally empty for now
    }
}
@@ -210,7 +210,7 @@ public class MetricQueryTest extends BaseQueryTest {
         ChatConfigEditReqReq extendEditCmd = new ChatConfigEditReqReq();
         BeanUtils.copyProperties(chatConfig, extendEditCmd);
         // add blacklist
-        List<Long> blackMetrics = Arrays.asList(3L);
+        List<Long> blackMetrics = Arrays.asList(2L);
         extendEditCmd.getChatAggConfig().getVisibility().setBlackMetricIdList(blackMetrics);
         configService.editConfig(extendEditCmd, User.getFakeUser());
@@ -4,32 +4,6 @@ insert into s2_user (id, `name`, password, display_name, email) values (2, 'jack
insert into s2_user (id, `name`, password, display_name, email) values (3, 'tom','123456','tom','tom@xx.com');
insert into s2_user (id, `name`, password, display_name, email) values (4, 'lucy','123456','lucy','lucy@xx.com');
-- sample models
insert into s2_domain (id, `name`, biz_name, parent_id, status, created_at, created_by, updated_at, updated_by, `admin`, admin_org, viewer, view_org) VALUES(1, '超音数', 'supersonic', 0, 1, '2023-05-24 00:00:00', 'admin', '2023-05-24 00:00:00', 'admin', 'admin', '', 'admin,tom,jack', 'admin' );
insert into s2_model (id, `name`, biz_name, domain_id, created_at, created_by, updated_at, updated_by, `admin`, admin_org, is_open, viewer, view_org, entity) VALUES(1, '超音数', 'supersonic', 1, '2023-05-24 00:00:00', 'admin', '2023-05-24 00:00:00', 'admin', 'admin', '', 0, 'admin,tom,jack', 'admin','' );
insert into s2_model (id, `name`, biz_name, domain_id, created_at, created_by, updated_at, updated_by, `admin`, admin_org, is_open, viewer, view_org, entity) VALUES(2, '艺人库', 'singer', 1, '2023-05-24 00:00:00', 'admin', '2023-05-24 00:00:00', 'admin', 'admin', '', 0, 'admin,tom,jack', 'admin','{"entityId": 7, "names": ["歌手", "艺人"]}' );
insert into s2_database (id, `name`, description, `type` ,config ,created_at ,created_by ,updated_at ,updated_by, `admin`) VALUES(1, 'H2数据实例', '', 'h2', '{"password":"semantic","url":"jdbc:h2:mem:semantic;DATABASE_TO_UPPER=false","userName":"root"}', '2023-05-24 00:00:00', 'admin', '2023-05-24 00:00:00', 'admin', 'admin');
insert into s2_datasource (id , model_id, `name`, biz_name, description, database_id ,datasource_detail, created_at, created_by, updated_at, updated_by ) VALUES(1, 1, '停留时长统计', 's2_stay_time_statis', '停留时长统计', 1, '{"dimensions":[{"bizName":"imp_date","dateFormat":"yyyy-MM-dd","expr":"imp_date","isCreateDimension":0,"type":"time","typeParams":{"isPrimary":"true","timeGranularity":"day"}},{"bizName":"page","dateFormat":"yyyy-MM-dd","expr":"page","isCreateDimension":0,"type":"categorical"}],"identifiers":[{"bizName":"user_name","name":"用户名","type":"primary"}],"measures":[{"agg":"sum","bizName":"s2_stay_time_statis_stay_hours","expr":"stay_hours","isCreateMetric":1,"name":"停留时长"}],"queryType":"sql_query","sqlQuery":"SELECT imp_date, page,user_name,stay_hours FROM s2_stay_time_statis"}', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin');
insert into s2_datasource (id , model_id, `name`, biz_name, description, database_id ,datasource_detail, created_at, created_by, updated_at, updated_by ) VALUES(2, 1, 'PVUV统计', 's2_pv_uv_statis', 'PVUV统计', 1, '{"dimensions":[{"bizName":"imp_date","dateFormat":"yyyy-MM-dd","expr":"imp_date","isCreateDimension":0,"type":"time","typeParams":{"isPrimary":"true","timeGranularity":"day"}},{"bizName":"page","dateFormat":"yyyy-MM-dd","expr":"page","isCreateDimension":0,"type":"categorical"}],"identifiers":[{"bizName":"user_name","name":"用户名","type":"primary"}],"measures":[{"agg":"sum","bizName":"s2_pv_uv_statis_pv","expr":"pv","isCreateMetric":1,"name":"访问次数"},{"agg":"count_distinct","bizName":"s2_pv_uv_statis_uv","expr":"uv","isCreateMetric":1,"name":"访问人数"}],"queryType":"sql_query","sqlQuery":"SELECT imp_date, user_name,page,1 as pv, user_name as uv FROM s2_pv_uv_statis"}', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin');
insert into s2_datasource (id , model_id, `name`, biz_name, description, database_id ,datasource_detail, created_at, created_by, updated_at, updated_by ) VALUES(3, 1, '用户部门', 'user_department', '用户部门', 1, '{"dimensions":[{"bizName":"department","dateFormat":"yyyy-MM-dd","expr":"department","isCreateDimension":1,"name":"部门","type":"categorical"}],"identifiers":[{"bizName":"user_name","name":"用户名","type":"primary"}],"measures":[],"queryType":"sql_query","sqlQuery":"SELECT user_name,department FROM s2_user_department"}', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin');
insert into s2_datasource (id , model_id, `name`, biz_name, description, database_id ,datasource_detail, created_at, created_by, updated_at, updated_by ) VALUES(4, 2, '艺人库', 'singer', '艺人库', 1, '{"dimensions":[{"bizName":"imp_date","dateFormat":"yyyy-MM-dd","expr":"imp_date","isCreateDimension":0,"type":"time","typeParams":{"isPrimary":"true","timeGranularity":"day"}},{"bizName":"act_area","dateFormat":"yyyy-MM-dd","expr":"act_area","isCreateDimension":1,"name":"活跃区域","type":"categorical"},{"bizName":"song_name","dateFormat":"yyyy-MM-dd","expr":"song_name","isCreateDimension":1,"name":"代表作","type":"categorical"},{"bizName":"genre","dateFormat":"yyyy-MM-dd","expr":"genre","isCreateDimension":1,"name":"风格","type":"categorical"}],"identifiers":[{"bizName":"singer_name","name":"歌手名","type":"primary"}],"measures":[{"agg":"sum","bizName":"music_down_cnt","expr":"down_cnt","isCreateMetric":1,"name":"下载量"},{"agg":"sum","bizName":"music_js_play_cnt","expr":"js_play_cnt","isCreateMetric":1,"name":"播放量"},{"agg":"sum","bizName":"music_favor_cnt","expr":"favor_cnt","isCreateMetric":1,"name":"收藏量"}],"queryType":"sql_query","sqlQuery":"SELECT imp_date,singer_name,act_area,song_name,genre,js_play_cnt,down_cnt,favor_cnt FROM singer "}', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin');
|
|
||||||
insert into s2_datasource_rela (id , model_id, `datasource_from`, datasource_to, join_key, created_at, created_by, updated_at, updated_by ) VALUES(1, 1, 1, 2, 'user_name', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin');
|
|
||||||
insert into s2_datasource_rela (id , model_id, `datasource_from`, datasource_to, join_key, created_at, created_by, updated_at, updated_by ) VALUES(2, 1, 1, 3, 'user_name', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin');
|
|
||||||
insert into s2_datasource_rela (id , model_id, `datasource_from`, datasource_to, join_key, created_at, created_by, updated_at, updated_by ) VALUES(3, 1, 2, 3, 'user_name', '2023-05-25 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin');
|
|
||||||
insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type, dim_value_maps) VALUES(1, 1, 3, '部门', 'department', '部门', 1, 0, 'categorical', NULL, 'department', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY', '[{"alias":["人力资源","人力"],"bizName":"人力资源","techName":"HR"},{"alias":["营销","销售"],"bizName":"营销部门","techName":"sales"}]');
|
|
||||||
insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(2, 1, 1, '用户名', 'user_name', '用户名', 1, 0, 'primary', NULL, 'user_name', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY');
|
|
||||||
insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(3, 1, 2, '页面', 'page', '页面', 1, 2, 'categorical', NULL, 'page', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY');
|
|
||||||
insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(4, 2, 4, '活跃区域', 'act_area', '活跃区域', 1, 2, 'categorical', NULL, 'act_area', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY');
|
|
||||||
insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(5, 2, 4, '代表作', 'song_name', '代表作', 1, 2, 'categorical', NULL, 'song_name', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY');
|
|
||||||
insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(6, 2, 4, '风格', 'genre', '风格', 1, 2, 'categorical', NULL, 'genre', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY');
|
|
||||||
insert into s2_dimension (id , model_id, datasource_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, expr, created_at, created_by, updated_at, updated_by, semantic_type) VALUES(7, 2, 4, '歌手名', 'singer_name', '歌手名', 1, 2, 'categorical', NULL, 'singer_name', '2023-05-24 00:00:00', 'admin', '2023-05-25 00:00:00', 'admin', 'CATEGORY');
|
|
||||||
insert into s2_metric (id, model_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, created_at, created_by, updated_at, updated_by, data_format_type, data_format) VALUES(1, 1, '停留时长', 'stay_hours', '停留时长', 1, 2, 'ATOMIC', '{"expr":"s2_stay_time_statis_stay_hours","measures":[{"agg":"sum","expr":"stay_hours","isCreateMetric":1,"datasourceId":1,"bizName":"s2_stay_time_statis_stay_hours","name":"s2_stay_time_statis_stay_hours"}]}' , '2023-05-24 17:00:00', 'admin', '2023-05-25 00:00:00', 'admin', NULL, NULL );
|
|
||||||
insert into s2_metric (id, model_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, created_at, created_by, updated_at, updated_by, data_format_type, data_format) VALUES(2, 1, '访问次数', 'pv', '访问次数', 1, 0, 'ATOMIC', ' {"expr":"s2_pv_uv_statis_pv","measures":[{"agg":"sum","bizName":"s2_pv_uv_statis_pv","datasourceId":2,"expr":"pv","isCreateMetric":1,"name":"s2_pv_uv_statis_pv"}]}' , '2023-05-24 17:00:00', 'admin', '2023-05-25 00:00:00', 'admin', NULL, NULL );
|
|
||||||
insert into s2_metric (id, model_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, created_at, created_by, updated_at, updated_by, data_format_type, data_format) VALUES(3, 1, '访问人数', 'uv', '访问人数', 1, 0, 'ATOMIC', ' {"expr":"s2_pv_uv_statis_uv","measures":[{"agg":"count_distinct","bizName":"s2_pv_uv_statis_uv","datasourceId":2,"expr":"uv","isCreateMetric":1,"name":"s2_pv_uv_statis_uv"}]}' , '2023-05-24 17:00:00', 'admin', '2023-05-25 00:00:00', 'admin', NULL, NULL );
|
|
||||||
insert into s2_metric (id, model_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, created_at, created_by, updated_at, updated_by, data_format_type, data_format) VALUES(4, 2, '播放量', 'js_play_cnt', '播放量', 1, 2, 'ATOMIC', '{"expr":"music_js_play_cnt","measures":[{"agg":"sum","expr":"js_play_cnt","isCreateMetric":1,"datasourceId":4,"bizName":"music_js_play_cnt","name":"music_js_play_cnt"}]}' , '2023-05-24 17:00:00', 'admin', '2023-05-25 00:00:00', 'admin', NULL, NULL );
|
|
||||||
insert into s2_metric (id, model_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, created_at, created_by, updated_at, updated_by, data_format_type, data_format) VALUES(5, 2, '下载量', 'down_cnt', '下载量', 1, 0, 'ATOMIC', ' {"expr":"music_down_cnt","measures":[{"agg":"sum","bizName":"music_down_cnt","datasourceId":4,"expr":"down_cnt","isCreateMetric":1,"name":"music_down_cnt"}]}' , '2023-05-24 17:00:00', 'admin', '2023-05-25 00:00:00', 'admin', NULL, NULL );
|
|
||||||
insert into s2_metric (id, model_id, `name`, biz_name, description, status, sensitive_level, `type`, type_params, created_at, created_by, updated_at, updated_by, data_format_type, data_format) VALUES(6, 2, '收藏量', 'favor_cnt', '收藏量', 1, 0, 'ATOMIC', ' {"expr":"music_favor_cnt","measures":[{"agg":"sum","bizName":"music_favor_cnt","datasourceId":4,"expr":"favor_cnt","isCreateMetric":1,"name":"music_favor_cnt"}]}' , '2023-05-24 17:00:00', 'admin', '2023-05-25 00:00:00', 'admin', NULL, NULL );
|
|
||||||
|
|
||||||
insert into s2_available_date_info(`item_id` ,`type` ,`date_format` ,`start_date` ,`end_date` ,`unavailable_date` ,`created_at` ,`created_by` ,`updated_at` ,`updated_by` )
|
insert into s2_available_date_info(`item_id` ,`type` ,`date_format` ,`start_date` ,`end_date` ,`unavailable_date` ,`created_at` ,`created_by` ,`updated_at` ,`updated_by` )
|
||||||
values (1, 'dimension', 'yyyy-MM-dd', DATEADD('DAY', -28, CURRENT_DATE()), DATEADD('DAY', -1, CURRENT_DATE()), '[]', '2023-06-01', 'admin', '2023-06-01', 'admin');
|
values (1, 'dimension', 'yyyy-MM-dd', DATEADD('DAY', -28, CURRENT_DATE()), DATEADD('DAY', -1, CURRENT_DATE()), '[]', '2023-06-01', 'admin', '2023-06-01', 'admin');
|
||||||
insert into s2_available_date_info(`item_id` ,`type` ,`date_format` ,`start_date` ,`end_date` ,`unavailable_date` ,`created_at` ,`created_by` ,`updated_at` ,`updated_by` )
|
insert into s2_available_date_info(`item_id` ,`type` ,`date_format` ,`start_date` ,`end_date` ,`unavailable_date` ,`created_at` ,`created_by` ,`updated_at` ,`updated_by` )
|
||||||
@@ -37,11 +11,6 @@ values (2, 'dimension', 'yyyy-MM-dd', DATEADD('DAY', -28, CURRENT_DATE()), DATEA
|
|||||||
insert into s2_available_date_info(`item_id` ,`type` ,`date_format` ,`start_date` ,`end_date` ,`unavailable_date` ,`created_at` ,`created_by` ,`updated_at` ,`updated_by` )
|
insert into s2_available_date_info(`item_id` ,`type` ,`date_format` ,`start_date` ,`end_date` ,`unavailable_date` ,`created_at` ,`created_by` ,`updated_at` ,`updated_by` )
|
||||||
values (3, 'dimension', 'yyyy-MM-dd', DATEADD('DAY', -28, CURRENT_DATE()), DATEADD('DAY', -1, CURRENT_DATE()), '[]', '2023-06-01', 'admin', '2023-06-01', 'admin');
|
values (3, 'dimension', 'yyyy-MM-dd', DATEADD('DAY', -28, CURRENT_DATE()), DATEADD('DAY', -1, CURRENT_DATE()), '[]', '2023-06-01', 'admin', '2023-06-01', 'admin');
|
||||||
|
|
||||||
insert into s2_auth_groups (group_id, config)
|
|
||||||
values (1, '{"modelId":"1","name":"admin-permission","groupId":1,"authRules":[{"metrics":["stay_hours"],"dimensions":["page"]}],"dimensionFilters":[""],"dimensionFilterDescription":"授权admin 页面和停留时长权限","authorizedUsers":["admin"],"authorizedDepartmentIds":[]}');
|
|
||||||
insert into s2_auth_groups (group_id, config)
|
|
||||||
values (2, '{"modelId":"1","name":"tom_sales_permission","groupId":2,"authRules":[{"metrics":["stay_hours"],"dimensions":["page"]}],"dimensionFilters":["department in (''sales'')"],"dimensionFilterDescription":"开通 tom sales部门权限", "authorizedUsers":["tom"],"authorizedDepartmentIds":[]}');
|
|
||||||
|
|
||||||
-- sample data
|
-- sample data
|
||||||
INSERT INTO singer (imp_date,singer_name,act_area, song_name,genre,js_play_cnt,down_cnt,favor_cnt) VALUES (DATEADD('DAY', -1, CURRENT_DATE()), '周杰伦', '中国','青花瓷','流行',1000000,1000000,1000000);
|
INSERT INTO singer (imp_date,singer_name,act_area, song_name,genre,js_play_cnt,down_cnt,favor_cnt) VALUES (DATEADD('DAY', -1, CURRENT_DATE()), '周杰伦', '中国','青花瓷','流行',1000000,1000000,1000000);
|
||||||
INSERT INTO singer (imp_date,singer_name,act_area, song_name,genre,js_play_cnt,down_cnt,favor_cnt) VALUES (DATEADD('DAY', -5, CURRENT_DATE()), '周杰伦', '中国','青花瓷','流行',1000000,1000000,1000000);
|
INSERT INTO singer (imp_date,singer_name,act_area, song_name,genre,js_play_cnt,down_cnt,favor_cnt) VALUES (DATEADD('DAY', -5, CURRENT_DATE()), '周杰伦', '中国','青花瓷','流行',1000000,1000000,1000000);
|
||||||
|
|||||||
43
pom.xml
@@ -65,6 +65,11 @@
<mockito-inline.version>4.5.1</mockito-inline.version>
<jsqlparser.version>4.5</jsqlparser.version>
<revision>0.7.5-SNAPSHOT</revision>
+<!-- Do not bump spotless plugin version since 2.30.0 is the latest version supports Java 8-->
+<maven.plugin.spotless.version>2.30.0</maven.plugin.spotless.version>
+<spotless.python.includes></spotless.python.includes>
+<!-- Do not bump black version as decided by spotless maven plugin-->
+<spotless.python.black.version>22.3.0</spotless.python.black.version>
</properties>

<dependencyManagement>
@@ -101,6 +106,15 @@
</dependencies>
</dependencyManagement>

+<profiles>
+<profile>
+<id>spotless-python</id>
+<properties>
+<spotless.python.includes>src/**/*.py</spotless.python.includes>
+</properties>
+</profile>
+</profiles>

<build>
<plugins>
<plugin>
@@ -147,6 +161,10 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-checkstyle-plugin</artifactId>
</plugin>
+<plugin>
+<groupId>com.diffplug.spotless</groupId>
+<artifactId>spotless-maven-plugin</artifactId>
+</plugin>
</plugins>
<pluginManagement>
<plugins>
@@ -185,6 +203,31 @@
</execution>
</executions>
</plugin>
+<plugin>
+<groupId>com.diffplug.spotless</groupId>
+<artifactId>spotless-maven-plugin</artifactId>
+<version>${maven.plugin.spotless.version}</version>
+<configuration>
+<upToDateChecking>
+<enabled>true</enabled>
+</upToDateChecking>
+<python>
+<includes>
+<include>${spotless.python.includes}</include>
+</includes>
+<black>
+<version>${spotless.python.black.version}</version>
+</black>
+</python>
+</configuration>
+<executions>
+<execution>
+<goals>
+<goal>check</goal>
+</goals>
+</execution>
+</executions>
+</plugin>
</plugins>
</pluginManagement>
</build>
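Usage note (an inference from the profile wiring above, not stated in the commit): Python formatting appears to be opt-in. The default `spotless.python.includes` property is empty, so a plain `mvn spotless:check` only exercises the Java side, while `mvn -Pspotless-python spotless:check` (or `spotless:apply` to rewrite files) sets the include pattern to `src/**/*.py` and runs black 22.3.0 over the Python sources.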
@@ -0,0 +1,9 @@
+package com.tencent.supersonic.semantic.api.model.enums;
+
+public enum IdentifyTypeEnum {
+
+    primary,
+
+    foreign,
+
+}
@@ -0,0 +1,10 @@
+package com.tencent.supersonic.semantic.api.model.enums;
+
+public enum SemanticTypeEnum {
+
+    CATEGORY,
+    ID,
+    DATE,
+    NUMBER
+
+}
@@ -25,6 +25,13 @@ public class Dim {

    private String bizName;

+    public Dim(String name, String bizName, String type, Integer isCreateDimension) {
+        this.name = name;
+        this.type = type;
+        this.isCreateDimension = isCreateDimension;
+        this.bizName = bizName;
+    }
+
    public static Dim getDefault() {
        return new Dim("日期", "time", "2023-05-28",
                Constants.DAY_FORMAT,
@@ -10,8 +10,8 @@ import lombok.NoArgsConstructor;
@NoArgsConstructor
public class DimensionTimeTypeParams {

-    private String isPrimary;
+    private String isPrimary = "true";

-    private String timeGranularity;
+    private String timeGranularity = "day";

}
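A minimal, self-contained sketch of what these defaults buy (the nested class is a plain-Java stand-in for the Lombok-annotated original): a bare new DimensionTimeTypeParams() now matches the {"isPrimary":"true","timeGranularity":"day"} typeParams carried by the demo datasource JSON above, so a time dimension created without explicit params falls back to day granularity.

// Sketch only: stand-in for the Lombok POJO in the hunk above.
public class TimeTypeParamsSketch {

    static class DimensionTimeTypeParams {
        private String isPrimary = "true";
        private String timeGranularity = "day";
    }

    public static void main(String[] args) {
        DimensionTimeTypeParams params = new DimensionTimeTypeParams();
        // Prints "true/day", mirroring the typeParams in the seed datasource JSON.
        System.out.println(params.isPrimary + "/" + params.timeGranularity);
    }
}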
@@ -28,5 +28,10 @@ public class Measure {

    private Long datasourceId;

+    public Measure(String name, String bizName, String agg, Integer isCreateMetric) {
+        this.name = name;
+        this.agg = agg;
+        this.isCreateMetric = isCreateMetric;
+        this.bizName = bizName;
+    }
}
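For orientation, a hedged sketch of how the new constructor arguments line up with the datasource_detail JSON in the seed inserts; MeasureSketch is a hypothetical stand-in (the real Measure also carries expr and datasourceId), and note the argument order differs from the JSON key order.

// Hypothetical mirror of Measure(name, bizName, agg, isCreateMetric).
public class MeasureSketch {

    static class Measure {
        String name;
        String bizName;
        String agg;
        Integer isCreateMetric;

        Measure(String name, String bizName, String agg, Integer isCreateMetric) {
            this.name = name;
            this.agg = agg;
            this.isCreateMetric = isCreateMetric;
            this.bizName = bizName;
        }
    }

    public static void main(String[] args) {
        // From the seed data: {"agg":"sum","bizName":"s2_pv_uv_statis_pv","expr":"pv","isCreateMetric":1,"name":"访问次数"}
        Measure pv = new Measure("访问次数", "s2_pv_uv_statis_pv", "sum", 1);
        System.out.println(pv.bizName + " aggregates with " + pv.agg);
    }
}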
@@ -1,26 +1,30 @@
package com.tencent.supersonic.semantic.model.application;

import com.tencent.supersonic.semantic.api.model.pojo.ItemDateFilter;
-import com.tencent.supersonic.semantic.api.model.response.MetricResp;
import com.tencent.supersonic.semantic.api.model.response.DatabaseResp;
-import com.tencent.supersonic.semantic.api.model.response.ModelResp;
import com.tencent.supersonic.semantic.api.model.response.DatasourceResp;
import com.tencent.supersonic.semantic.api.model.response.DimensionResp;
import com.tencent.supersonic.semantic.api.model.response.ItemDateResp;
+import com.tencent.supersonic.semantic.api.model.response.MeasureResp;
+import com.tencent.supersonic.semantic.api.model.response.MetricResp;
+import com.tencent.supersonic.semantic.api.model.response.ModelResp;
import com.tencent.supersonic.semantic.api.model.yaml.DatasourceYamlTpl;
import com.tencent.supersonic.semantic.api.model.yaml.DimensionYamlTpl;
import com.tencent.supersonic.semantic.api.model.yaml.MetricYamlTpl;
-import com.tencent.supersonic.semantic.model.domain.DatabaseService;
-import com.tencent.supersonic.semantic.model.domain.ModelService;
-import com.tencent.supersonic.semantic.model.domain.DimensionService;
-import com.tencent.supersonic.semantic.model.domain.DatasourceService;
-import com.tencent.supersonic.semantic.model.domain.MetricService;
import com.tencent.supersonic.semantic.model.domain.Catalog;
+import com.tencent.supersonic.semantic.model.domain.DatabaseService;
+import com.tencent.supersonic.semantic.model.domain.DatasourceService;
+import com.tencent.supersonic.semantic.model.domain.DimensionService;
+import com.tencent.supersonic.semantic.model.domain.MetricService;
+import com.tencent.supersonic.semantic.model.domain.ModelService;
import java.util.List;
import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
import java.util.Set;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
+import org.springframework.util.CollectionUtils;

@Slf4j
@Component
@@ -96,4 +100,26 @@ public class CatalogImpl implements Catalog {
    public ItemDateResp getItemDate(ItemDateFilter dimension, ItemDateFilter metric) {
        return datasourceService.getItemDate(dimension, metric);
    }

+    @Override
+    public String getAgg(Long modelId, String metricBizName) {
+        List<MetricResp> metricResps = getMetrics(modelId);
+        if (!CollectionUtils.isEmpty(metricResps)) {
+            Optional<MetricResp> metric = metricResps.stream()
+                    .filter(m -> m.getBizName().equalsIgnoreCase(metricBizName)).findFirst();
+            if (metric.isPresent() && Objects.nonNull(metric.get().getTypeParams()) && !CollectionUtils.isEmpty(
+                    metric.get().getTypeParams().getMeasures())) {
+                List<MeasureResp> measureRespList = datasourceService.getMeasureListOfModel(modelId);
+                if (!CollectionUtils.isEmpty(measureRespList)) {
+                    String measureName = metric.get().getTypeParams().getMeasures().get(0).getBizName();
+                    Optional<MeasureResp> measure = measureRespList.stream()
+                            .filter(m -> m.getBizName().equalsIgnoreCase(measureName)).findFirst();
+                    if (measure.isPresent()) {
+                        return measure.get().getAgg();
+                    }
+                }
+            }
+        }
+        return "";
+    }
}
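The lookup above is two hops: metric, then the first measure of its typeParams, then that measure's agg, with an empty string whenever a hop misses. A self-contained sketch of the same resolution using plain maps in place of the MetricResp/MeasureResp services, seeded from the demo metrics (pv aggregates with sum, uv with count_distinct):

import java.util.HashMap;
import java.util.Map;

// Sketch of the two-step resolution in CatalogImpl.getAgg.
public class GetAggSketch {

    public static void main(String[] args) {
        Map<String, String> metricToFirstMeasure = new HashMap<>();
        metricToFirstMeasure.put("pv", "s2_pv_uv_statis_pv");
        metricToFirstMeasure.put("uv", "s2_pv_uv_statis_uv");

        Map<String, String> measureToAgg = new HashMap<>();
        measureToAgg.put("s2_pv_uv_statis_pv", "sum");
        measureToAgg.put("s2_pv_uv_statis_uv", "count_distinct");

        String measure = metricToFirstMeasure.get("uv");
        // Falls back to "" when the metric or its measure is unknown, like getAgg.
        String agg = measure == null ? "" : measureToAgg.getOrDefault(measure, "");
        System.out.println(agg); // count_distinct
    }
}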
@@ -221,7 +221,7 @@ public class ModelServiceImpl implements ModelService {

    @Override
    public Map<Long, String> getModelFullPathMap() {
-        return getModelList().stream().collect(Collectors.toMap(ModelResp::getId,
+        return getModelList().stream().filter(m -> m != null).collect(Collectors.toMap(ModelResp::getId,
                ModelResp::getFullPath, (k1, k2) -> k1));
    }

@@ -1,14 +1,14 @@
package com.tencent.supersonic.semantic.model.domain;

import com.tencent.supersonic.semantic.api.model.pojo.ItemDateFilter;
-import com.tencent.supersonic.semantic.api.model.yaml.DatasourceYamlTpl;
-import com.tencent.supersonic.semantic.api.model.yaml.DimensionYamlTpl;
-import com.tencent.supersonic.semantic.api.model.yaml.MetricYamlTpl;
import com.tencent.supersonic.semantic.api.model.response.DatabaseResp;
import com.tencent.supersonic.semantic.api.model.response.DatasourceResp;
import com.tencent.supersonic.semantic.api.model.response.DimensionResp;
import com.tencent.supersonic.semantic.api.model.response.ItemDateResp;
import com.tencent.supersonic.semantic.api.model.response.MetricResp;
+import com.tencent.supersonic.semantic.api.model.yaml.DatasourceYamlTpl;
+import com.tencent.supersonic.semantic.api.model.yaml.DimensionYamlTpl;
+import com.tencent.supersonic.semantic.api.model.yaml.MetricYamlTpl;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -16,6 +16,7 @@ import java.util.Set;
public interface Catalog {

    DatabaseResp getDatabase(Long id);

    DatabaseResp getDatabaseByModelId(Long modelId);

    List<DatasourceResp> getDatasourceList(Long modelId);
@@ -36,4 +37,6 @@ public interface Catalog {

    ItemDateResp getItemDate(ItemDateFilter dimension, ItemDateFilter metric);

+    String getAgg(Long modelId, String metricBizName);
+
}
@@ -29,8 +29,13 @@ public class H2Adaptor extends EngineAdaptor {

    @Override
    public String getColumnMetaQueryTpl() {
-        return "SELECT COLUMN_NAME AS name, DATA_TYPE AS dataType\n"
-                + "FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA ='%s' AND TABLE_NAME = '%s'";
+        return "SELECT COLUMN_NAME AS name, "
+                + " case DATA_TYPE"
+                + " when '12' then 'varchar'"
+                + " when '-5' then 'integer'"
+                + " when '8' then 'double'"
+                + " end AS dataType"
+                + " FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA ='%s' AND TABLE_NAME = '%s'";
    }

    @Override
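The literal codes in that CASE expression appear to be java.sql.Types constants, which is what H2's INFORMATION_SCHEMA.COLUMNS reports in its DATA_TYPE column (in the 1.4.x line). A quick sanity check; note that '-5' is java.sql.Types.BIGINT, which the adaptor folds into 'integer':

import java.sql.Types;

// Sanity check for the numeric codes hard-wired into the H2 column query.
public class H2TypeCodes {

    public static void main(String[] args) {
        System.out.println(Types.VARCHAR); // 12 -> mapped to 'varchar'
        System.out.println(Types.BIGINT);  // -5 -> mapped to 'integer' by the adaptor
        System.out.println(Types.DOUBLE);  // 8  -> mapped to 'double'
    }
}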
@@ -10,6 +10,10 @@ import lombok.NoArgsConstructor;
@NoArgsConstructor
public class Identify {

+    public enum Type {
+        PRIMARY, FOREIGN
+    }
+
    private String name;

    // primary or foreign
@@ -1,5 +1,6 @@
package com.tencent.supersonic.semantic.query.parser.calcite.sql;

+import com.tencent.supersonic.semantic.query.parser.calcite.dsl.DataSource;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
@@ -25,6 +26,8 @@ public class TableView {
    private String alias;
    private List<String> primary;

+    private DataSource dataSource;
+
    public SqlNode build() {
        measure.addAll(dimension);
        SqlNodeList dimensionNodeList = null;
@@ -1,6 +1,11 @@
package com.tencent.supersonic.semantic.query.parser.calcite.sql.node;

import com.tencent.supersonic.semantic.query.parser.calcite.dsl.Identify;
+import com.tencent.supersonic.semantic.query.parser.calcite.dsl.Identify.Type;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.validate.SqlValidatorScope;

@@ -9,4 +14,28 @@ public class IdentifyNode extends SemanticNode {
    public static SqlNode build(Identify identify, SqlValidatorScope scope) throws Exception {
        return parse(identify.getName(), scope);
    }

+    public static Set<String> getIdentifyNames(List<Identify> identifies, Identify.Type type) {
+        return identifies.stream().filter(i -> type.name().equalsIgnoreCase(i.getType())).map(i -> i.getName())
+                .collect(Collectors.toSet());
+    }
+
+    public static boolean isForeign(String name, List<Identify> identifies) {
+        Optional<Identify> identify = identifies.stream().filter(i -> i.getName().equalsIgnoreCase(name))
+                .findFirst();
+        if (identify.isPresent()) {
+            return Type.FOREIGN.name().equalsIgnoreCase(identify.get().getType());
+        }
+        return false;
+    }
+
+    public static boolean isPrimary(String name, List<Identify> identifies) {
+        Optional<Identify> identify = identifies.stream().filter(i -> i.getName().equalsIgnoreCase(name))
+                .findFirst();
+        if (identify.isPresent()) {
+            return Type.PRIMARY.name().equalsIgnoreCase(identify.get().getType());
+        }
+        return false;
+    }
}
|||||||
@@ -5,6 +5,7 @@ import com.tencent.supersonic.semantic.query.parser.calcite.dsl.Constants;
|
|||||||
import com.tencent.supersonic.semantic.query.parser.calcite.dsl.DataSource;
|
import com.tencent.supersonic.semantic.query.parser.calcite.dsl.DataSource;
|
||||||
import com.tencent.supersonic.semantic.query.parser.calcite.dsl.Dimension;
|
import com.tencent.supersonic.semantic.query.parser.calcite.dsl.Dimension;
|
||||||
import com.tencent.supersonic.semantic.query.parser.calcite.dsl.Identify;
|
import com.tencent.supersonic.semantic.query.parser.calcite.dsl.Identify;
|
||||||
|
import com.tencent.supersonic.semantic.query.parser.calcite.dsl.Identify.Type;
|
||||||
import com.tencent.supersonic.semantic.query.parser.calcite.dsl.Metric;
|
import com.tencent.supersonic.semantic.query.parser.calcite.dsl.Metric;
|
||||||
import com.tencent.supersonic.semantic.query.parser.calcite.schema.SemanticSchema;
|
import com.tencent.supersonic.semantic.query.parser.calcite.schema.SemanticSchema;
|
||||||
import com.tencent.supersonic.semantic.query.parser.calcite.sql.Renderer;
|
import com.tencent.supersonic.semantic.query.parser.calcite.sql.Renderer;
|
||||||
@@ -12,15 +13,20 @@ import com.tencent.supersonic.semantic.query.parser.calcite.sql.TableView;
import com.tencent.supersonic.semantic.query.parser.calcite.sql.node.AggFunctionNode;
import com.tencent.supersonic.semantic.query.parser.calcite.sql.node.DataSourceNode;
import com.tencent.supersonic.semantic.query.parser.calcite.sql.node.FilterNode;
+import com.tencent.supersonic.semantic.query.parser.calcite.sql.node.IdentifyNode;
import com.tencent.supersonic.semantic.query.parser.calcite.sql.node.MetricNode;
import com.tencent.supersonic.semantic.query.parser.calcite.sql.node.SemanticNode;
+import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
+import java.util.Queue;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
@@ -33,6 +39,7 @@ import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.sql.validate.SqlValidatorScope;
+import org.springframework.util.CollectionUtils;

@Slf4j
public class JoinRender extends Renderer {
@@ -41,6 +48,7 @@ public class JoinRender extends Renderer {
    public void render(MetricReq metricCommand, List<DataSource> dataSources, SqlValidatorScope scope,
            SemanticSchema schema, boolean nonAgg) throws Exception {
        String queryWhere = metricCommand.getWhere();
+        dataSources = getOrderSource(dataSources);
        Set<String> whereFields = new HashSet<>();
        List<String> fieldWhere = new ArrayList<>();
        if (queryWhere != null && !queryWhere.isEmpty()) {
@@ -95,6 +103,7 @@ public class JoinRender extends Renderer {
            String alias = Constants.JOIN_TABLE_PREFIX + dataSource.getName();
            tableView.setAlias(alias);
            tableView.setPrimary(primary);
+            tableView.setDataSource(dataSource);
            if (left == null) {
                leftTable = tableView;
                left = SemanticNode.buildAs(tableView.getAlias(), getTable(tableView, scope));
@@ -246,7 +255,7 @@ public class JoinRender extends Renderer {

    private SqlNode getCondition(TableView left, TableView right, DataSource dataSource, SemanticSchema schema,
            SqlValidatorScope scope) throws Exception {
-        log.info(left.getClass().toString());
        Set<String> selectLeft = SemanticNode.getSelect(left.getTable());
        Set<String> selectRight = SemanticNode.getSelect(right.getTable());
        selectLeft.retainAll(selectRight);
@@ -255,6 +264,16 @@ public class JoinRender extends Renderer {
            if (!SourceRender.isDimension(on, dataSource, schema)) {
                continue;
            }
+            if (IdentifyNode.isForeign(on, left.getDataSource().getIdentifiers())) {
+                if (!IdentifyNode.isPrimary(on, right.getDataSource().getIdentifiers())) {
+                    continue;
+                }
+            }
+            if (IdentifyNode.isForeign(on, right.getDataSource().getIdentifiers())) {
+                if (!IdentifyNode.isPrimary(on, left.getDataSource().getIdentifiers())) {
+                    continue;
+                }
+            }
            List<SqlNode> ons = new ArrayList<>();
            ons.add(SemanticNode.parse(left.getAlias() + "." + on, scope));
            ons.add(SemanticNode.parse(right.getAlias() + "." + on, scope));
@@ -276,4 +295,85 @@ public class JoinRender extends Renderer {
        }
        return condition;
    }

+    private List<DataSource> getOrderSource(List<DataSource> dataSources) throws Exception {
+        if (CollectionUtils.isEmpty(dataSources) || dataSources.size() <= 2) {
+            return dataSources;
+        }
+        Map<String, Set<String>> next = new HashMap<>();
+        Map<String, Boolean> visited = new HashMap<>();
+        Map<String, List<Identify>> dataSourceIdentifies = new HashMap<>();
+        dataSources.stream().forEach(d -> {
+            next.put(d.getName(), new HashSet<>());
+            visited.put(d.getName(), false);
+            dataSourceIdentifies.put(d.getName(), d.getIdentifiers());
+        });
+        int cnt = dataSources.size();
+        List<Map.Entry<String, List<Identify>>> dataSourceIdentifyList = dataSourceIdentifies.entrySet().stream()
+                .collect(Collectors.toList());
+        for (int i = 0; i < cnt; i++) {
+            for (int j = i + 1; j < cnt; j++) {
+                Set<String> primaries = IdentifyNode.getIdentifyNames(dataSourceIdentifyList.get(i).getValue(),
+                        Type.PRIMARY);
+                Set<String> foreign = IdentifyNode.getIdentifyNames(dataSourceIdentifyList.get(i).getValue(),
+                        Type.FOREIGN);
+                Set<String> nextPrimaries = IdentifyNode.getIdentifyNames(dataSourceIdentifyList.get(j).getValue(),
+                        Type.PRIMARY);
+                Set<String> nextForeign = IdentifyNode.getIdentifyNames(dataSourceIdentifyList.get(j).getValue(),
+                        Type.FOREIGN);
+                Set<String> nextAll = new HashSet<>();
+                nextAll.addAll(nextPrimaries);
+                nextAll.addAll(nextForeign);
+                primaries.retainAll(nextPrimaries);
+                foreign.retainAll(nextPrimaries);
+                if (primaries.size() > 0 || foreign.size() > 0) {
+                    next.get(dataSourceIdentifyList.get(i).getKey()).add(dataSourceIdentifyList.get(j).getKey());
+                    next.get(dataSourceIdentifyList.get(j).getKey()).add(dataSourceIdentifyList.get(i).getKey());
+                }
+            }
+        }
+        Queue<String> paths = new ArrayDeque<>();
+        for (String id : visited.keySet()) {
+            if (!visited.get(id)) {
+                joinOrder(cnt, id, next, paths, visited);
+                if (paths.size() >= cnt) {
+                    break;
+                }
+            }
+        }
+        if (paths.size() < cnt) {
+            throw new Exception("datasource cant join,pls check identify :" + dataSources.stream()
+                    .map(d -> d.getName()).collect(
+                            Collectors.joining(",")));
+        }
+        List<String> orderList = new ArrayList<>(paths);
+        Collections.sort(dataSources, new Comparator<DataSource>() {
+            @Override
+            public int compare(DataSource o1, DataSource o2) {
+                return orderList.indexOf(o1.getName()) - orderList.indexOf(o2.getName());
+            }
+        });
+        return dataSources;
+    }
+
+    private static void joinOrder(int cnt, String id, Map<String, Set<String>> next, Queue<String> orders,
+            Map<String, Boolean> visited) {
+        visited.put(id, true);
+        orders.add(id);
+        if (orders.size() >= cnt) {
+            return;
+        }
+        for (String nextId : next.get(id)) {
+            if (!visited.get(nextId)) {
+                joinOrder(cnt, nextId, next, orders, visited);
+                if (orders.size() >= cnt) {
+                    return;
+                }
+            }
+        }
+        orders.poll();
+        visited.put(id, false);
+    }
}
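getOrderSource treats the datasources as an undirected graph, adding an edge whenever two datasources share a primary, or foreign-to-primary, identifier, and joinOrder then depth-first searches for a path that visits every node, failing fast when the graph is disconnected. One caveat worth noting: the backtracking step calls orders.poll(), which removes from the head of the ArrayDeque rather than the tail. The sketch below is a hypothetical stand-alone rendering of the same idea on the demo datasources (all joined on user_name); it backtracks from the tail instead.

import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

// Sketch of the join-ordering idea: adjacency between datasources sharing an
// identifier, then DFS for a visiting order that covers all of them.
public class JoinOrderSketch {

    static boolean dfs(String id, Map<String, Set<String>> next, Deque<String> order,
            Set<String> visited, int total) {
        visited.add(id);
        order.addLast(id);
        if (order.size() == total) {
            return true;
        }
        for (String n : next.get(id)) {
            if (!visited.contains(n) && dfs(n, next, order, visited, total)) {
                return true;
            }
        }
        order.removeLast(); // backtrack from the tail
        visited.remove(id);
        return false;
    }

    public static void main(String[] args) {
        // The three demo datasources all expose user_name as a primary identifier.
        Map<String, Set<String>> next = new HashMap<>();
        next.put("s2_stay_time_statis", new HashSet<>(Arrays.asList("s2_pv_uv_statis", "user_department")));
        next.put("s2_pv_uv_statis", new HashSet<>(Arrays.asList("s2_stay_time_statis", "user_department")));
        next.put("user_department", new HashSet<>(Arrays.asList("s2_stay_time_statis", "s2_pv_uv_statis")));

        Deque<String> order = new ArrayDeque<>();
        if (!dfs("s2_stay_time_statis", next, order, new HashSet<>(), next.size())) {
            throw new IllegalStateException("datasources cannot be joined");
        }
        System.out.println(order); // e.g. [s2_stay_time_statis, s2_pv_uv_statis, user_department]
    }
}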