[improvement](project) Adding tag abstraction to the dictionary and optimizing related code. (#785)

This commit is contained in:
lexluo09
2024-03-05 18:00:48 +08:00
committed by GitHub
parent ba83c6ca81
commit b8ecfc6a99
27 changed files with 335 additions and 261 deletions

View File

@@ -43,6 +43,9 @@ public class SemanticSchema implements Serializable {
case VALUE:
element = getElementsById(elementID, getDimensionValues());
break;
case TAG:
element = getElementsById(elementID, getTags());
break;
default:
}

View File

@@ -12,6 +12,8 @@ import com.tencent.supersonic.headless.api.pojo.SchemaItem;
import com.tencent.supersonic.headless.api.pojo.response.DimSchemaResp;
import com.tencent.supersonic.headless.api.pojo.response.MetricSchemaResp;
import com.tencent.supersonic.headless.api.pojo.response.DataSetSchemaResp;
import com.tencent.supersonic.headless.api.pojo.response.TagResp;
import java.util.Objects;
import org.apache.logging.log4j.util.Strings;
import org.springframework.beans.BeanUtils;
import org.springframework.util.CollectionUtils;
@@ -37,6 +39,120 @@ public class DataSetSchemaBuilder {
.build();
dataSetSchema.setDataSet(dataSet);
Set<SchemaElement> metrics = getMetrics(resp);
dataSetSchema.getMetrics().addAll(metrics);
Set<SchemaElement> dimensions = getDimensions(resp);
dataSetSchema.getDimensions().addAll(dimensions);
Set<SchemaElement> dimensionValues = getDimensionValues(resp);
dataSetSchema.getDimensionValues().addAll(dimensionValues);
Set<SchemaElement> tags = getTags(resp);
dataSetSchema.getTags().addAll(tags);
SchemaElement entity = getEntity(resp);
if (Objects.nonNull(entity)) {
dataSetSchema.setEntity(entity);
}
return dataSetSchema;
}
private static SchemaElement getEntity(DataSetSchemaResp resp) {
DimSchemaResp dim = resp.getPrimaryKey();
if (Objects.isNull(dim)) {
return null;
}
return SchemaElement.builder()
.dataSet(resp.getId())
.model(dim.getModelId())
.id(dim.getId())
.name(dim.getName())
.bizName(dim.getBizName())
.type(SchemaElementType.ENTITY)
.useCnt(dim.getUseCnt())
.alias(dim.getEntityAlias())
.build();
}
private static Set<SchemaElement> getTags(DataSetSchemaResp resp) {
Set<SchemaElement> tags = new HashSet<>();
List<TagResp> tagResps = resp.getTags();
for (TagResp tagResp : tagResps) {
SchemaElement element = SchemaElement.builder()
.dataSet(resp.getId())
.model(tagResp.getModelId())
.id(tagResp.getId())
.name(tagResp.getName())
.bizName(tagResp.getBizName())
.type(SchemaElementType.TAG)
.build();
tags.add(element);
}
return tags;
}
private static Set<SchemaElement> getDimensions(DataSetSchemaResp resp) {
Set<SchemaElement> dimensions = new HashSet<>();
for (DimSchemaResp dim : resp.getDimensions()) {
List<String> alias = SchemaItem.getAliasList(dim.getAlias());
List<DimValueMap> dimValueMaps = dim.getDimValueMaps();
List<SchemaValueMap> schemaValueMaps = new ArrayList<>();
if (!CollectionUtils.isEmpty(dimValueMaps)) {
for (DimValueMap dimValueMap : dimValueMaps) {
SchemaValueMap schemaValueMap = new SchemaValueMap();
BeanUtils.copyProperties(dimValueMap, schemaValueMap);
schemaValueMaps.add(schemaValueMap);
}
}
SchemaElement dimToAdd = SchemaElement.builder()
.dataSet(resp.getId())
.model(dim.getModelId())
.id(dim.getId())
.name(dim.getName())
.bizName(dim.getBizName())
.type(SchemaElementType.DIMENSION)
.useCnt(dim.getUseCnt())
.alias(alias)
.schemaValueMaps(schemaValueMaps)
.build();
dimensions.add(dimToAdd);
}
return dimensions;
}
private static Set<SchemaElement> getDimensionValues(DataSetSchemaResp resp) {
Set<SchemaElement> dimensionValues = new HashSet<>();
for (DimSchemaResp dim : resp.getDimensions()) {
Set<String> dimValueAlias = new HashSet<>();
List<DimValueMap> dimValueMaps = dim.getDimValueMaps();
if (!CollectionUtils.isEmpty(dimValueMaps)) {
for (DimValueMap dimValueMap : dimValueMaps) {
if (Strings.isNotEmpty(dimValueMap.getBizName())) {
dimValueAlias.add(dimValueMap.getBizName());
}
if (!CollectionUtils.isEmpty(dimValueMap.getAlias())) {
dimValueAlias.addAll(dimValueMap.getAlias());
}
}
}
SchemaElement dimValueToAdd = SchemaElement.builder()
.dataSet(resp.getId())
.model(dim.getModelId())
.id(dim.getId())
.name(dim.getName())
.bizName(dim.getBizName())
.type(SchemaElementType.VALUE)
.useCnt(dim.getUseCnt())
.alias(new ArrayList<>(Arrays.asList(dimValueAlias.toArray(new String[0]))))
.build();
dimensionValues.add(dimValueToAdd);
}
return dimensionValues;
}
private static Set<SchemaElement> getMetrics(DataSetSchemaResp resp) {
Set<SchemaElement> metrics = new HashSet<>();
for (MetricSchemaResp metric : resp.getMetrics()) {
@@ -57,90 +173,7 @@ public class DataSetSchemaBuilder {
metrics.add(metricToAdd);
}
dataSetSchema.getMetrics().addAll(metrics);
Set<SchemaElement> dimensions = new HashSet<>();
Set<SchemaElement> dimensionValues = new HashSet<>();
Set<SchemaElement> tags = new HashSet<>();
for (DimSchemaResp dim : resp.getDimensions()) {
List<String> alias = SchemaItem.getAliasList(dim.getAlias());
Set<String> dimValueAlias = new HashSet<>();
List<DimValueMap> dimValueMaps = dim.getDimValueMaps();
List<SchemaValueMap> schemaValueMaps = new ArrayList<>();
if (!CollectionUtils.isEmpty(dimValueMaps)) {
for (DimValueMap dimValueMap : dimValueMaps) {
if (Strings.isNotEmpty(dimValueMap.getBizName())) {
dimValueAlias.add(dimValueMap.getBizName());
}
if (!CollectionUtils.isEmpty(dimValueMap.getAlias())) {
dimValueAlias.addAll(dimValueMap.getAlias());
}
SchemaValueMap schemaValueMap = new SchemaValueMap();
BeanUtils.copyProperties(dimValueMap, schemaValueMap);
schemaValueMaps.add(schemaValueMap);
}
}
SchemaElement dimToAdd = SchemaElement.builder()
.dataSet(resp.getId())
.model(dim.getModelId())
.id(dim.getId())
.name(dim.getName())
.bizName(dim.getBizName())
.type(SchemaElementType.DIMENSION)
.useCnt(dim.getUseCnt())
.alias(alias)
.schemaValueMaps(schemaValueMaps)
.build();
dimensions.add(dimToAdd);
SchemaElement dimValueToAdd = SchemaElement.builder()
.dataSet(resp.getId())
.model(dim.getModelId())
.id(dim.getId())
.name(dim.getName())
.bizName(dim.getBizName())
.type(SchemaElementType.VALUE)
.useCnt(dim.getUseCnt())
.alias(new ArrayList<>(Arrays.asList(dimValueAlias.toArray(new String[0]))))
.build();
dimensionValues.add(dimValueToAdd);
if (dim.getIsTag() == 1) {
SchemaElement tagToAdd = SchemaElement.builder()
.dataSet(resp.getId())
.model(dim.getModelId())
.id(dim.getId())
.name(dim.getName())
.bizName(dim.getBizName())
.type(SchemaElementType.TAG)
.useCnt(dim.getUseCnt())
.alias(alias)
.schemaValueMaps(schemaValueMaps)
.build();
tags.add(tagToAdd);
}
}
dataSetSchema.getDimensions().addAll(dimensions);
dataSetSchema.getDimensionValues().addAll(dimensionValues);
dataSetSchema.getTags().addAll(tags);
DimSchemaResp dim = resp.getPrimaryKey();
if (dim != null) {
SchemaElement entity = SchemaElement.builder()
.dataSet(resp.getId())
.model(dim.getModelId())
.id(dim.getId())
.name(dim.getName())
.bizName(dim.getBizName())
.type(SchemaElementType.ENTITY)
.useCnt(dim.getUseCnt())
.alias(dim.getEntityAlias())
.build();
dataSetSchema.setEntity(entity);
}
return dataSetSchema;
return metrics;
}
private static List<RelatedSchemaElement> getRelateSchemaElement(MetricSchemaResp metricSchemaResp) {

View File

@@ -32,6 +32,7 @@ public class WordService {
addWordsByType(DictWordType.METRIC, semanticSchema.getMetrics(), words);
addWordsByType(DictWordType.ENTITY, semanticSchema.getEntities(), words);
addWordsByType(DictWordType.VALUE, semanticSchema.getDimensionValues(), words);
addWordsByType(DictWordType.TAG, semanticSchema.getTags(), words);
return words;
}

View File

@@ -32,11 +32,6 @@ public class SqlSelectFunctionHelper {
return SqlSelectHelper.hasGroupBy(sql);
}
public static void main(String[] args) {
String sql = "select a from table";
System.out.println(hasAggregateFunction(sql));
}
public static boolean hasFunction(String sql, String functionName) {
Set<String> functions = getFunctions(sql);
if (!CollectionUtils.isEmpty(functions)) {

View File

@@ -20,9 +20,12 @@ public class DataSetModelConfig {
private List<Long> dimensions = Lists.newArrayList();
public DataSetModelConfig(Long id, List<Long> dimensions, List<Long> metrics) {
private List<Long> tagIds = Lists.newArrayList();
public DataSetModelConfig(Long id, List<Long> dimensions, List<Long> metrics, List<Long> tagIds) {
this.id = id;
this.metrics = metrics;
this.dimensions = dimensions;
this.tagIds = tagIds;
}
}

View File

@@ -45,6 +45,11 @@ public class DataSetResp extends SchemaItem {
.collect(Collectors.toList());
}
public List<Long> getAllTags() {
return getDataSetModelConfigs().stream().map(DataSetModelConfig::getTagIds)
.flatMap(Collection::stream).collect(Collectors.toList());
}
public List<Long> getAllIncludeAllModels() {
return getDataSetModelConfigs().stream().filter(DataSetModelConfig::isIncludesAll)
.map(DataSetModelConfig::getId)

View File

@@ -15,6 +15,8 @@ public class DataSetSchemaResp extends DataSetResp {
private List<MetricSchemaResp> metrics = Lists.newArrayList();
private List<DimSchemaResp> dimensions = Lists.newArrayList();
private List<TagResp> tags = Lists.newArrayList();
private List<ModelResp> modelResps = Lists.newArrayList();
public DimSchemaResp getPrimaryKey() {

View File

@@ -35,4 +35,5 @@ public abstract class BaseWordBuilder {
}
protected abstract List<DictWord> doGet(String word, SchemaElement schemaElement);
}

View File

@@ -0,0 +1,25 @@
package com.tencent.supersonic.headless.core.knowledge.builder;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.core.knowledge.DictWord;
import java.util.ArrayList;
import java.util.List;
import org.springframework.util.CollectionUtils;
public abstract class BaseWordWithAliasBuilder extends BaseWordBuilder {
public abstract DictWord getOneWordNature(String word, SchemaElement schemaElement, boolean isSuffix);
public List<DictWord> getOneWordNatureAlias(SchemaElement schemaElement, boolean isSuffix) {
List<DictWord> dictWords = new ArrayList<>();
if (CollectionUtils.isEmpty(schemaElement.getAlias())) {
return dictWords;
}
for (String alias : schemaElement.getAlias()) {
dictWords.add(getOneWordNature(alias, schemaElement, isSuffix));
}
return dictWords;
}
}

View File

@@ -1,41 +1,32 @@
package com.tencent.supersonic.headless.core.knowledge.builder;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.core.knowledge.DictWord;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
/**
* dimension word nature
*/
@Service
public class DimensionWordBuilder extends BaseWordBuilder {
@Value("${nlp.dimension.use.suffix:true}")
private boolean nlpDimensionUseSuffix = true;
public class DimensionWordBuilder extends BaseWordWithAliasBuilder {
@Override
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
List<DictWord> result = Lists.newArrayList();
result.add(getOnwWordNature(word, schemaElement, false));
result.addAll(getOnwWordNatureAlias(schemaElement, false));
if (nlpDimensionUseSuffix) {
String reverseWord = StringUtils.reverse(word);
if (StringUtils.isNotEmpty(word) && !word.equalsIgnoreCase(reverseWord)) {
result.add(getOnwWordNature(reverseWord, schemaElement, true));
}
result.add(getOneWordNature(word, schemaElement, false));
result.addAll(getOneWordNatureAlias(schemaElement, false));
String reverseWord = StringUtils.reverse(word);
if (StringUtils.isNotEmpty(word) && !word.equalsIgnoreCase(reverseWord)) {
result.add(getOneWordNature(reverseWord, schemaElement, true));
}
return result;
}
private DictWord getOnwWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
public DictWord getOneWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
DictWord dictWord = new DictWord();
dictWord.setWord(word);
Long modelId = schemaElement.getModel();
@@ -49,16 +40,4 @@ public class DimensionWordBuilder extends BaseWordBuilder {
return dictWord;
}
private List<DictWord> getOnwWordNatureAlias(SchemaElement schemaElement, boolean isSuffix) {
List<DictWord> dictWords = new ArrayList<>();
if (CollectionUtils.isEmpty(schemaElement.getAlias())) {
return dictWords;
}
for (String alias : schemaElement.getAlias()) {
dictWords.add(getOnwWordNature(alias, schemaElement, false));
}
return dictWords;
}
}

View File

@@ -2,44 +2,38 @@ package com.tencent.supersonic.headless.core.knowledge.builder;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.core.knowledge.DictWord;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import java.util.List;
import java.util.Objects;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
/**
* dimension value wordNature
*/
@Service
@Slf4j
public class EntityWordBuilder extends BaseWordBuilder {
public class EntityWordBuilder extends BaseWordWithAliasBuilder {
@Override
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
List<DictWord> result = Lists.newArrayList();
if (Objects.isNull(schemaElement)) {
return result;
}
Long modelId = schemaElement.getModel();
String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
+ DictWordType.ENTITY.getType();
if (!CollectionUtils.isEmpty(schemaElement.getAlias())) {
schemaElement.getAlias().stream().forEach(alias -> {
DictWord dictWordAlias = new DictWord();
dictWordAlias.setWord(alias);
dictWordAlias.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY * 2, nature));
result.add(dictWordAlias);
});
}
result.add(getOneWordNature(word, schemaElement, false));
result.addAll(getOneWordNatureAlias(schemaElement, false));
return result;
}
@Override
public DictWord getOneWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
String nature = DictWordType.NATURE_SPILT + schemaElement.getModel()
+ DictWordType.NATURE_SPILT + schemaElement.getId() + DictWordType.ENTITY.getType();
DictWord dictWord = new DictWord();
dictWord.setWord(word);
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY * 2, nature));
return dictWord;
}
}

View File

@@ -1,41 +1,32 @@
package com.tencent.supersonic.headless.core.knowledge.builder;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.core.knowledge.DictWord;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
/**
* Metric DictWord
*/
@Service
public class MetricWordBuilder extends BaseWordBuilder {
@Value("${nlp.metric.use.suffix:true}")
private boolean nlpMetricUseSuffix = true;
public class MetricWordBuilder extends BaseWordWithAliasBuilder {
@Override
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
List<DictWord> result = Lists.newArrayList();
result.add(getOnwWordNature(word, schemaElement, false));
result.addAll(getOnwWordNatureAlias(schemaElement, false));
if (nlpMetricUseSuffix) {
String reverseWord = StringUtils.reverse(word);
if (!word.equalsIgnoreCase(reverseWord)) {
result.add(getOnwWordNature(reverseWord, schemaElement, true));
}
result.add(getOneWordNature(word, schemaElement, false));
result.addAll(getOneWordNatureAlias(schemaElement, false));
String reverseWord = StringUtils.reverse(word);
if (!word.equalsIgnoreCase(reverseWord)) {
result.add(getOneWordNature(reverseWord, schemaElement, true));
}
return result;
}
private DictWord getOnwWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
public DictWord getOneWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
DictWord dictWord = new DictWord();
dictWord.setWord(word);
Long modelId = schemaElement.getModel();
@@ -49,16 +40,4 @@ public class MetricWordBuilder extends BaseWordBuilder {
return dictWord;
}
private List<DictWord> getOnwWordNatureAlias(SchemaElement schemaElement, boolean isSuffix) {
List<DictWord> dictWords = new ArrayList<>();
if (CollectionUtils.isEmpty(schemaElement.getAlias())) {
return dictWords;
}
for (String alias : schemaElement.getAlias()) {
dictWords.add(getOnwWordNature(alias, schemaElement, false));
}
return dictWords;
}
}

View File

@@ -1,42 +1,36 @@
package com.tencent.supersonic.headless.core.knowledge.builder;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.core.knowledge.DictWord;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.springframework.stereotype.Service;
import java.util.List;
import java.util.Objects;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
/**
* model word nature
*/
@Service
@Slf4j
public class ModelWordBuilder extends BaseWordBuilder {
public class ModelWordBuilder extends BaseWordWithAliasBuilder {
@Override
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
List<DictWord> result = Lists.newArrayList();
//modelName
DictWord dictWord = buildDictWord(word, schemaElement.getDataSet());
result.add(dictWord);
//alias
List<String> aliasList = schemaElement.getAlias();
if (CollectionUtils.isNotEmpty(aliasList)) {
for (String alias : aliasList) {
result.add(buildDictWord(alias, schemaElement.getDataSet()));
}
if (Objects.isNull(schemaElement)) {
return result;
}
result.add(getOneWordNature(word, schemaElement, false));
result.addAll(getOneWordNatureAlias(schemaElement, false));
return result;
}
private DictWord buildDictWord(String word, Long modelId) {
public DictWord getOneWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
DictWord dictWord = new DictWord();
dictWord.setWord(word);
String nature = DictWordType.NATURE_SPILT + modelId;
String nature = DictWordType.NATURE_SPILT + schemaElement.getDataSet();
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
return dictWord;
}

View File

@@ -0,0 +1,40 @@
package com.tencent.supersonic.headless.core.knowledge.builder;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.core.knowledge.DictWord;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
@Service
public class TagWordBuilder extends BaseWordWithAliasBuilder {
@Override
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
List<DictWord> result = Lists.newArrayList();
result.add(getOneWordNature(word, schemaElement, false));
result.addAll(getOneWordNatureAlias(schemaElement, false));
String reverseWord = StringUtils.reverse(word);
if (!word.equalsIgnoreCase(reverseWord)) {
result.add(getOneWordNature(reverseWord, schemaElement, true));
}
return result;
}
public DictWord getOneWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
DictWord dictWord = new DictWord();
dictWord.setWord(word);
Long modelId = schemaElement.getModel();
String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
+ DictWordType.TAG.getType();
if (isSuffix) {
nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
+ DictWordType.SUFFIX.getType() + DictWordType.TAG.getType();
}
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
return dictWord;
}
}

View File

@@ -2,40 +2,34 @@ package com.tencent.supersonic.headless.core.knowledge.builder;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.core.knowledge.DictWord;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import java.util.List;
import java.util.Objects;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
/**
* dimension value wordNature
*/
@Service
@Slf4j
public class ValueWordBuilder extends BaseWordBuilder {
public class ValueWordBuilder extends BaseWordWithAliasBuilder {
@Override
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
List<DictWord> result = Lists.newArrayList();
if (Objects.nonNull(schemaElement) && !CollectionUtils.isEmpty(schemaElement.getAlias())) {
schemaElement.getAlias().stream().forEach(value -> {
DictWord dictWord = new DictWord();
Long modelId = schemaElement.getModel();
String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId();
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
dictWord.setWord(value);
result.add(dictWord);
});
if (Objects.nonNull(schemaElement)) {
result.addAll(getOneWordNatureAlias(schemaElement, false));
}
log.debug("ValueWordBuilder, result:{}", result);
return result;
}
public DictWord getOneWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
DictWord dictWord = new DictWord();
Long modelId = schemaElement.getModel();
String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId();
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
dictWord.setWord(word);
return dictWord;
}
}

View File

@@ -18,6 +18,7 @@ public class WordBuilderFactory {
wordNatures.put(DictWordType.VIEW, new ModelWordBuilder());
wordNatures.put(DictWordType.ENTITY, new EntityWordBuilder());
wordNatures.put(DictWordType.VALUE, new ValueWordBuilder());
wordNatures.put(DictWordType.TAG, new TagWordBuilder());
}
public static BaseWordBuilder get(DictWordType strategyType) {

View File

@@ -107,7 +107,7 @@ public class KnowledgeController {
}
/**
* dailyDictTask-手动离线更新所字典
* dailyDictTask-手动离线更新所字典
*/
@PutMapping("/task/all")
public Boolean dailyDictTask(

View File

@@ -19,7 +19,7 @@ public interface TagService {
TagResp getTag(Long id, User user);
List<TagResp> query(TagFilter tagFilter);
List<TagResp> getTags(TagFilter tagFilter);
PageInfo<TagResp> queryPage(TagFilterPage tagFilterPage, User user);

View File

@@ -50,7 +50,7 @@ import com.tencent.supersonic.headless.server.service.QueryService;
import com.tencent.supersonic.headless.server.utils.QueryReqConverter;
import com.tencent.supersonic.headless.server.utils.QueryUtils;
import com.tencent.supersonic.headless.server.utils.StatUtils;
import com.tencent.supersonic.headless.server.utils.TagReqConverter;
import com.tencent.supersonic.headless.server.utils.TagConverter;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
@@ -72,7 +72,7 @@ public class QueryServiceImpl implements QueryService {
private StatUtils statUtils;
private final QueryUtils queryUtils;
private final QueryReqConverter queryReqConverter;
private final TagReqConverter tagReqConverter;
private final TagConverter tagConverter;
private final Catalog catalog;
private final AppService appService;
private final QueryCache queryCache;
@@ -84,7 +84,7 @@ public class QueryServiceImpl implements QueryService {
StatUtils statUtils,
QueryUtils queryUtils,
QueryReqConverter queryReqConverter,
TagReqConverter tagReqConverter, Catalog catalog,
TagConverter tagConverter, Catalog catalog,
AppService appService,
QueryCache queryCache,
SemanticSchemaManager semanticSchemaManager,
@@ -93,7 +93,7 @@ public class QueryServiceImpl implements QueryService {
this.statUtils = statUtils;
this.queryUtils = queryUtils;
this.queryReqConverter = queryReqConverter;
this.tagReqConverter = tagReqConverter;
this.tagConverter = tagConverter;
this.catalog = catalog;
this.appService = appService;
this.queryCache = queryCache;
@@ -206,7 +206,7 @@ public class QueryServiceImpl implements QueryService {
SchemaFilterReq filter = buildSchemaFilterReq(queryTagReq);
schemaFilterReq.setModelIds(queryTagReq.getModelIds());
SemanticSchemaResp semanticSchemaResp = catalog.fetchSemanticSchema(filter);
QueryStatement queryStatement = tagReqConverter.convert(queryTagReq, semanticSchemaResp);
QueryStatement queryStatement = tagConverter.convert(queryTagReq, semanticSchemaResp);
queryStatement.setModelIds(queryTagReq.getModelIds());
queryStatement.setEnableOptimize(queryUtils.enableOptimize());
queryStatement.setSemanticSchemaResp(semanticSchemaResp);

View File

@@ -1,5 +1,7 @@
package com.tencent.supersonic.headless.server.service.impl;
import static com.tencent.supersonic.common.pojo.Constants.AT_SYMBOL;
import com.github.pagehelper.PageInfo;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
@@ -12,12 +14,14 @@ import com.tencent.supersonic.common.pojo.enums.TypeEnums;
import com.tencent.supersonic.common.pojo.exception.InvalidArgumentException;
import com.tencent.supersonic.common.util.JsonUtil;
import com.tencent.supersonic.headless.api.pojo.enums.SchemaType;
import com.tencent.supersonic.headless.api.pojo.request.DataSetFilterReq;
import com.tencent.supersonic.headless.api.pojo.request.ItemUseReq;
import com.tencent.supersonic.headless.api.pojo.request.PageDimensionReq;
import com.tencent.supersonic.headless.api.pojo.request.PageMetricReq;
import com.tencent.supersonic.headless.api.pojo.request.SchemaFilterReq;
import com.tencent.supersonic.headless.api.pojo.request.SchemaItemQueryReq;
import com.tencent.supersonic.headless.api.pojo.request.DataSetFilterReq;
import com.tencent.supersonic.headless.api.pojo.response.DataSetResp;
import com.tencent.supersonic.headless.api.pojo.response.DataSetSchemaResp;
import com.tencent.supersonic.headless.api.pojo.response.DatabaseResp;
import com.tencent.supersonic.headless.api.pojo.response.DimSchemaResp;
import com.tencent.supersonic.headless.api.pojo.response.DimensionResp;
@@ -30,10 +34,9 @@ import com.tencent.supersonic.headless.api.pojo.response.ModelResp;
import com.tencent.supersonic.headless.api.pojo.response.ModelSchemaResp;
import com.tencent.supersonic.headless.api.pojo.response.SemanticSchemaResp;
import com.tencent.supersonic.headless.api.pojo.response.TagResp;
import com.tencent.supersonic.headless.api.pojo.response.DataSetResp;
import com.tencent.supersonic.headless.api.pojo.response.DataSetSchemaResp;
import com.tencent.supersonic.headless.server.pojo.MetaFilter;
import com.tencent.supersonic.headless.server.pojo.TagFilter;
import com.tencent.supersonic.headless.server.service.DataSetService;
import com.tencent.supersonic.headless.server.service.DimensionService;
import com.tencent.supersonic.headless.server.service.DomainService;
import com.tencent.supersonic.headless.server.service.MetricService;
@@ -41,16 +44,10 @@ import com.tencent.supersonic.headless.server.service.ModelRelaService;
import com.tencent.supersonic.headless.server.service.ModelService;
import com.tencent.supersonic.headless.server.service.SchemaService;
import com.tencent.supersonic.headless.server.service.TagService;
import com.tencent.supersonic.headless.server.service.DataSetService;
import com.tencent.supersonic.headless.server.utils.DimensionConverter;
import com.tencent.supersonic.headless.server.utils.MetricConverter;
import com.tencent.supersonic.headless.server.utils.StatUtils;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.BeanUtils;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import com.tencent.supersonic.headless.server.utils.TagConverter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
@@ -58,8 +55,11 @@ import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import static com.tencent.supersonic.common.pojo.Constants.AT_SYMBOL;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.BeanUtils;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
@Slf4j
@Service
@@ -119,21 +119,31 @@ public class SchemaServiceImpl implements SchemaService {
}
public List<DataSetSchemaResp> buildDataSetSchema(DataSetFilterReq filter) {
List<DataSetSchemaResp> dataSetSchemaResps = new ArrayList<>();
List<Long> dataSetIds = filter.getDataSetIds();
MetaFilter metaFilter = new MetaFilter();
metaFilter.setStatus(StatusEnum.ONLINE.getCode());
metaFilter.setIds(dataSetIds);
metaFilter.setIds(filter.getDataSetIds());
List<DataSetResp> dataSetResps = dataSetService.getDataSetList(metaFilter);
List<Long> modelIds = dataSetResps.stream().map(DataSetResp::getAllModels)
Map<Long, DataSetResp> dataSetRespMap = getDataSetMap(dataSetResps);
List<Long> modelIds = dataSetRespMap.values().stream().map(DataSetResp::getAllModels)
.flatMap(Collection::stream).collect(Collectors.toList());
metaFilter.setModelIds(modelIds);
metaFilter.setIds(Lists.newArrayList());
List<MetricResp> metricResps = metricService.getMetrics(metaFilter);
List<DimensionResp> dimensionResps = dimensionService.getDimensions(metaFilter);
metaFilter.setIds(modelIds);
List<ModelResp> modelResps = modelService.getModelList(metaFilter);
Map<Long, DataSetResp> dataSetRespMap = getDataSetMap(dataSetResps);
TagFilter tagFilter = new TagFilter();
tagFilter.setModelIds(modelIds);
List<TagResp> tagResps = tagService.getTags(tagFilter);
List<DataSetSchemaResp> dataSetSchemaResps = new ArrayList<>();
for (Long dataSetId : dataSetRespMap.keySet()) {
DataSetResp dataSetResp = dataSetRespMap.get(dataSetId);
if (dataSetResp == null || !StatusEnum.ONLINE.getCode().equals(dataSetResp.getStatus())) {
@@ -149,6 +159,9 @@ public class SchemaServiceImpl implements SchemaService {
dataSetSchemaResp.setMetrics(metricSchemaResps);
dataSetSchemaResp.setModelResps(modelResps.stream().filter(modelResp ->
dataSetResp.getAllModels().contains(modelResp.getId())).collect(Collectors.toList()));
tagResps = TagConverter.filterByDataSet(tagResps, dataSetResp);
dataSetSchemaResp.setTags(tagResps);
dataSetSchemaResps.add(dataSetSchemaResp);
}
fillStaticInfo(dataSetSchemaResps);
@@ -309,7 +322,7 @@ public class SchemaServiceImpl implements SchemaService {
// add tag info
TagFilter tagFilter = new TagFilter();
tagFilter.setModelIds(schemaFilterReq.getModelIds());
List<TagResp> tagResps = tagService.query(tagFilter);
List<TagResp> tagResps = tagService.getTags(tagFilter);
semanticSchemaResp.setTags(tagResps);
}
if (!CollectionUtils.isEmpty(semanticSchemaResp.getModelIds())) {

View File

@@ -123,7 +123,7 @@ public class TagServiceImpl implements TagService {
}
@Override
public List<TagResp> query(TagFilter tagFilter) {
public List<TagResp> getTags(TagFilter tagFilter) {
List<TagDO> tagDOS = tagRepository.query(tagFilter);
if (!CollectionUtils.isEmpty(tagDOS)) {
return tagDOS.stream().map(tagDO -> convert(tagDO)).collect(Collectors.toList());
@@ -154,7 +154,7 @@ public class TagServiceImpl implements TagService {
PageInfo<TagDO> tagDOPageInfo = PageHelper.startPage(tagFilterPage.getCurrent(),
tagFilterPage.getPageSize())
.doSelectPageInfo(() -> query(tagFilter));
.doSelectPageInfo(() -> getTags(tagFilter));
PageInfo<TagResp> pageInfo = new PageInfo<>();
BeanUtils.copyProperties(tagDOPageInfo, pageInfo);
List<TagResp> tagRespList = convertList(tagDOPageInfo.getList(), collectIds);
@@ -223,7 +223,7 @@ public class TagServiceImpl implements TagService {
TagFilter tagFilter = new TagFilter();
tagFilter.setModelIds(Arrays.asList(tagReq.getModelId()));
List<TagResp> tagResps = query(tagFilter);
List<TagResp> tagResps = getTags(tagFilter);
if (!CollectionUtils.isEmpty(tagResps)) {
Long bizNameSameCount = tagResps.stream().filter(tagResp -> !tagResp.getId().equals(tagReq.getId()))
.filter(tagResp -> tagResp.getBizName().equalsIgnoreCase(tagReq.getBizName())).count();

View File

@@ -146,7 +146,7 @@ public class QueryReqConverter {
// if there is no group by in S2SQL,set MetricTable's aggOption to "NATIVE"
// if there is count() in S2SQL,set MetricTable's aggOption to "NATIVE"
String sql = databaseReq.getSql();
if (!SqlSelectHelper.hasGroupBy(sql)
if (!SqlSelectFunctionHelper.hasAggregateFunction(sql)
|| SqlSelectFunctionHelper.hasFunction(sql, "count")
|| SqlSelectFunctionHelper.hasFunction(sql, "count_distinct")) {
return AggOption.NATIVE;

View File

@@ -9,11 +9,17 @@ import com.tencent.supersonic.headless.api.pojo.enums.EngineType;
import com.tencent.supersonic.headless.api.pojo.request.QuerySqlReq;
import com.tencent.supersonic.headless.api.pojo.request.QueryStructReq;
import com.tencent.supersonic.headless.api.pojo.request.QueryTagReq;
import com.tencent.supersonic.headless.api.pojo.response.DataSetResp;
import com.tencent.supersonic.headless.api.pojo.response.DatabaseResp;
import com.tencent.supersonic.headless.api.pojo.response.SemanticSchemaResp;
import com.tencent.supersonic.headless.core.pojo.QueryStatement;
import com.tencent.supersonic.headless.api.pojo.response.TagResp;
import com.tencent.supersonic.headless.core.pojo.DataSetQueryParam;
import com.tencent.supersonic.headless.core.pojo.QueryStatement;
import com.tencent.supersonic.headless.core.utils.SqlGenerateUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.springframework.beans.BeanUtils;
@@ -21,13 +27,9 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
@Component
@Slf4j
public class TagReqConverter {
public class TagConverter {
@Value("${query.sql.limitWrapper:true}")
private Boolean limitWrapper;
@@ -95,4 +97,9 @@ public class TagReqConverter {
queryParam.setDimensionFilters(queryTagReq.getTagFilters());
queryParam.setQueryType(QueryType.TAG);
}
public static List<TagResp> filterByDataSet(List<TagResp> tagResps, DataSetResp dataSetResp) {
return tagResps.stream().filter(tagResp -> dataSetResp.getAllTags().contains(tagResp.getId())
|| dataSetResp.getAllIncludeAllModels().contains(tagResp.getModelId())).collect(Collectors.toList());
}
}

View File

@@ -221,10 +221,11 @@ public class BenchMarkDemoDataLoader {
viewReq.setDescription("包含cspider数据集相关标签和指标信息");
viewReq.setAdmins(Lists.newArrayList("admin"));
List<DataSetModelConfig> viewModelConfigs = Lists.newArrayList(
new DataSetModelConfig(5L, Lists.newArrayList(8L), Lists.newArrayList()),
new DataSetModelConfig(6L, Lists.newArrayList(9L, 10L), Lists.newArrayList()),
new DataSetModelConfig(7L, Lists.newArrayList(11L, 12L), Lists.newArrayList()),
new DataSetModelConfig(8L, Lists.newArrayList(13L, 14L), Lists.newArrayList(8L, 9L))
new DataSetModelConfig(5L, Lists.newArrayList(8L), Lists.newArrayList(), Lists.newArrayList()),
new DataSetModelConfig(6L, Lists.newArrayList(9L, 10L), Lists.newArrayList(), Lists.newArrayList()),
new DataSetModelConfig(7L, Lists.newArrayList(11L, 12L), Lists.newArrayList(), Lists.newArrayList()),
new DataSetModelConfig(8L, Lists.newArrayList(13L, 14L), Lists.newArrayList(8L, 9L),
Lists.newArrayList())
);
DataSetDetail viewDetail = new DataSetDetail();
viewDetail.setDataSetModelConfigs(viewModelConfigs);

View File

@@ -262,10 +262,14 @@ public class DuSQLDemoDataLoader {
viewReq.setDescription("DuSQL互联网企业数据源相关的指标和维度等");
viewReq.setAdmins(Lists.newArrayList("admin"));
List<DataSetModelConfig> viewModelConfigs = Lists.newArrayList(
new DataSetModelConfig(9L, Lists.newArrayList(16L, 17L, 18L, 19L, 20L), Lists.newArrayList(10L, 11L)),
new DataSetModelConfig(10L, Lists.newArrayList(21L, 22L, 23L), Lists.newArrayList(12L)),
new DataSetModelConfig(11L, Lists.newArrayList(), Lists.newArrayList(13L, 14L, 15L)),
new DataSetModelConfig(12L, Lists.newArrayList(24L), Lists.newArrayList(16L, 17L, 18L, 19L)));
new DataSetModelConfig(9L, Lists.newArrayList(16L, 17L, 18L, 19L, 20L), Lists.newArrayList(10L, 11L),
Lists.newArrayList()),
new DataSetModelConfig(10L, Lists.newArrayList(21L, 22L, 23L), Lists.newArrayList(12L),
Lists.newArrayList()),
new DataSetModelConfig(11L, Lists.newArrayList(), Lists.newArrayList(13L, 14L, 15L),
Lists.newArrayList()),
new DataSetModelConfig(12L, Lists.newArrayList(24L), Lists.newArrayList(16L, 17L, 18L, 19L),
Lists.newArrayList()));
DataSetDetail viewDetail = new DataSetDetail();
viewDetail.setDataSetModelConfigs(viewModelConfigs);

View File

@@ -453,9 +453,9 @@ public class ModelDemoDataLoader {
dataSetReq.setDescription("包含超音数访问统计相关的指标和维度等");
dataSetReq.setAdmins(Lists.newArrayList("admin"));
List<DataSetModelConfig> dataSetModelConfigs = Lists.newArrayList(
new DataSetModelConfig(1L, Lists.newArrayList(1L, 2L), Lists.newArrayList()),
new DataSetModelConfig(2L, Lists.newArrayList(), Lists.newArrayList(1L, 2L, 3L)),
new DataSetModelConfig(3L, Lists.newArrayList(3L), Lists.newArrayList(4L)));
new DataSetModelConfig(1L, Lists.newArrayList(1L, 2L), Lists.newArrayList(), Lists.newArrayList()),
new DataSetModelConfig(2L, Lists.newArrayList(), Lists.newArrayList(1L, 2L, 3L), Lists.newArrayList()),
new DataSetModelConfig(3L, Lists.newArrayList(3L), Lists.newArrayList(4L), Lists.newArrayList()));
DataSetDetail dataSetDetail = new DataSetDetail();
dataSetDetail.setDataSetModelConfigs(dataSetModelConfigs);
@@ -481,7 +481,7 @@ public class ModelDemoDataLoader {
dataSetReq.setAdmins(Lists.newArrayList("admin", "jack"));
List<DataSetModelConfig> dataSetModelConfigs = Lists.newArrayList(
new DataSetModelConfig(4L, Lists.newArrayList(4L, 5L, 6L, 7L),
Lists.newArrayList(5L, 6L, 7L))
Lists.newArrayList(5L, 6L, 7L), Lists.newArrayList())
);
DataSetDetail dataSetDetail = new DataSetDetail();
dataSetDetail.setDataSetModelConfigs(dataSetModelConfigs);

View File

@@ -61,7 +61,7 @@ s2:
chat-model:
provider: open_ai
openai:
api-key: api_key
api-key: app_key
model-name: gpt-3.5-turbo-16k
temperature: 0.0
timeout: PT60S