(improvement)(Headless) Abstracted tags from dimensions and metrics. (#828)

This commit is contained in:
lexluo09
2024-03-18 12:07:49 +08:00
committed by GitHub
parent 7f24c4c4e0
commit 8733231976
40 changed files with 236 additions and 322 deletions

View File

@@ -13,9 +13,9 @@ public enum DictWordType {
DIMENSION("dimension"), DIMENSION("dimension"),
VALUE("dv"), VALUE("value"),
DATASET("dataset"), DATASET("dataSet"),
ENTITY("entity"), ENTITY("entity"),
@@ -23,8 +23,6 @@ public enum DictWordType {
TAG("tag"), TAG("tag"),
TAG_VALUE("tv"),
SUFFIX("suffix"); SUFFIX("suffix");
public static final String NATURE_SPILT = "_"; public static final String NATURE_SPILT = "_";
@@ -35,7 +33,7 @@ public enum DictWordType {
this.type = type; this.type = type;
} }
public String getTypeWithSpilt() { public String getType() {
return NATURE_SPILT + type; return NATURE_SPILT + type;
} }
@@ -44,7 +42,7 @@ public enum DictWordType {
return null; return null;
} }
for (DictWordType dictWordType : values()) { for (DictWordType dictWordType : values()) {
if (nature.endsWith(dictWordType.getTypeWithSpilt())) { if (nature.endsWith(dictWordType.getType())) {
return dictWordType; return dictWordType;
} }
} }
@@ -54,7 +52,7 @@ public enum DictWordType {
return DATASET; return DATASET;
} }
//dimension value //dimension value
if (natures.length >= 3 && StringUtils.isNumeric(natures[1]) && StringUtils.isNumeric(natures[2])) { if (natures.length == 3 && StringUtils.isNumeric(natures[1]) && StringUtils.isNumeric(natures[2])) {
return VALUE; return VALUE;
} }
return null; return null;
@@ -76,4 +74,5 @@ public enum DictWordType {
} }
return ""; return "";
} }
}
}

View File

@@ -9,13 +9,11 @@ import java.util.Set;
@Data @Data
public class DataSetSchema { public class DataSetSchema {
private SchemaElement dataSet; private SchemaElement dataSet;
private Set<SchemaElement> metrics = new HashSet<>(); private Set<SchemaElement> metrics = new HashSet<>();
private Set<SchemaElement> dimensions = new HashSet<>(); private Set<SchemaElement> dimensions = new HashSet<>();
private Set<SchemaElement> dimensionValues = new HashSet<>();
private Set<SchemaElement> tags = new HashSet<>(); private Set<SchemaElement> tags = new HashSet<>();
private Set<SchemaElement> tagValues = new HashSet<>(); private Set<SchemaElement> dimensionValues = new HashSet<>();
private SchemaElement entity = new SchemaElement(); private SchemaElement entity = new SchemaElement();
private QueryConfig queryConfig; private QueryConfig queryConfig;
private QueryType queryType; private QueryType queryType;
@@ -42,9 +40,6 @@ public class DataSetSchema {
case TAG: case TAG:
element = tags.stream().filter(e -> e.getId() == elementID).findFirst(); element = tags.stream().filter(e -> e.getId() == elementID).findFirst();
break; break;
case TAG_VALUE:
element = tagValues.stream().filter(e -> e.getId() == elementID).findFirst();
break;
default: default:
} }

View File

@@ -6,8 +6,7 @@ public enum SchemaElementType {
DIMENSION, DIMENSION,
VALUE, VALUE,
ENTITY, ENTITY,
TAG,
TAG_VALUE,
ID, ID,
DATE DATE,
TAG
} }

View File

@@ -44,9 +44,6 @@ public class SemanticSchema implements Serializable {
case TAG: case TAG:
element = getElementsById(elementID, getTags()); element = getElementsById(elementID, getTags());
break; break;
case TAG_VALUE:
element = getElementsById(elementID, getTagValues());
break;
default: default:
} }
@@ -85,28 +82,6 @@ public class SemanticSchema implements Serializable {
return dimension.orElse(null); return dimension.orElse(null);
} }
public List<SchemaElement> getTags() {
List<SchemaElement> tags = new ArrayList<>();
dataSetSchemaList.stream().forEach(d -> tags.addAll(d.getTags()));
return tags;
}
public List<SchemaElement> getTags(Long dataSetId) {
List<SchemaElement> tags = getTags();
return getElementsByDataSetId(dataSetId, tags);
}
public List<SchemaElement> getTagValues() {
List<SchemaElement> tags = new ArrayList<>();
dataSetSchemaList.stream().forEach(d -> tags.addAll(d.getTagValues()));
return tags;
}
public List<SchemaElement> getTagValues(Long dataSetId) {
List<SchemaElement> tags = getTagValues();
return getElementsByDataSetId(dataSetId, tags);
}
public List<SchemaElement> getMetrics() { public List<SchemaElement> getMetrics() {
List<SchemaElement> metrics = new ArrayList<>(); List<SchemaElement> metrics = new ArrayList<>();
dataSetSchemaList.stream().forEach(d -> metrics.addAll(d.getMetrics())); dataSetSchemaList.stream().forEach(d -> metrics.addAll(d.getMetrics()));
@@ -129,6 +104,20 @@ public class SemanticSchema implements Serializable {
return getElementsByDataSetId(dataSetId, entities); return getElementsByDataSetId(dataSetId, entities);
} }
/**
 * Collects the tags of every data set schema held by this semantic schema.
 *
 * @return a new mutable list containing the tags of all data set schemas
 */
public List<SchemaElement> getTags() {
    List<SchemaElement> allTags = new ArrayList<>();
    // Append each schema's tags to one flat list.
    dataSetSchemaList.forEach(schema -> allTags.addAll(schema.getTags()));
    return allTags;
}
/**
 * Collects the tags of the data set schemas whose data set id equals {@code dataSetId}.
 *
 * @param dataSetId id of the data set whose tags are requested
 * @return a new mutable list with the tags of the matching data set schemas;
 *         empty when no schema matches
 */
public List<SchemaElement> getTags(Long dataSetId) {
    List<SchemaElement> matchedTags = new ArrayList<>();
    dataSetSchemaList.forEach(schema -> {
        // schema.getDataSet() is the data set element; its getDataSet() is compared with the id.
        if (dataSetId.equals(schema.getDataSet().getDataSet())) {
            matchedTags.addAll(schema.getTags());
        }
    });
    return matchedTags;
}
private List<SchemaElement> getElementsByDataSetId(Long dataSetId, List<SchemaElement> elements) { private List<SchemaElement> getElementsByDataSetId(Long dataSetId, List<SchemaElement> elements) {
return elements.stream() return elements.stream()
.filter(schemaElement -> dataSetId.equals(schemaElement.getDataSet())) .filter(schemaElement -> dataSetId.equals(schemaElement.getDataSet()))

View File

@@ -11,7 +11,7 @@ import javax.validation.constraints.NotNull;
@ToString @ToString
public class ItemValueReq { public class ItemValueReq {
private SchemaElementType type = SchemaElementType.TAG; private SchemaElementType type;
@NotNull @NotNull
private Long itemId; private Long itemId;

View File

@@ -1,6 +1,5 @@
package com.tencent.supersonic.headless.api.pojo.response; package com.tencent.supersonic.headless.api.pojo.response;
import static com.tencent.supersonic.common.pojo.Constants.DICT_VALUE;
import static com.tencent.supersonic.common.pojo.Constants.UNDERLINE; import static com.tencent.supersonic.common.pojo.Constants.UNDERLINE;
import com.tencent.supersonic.common.pojo.enums.StatusEnum; import com.tencent.supersonic.common.pojo.enums.StatusEnum;
@@ -32,10 +31,8 @@ public class DictItemResp {
@NotNull @NotNull
private StatusEnum status; private StatusEnum status;
public String generateNature() { public String getNature() {
return UNDERLINE + modelId + UNDERLINE + itemId + UNDERLINE + type.name().toLowerCase().substring(0, 1) return UNDERLINE + modelId + UNDERLINE + itemId;
+ DICT_VALUE;
} }
public String fetchDictFileName() { public String fetchDictFileName() {

View File

@@ -27,4 +27,5 @@ public class MeasureResp {
private Long modelId; private Long modelId;
private int isTag;
} }

View File

@@ -18,7 +18,7 @@ public class KnowledgeService {
public void updateSemanticKnowledge(List<DictWord> natures) { public void updateSemanticKnowledge(List<DictWord> natures) {
List<DictWord> prefixes = natures.stream() List<DictWord> prefixes = natures.stream()
.filter(entry -> !entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getTypeWithSpilt())) .filter(entry -> !entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType()))
.collect(Collectors.toList()); .collect(Collectors.toList());
for (DictWord nature : prefixes) { for (DictWord nature : prefixes) {
@@ -26,7 +26,7 @@ public class KnowledgeService {
} }
List<DictWord> suffixes = natures.stream() List<DictWord> suffixes = natures.stream()
.filter(entry -> entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getTypeWithSpilt())) .filter(entry -> entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType()))
.collect(Collectors.toList()); .collect(Collectors.toList());
SearchService.loadSuffix(suffixes); SearchService.loadSuffix(suffixes);

View File

@@ -88,7 +88,7 @@ public class SearchService {
entry -> { entry -> {
String name = entry.getKey().replace("#", " "); String name = entry.getKey().replace("#", " ");
List<String> natures = entry.getValue().stream() List<String> natures = entry.getValue().stream()
.map(nature -> nature.replaceAll(DictWordType.SUFFIX.getTypeWithSpilt(), "")) .map(nature -> nature.replaceAll(DictWordType.SUFFIX.getType(), ""))
.collect(Collectors.toList()); .collect(Collectors.toList());
name = StringUtils.reverse(name); name = StringUtils.reverse(name);
return new HanlpMapResult(name, natures, key); return new HanlpMapResult(name, natures, key);
@@ -169,8 +169,8 @@ public class SearchService {
if (Objects.nonNull(natures) && natures.length > 0) { if (Objects.nonNull(natures) && natures.length > 0) {
trie.put(dictWord.getWord(), getValue(natures)); trie.put(dictWord.getWord(), getValue(natures));
} }
if (dictWord.getNature().contains(DictWordType.METRIC.getTypeWithSpilt()) || dictWord.getNature() if (dictWord.getNature().contains(DictWordType.METRIC.getType()) || dictWord.getNature()
.contains(DictWordType.DIMENSION.getTypeWithSpilt())) { .contains(DictWordType.DIMENSION.getType())) {
suffixTrie.remove(dictWord.getWord()); suffixTrie.remove(dictWord.getWord());
} }
} }

View File

@@ -31,10 +31,10 @@ public class DimensionWordBuilder extends BaseWordWithAliasBuilder {
dictWord.setWord(word); dictWord.setWord(word);
Long modelId = schemaElement.getModel(); Long modelId = schemaElement.getModel();
String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId() String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
+ DictWordType.DIMENSION.getTypeWithSpilt(); + DictWordType.DIMENSION.getType();
if (isSuffix) { if (isSuffix) {
nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId() nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
+ DictWordType.SUFFIX.getTypeWithSpilt() + DictWordType.DIMENSION.getTypeWithSpilt(); + DictWordType.SUFFIX.getType() + DictWordType.DIMENSION.getType();
} }
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature)); dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
return dictWord; return dictWord;

View File

@@ -29,7 +29,7 @@ public class EntityWordBuilder extends BaseWordWithAliasBuilder {
@Override @Override
public DictWord getOneWordNature(String word, SchemaElement schemaElement, boolean isSuffix) { public DictWord getOneWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
String nature = DictWordType.NATURE_SPILT + schemaElement.getModel() String nature = DictWordType.NATURE_SPILT + schemaElement.getModel()
+ DictWordType.NATURE_SPILT + schemaElement.getId() + DictWordType.ENTITY.getTypeWithSpilt(); + DictWordType.NATURE_SPILT + schemaElement.getId() + DictWordType.ENTITY.getType();
DictWord dictWord = new DictWord(); DictWord dictWord = new DictWord();
dictWord.setWord(word); dictWord.setWord(word);
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY * 2, nature)); dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY * 2, nature));

View File

@@ -31,10 +31,10 @@ public class MetricWordBuilder extends BaseWordWithAliasBuilder {
dictWord.setWord(word); dictWord.setWord(word);
Long modelId = schemaElement.getModel(); Long modelId = schemaElement.getModel();
String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId() String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
+ DictWordType.METRIC.getTypeWithSpilt(); + DictWordType.METRIC.getType();
if (isSuffix) { if (isSuffix) {
nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId() nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
+ DictWordType.SUFFIX.getTypeWithSpilt() + DictWordType.METRIC.getTypeWithSpilt(); + DictWordType.SUFFIX.getType() + DictWordType.METRIC.getType();
} }
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature)); dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
return dictWord; return dictWord;

View File

@@ -1,41 +0,0 @@
package com.tencent.supersonic.headless.core.chat.knowledge.builder;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.core.chat.knowledge.DictWord;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
@Service
public class TagWordBuilder extends BaseWordWithAliasBuilder {
@Override
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
List<DictWord> result = Lists.newArrayList();
result.add(getOneWordNature(word, schemaElement, false));
result.addAll(getOneWordNatureAlias(schemaElement, false));
String reverseWord = StringUtils.reverse(word);
if (!word.equalsIgnoreCase(reverseWord)) {
result.add(getOneWordNature(reverseWord, schemaElement, true));
}
return result;
}
public DictWord getOneWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
DictWord dictWord = new DictWord();
dictWord.setWord(word);
Long modelId = schemaElement.getModel();
String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
+ DictWordType.TAG.getTypeWithSpilt();
if (isSuffix) {
nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
+ DictWordType.SUFFIX.getTypeWithSpilt() + DictWordType.TAG.getTypeWithSpilt();
}
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
return dictWord;
}
}

View File

@@ -18,7 +18,6 @@ public class WordBuilderFactory {
wordNatures.put(DictWordType.DATASET, new ModelWordBuilder()); wordNatures.put(DictWordType.DATASET, new ModelWordBuilder());
wordNatures.put(DictWordType.ENTITY, new EntityWordBuilder()); wordNatures.put(DictWordType.ENTITY, new EntityWordBuilder());
wordNatures.put(DictWordType.VALUE, new ValueWordBuilder()); wordNatures.put(DictWordType.VALUE, new ValueWordBuilder());
wordNatures.put(DictWordType.TAG, new TagWordBuilder());
} }
public static BaseWordBuilder get(DictWordType strategyType) { public static BaseWordBuilder get(DictWordType strategyType) {

View File

@@ -46,12 +46,6 @@ public class NatureHelper {
case VALUE: case VALUE:
result = SchemaElementType.VALUE; result = SchemaElementType.VALUE;
break; break;
case TAG:
result = SchemaElementType.TAG;
break;
case TAG_VALUE:
result = SchemaElementType.TAG_VALUE;
break;
default: default:
break; break;
} }
@@ -60,7 +54,7 @@ public class NatureHelper {
private static boolean isDataSetOrEntity(S2Term term, Integer model) { private static boolean isDataSetOrEntity(S2Term term, Integer model) {
return (DictWordType.NATURE_SPILT + model).equals(term.nature.toString()) || term.nature.toString() return (DictWordType.NATURE_SPILT + model).equals(term.nature.toString()) || term.nature.toString()
.endsWith(DictWordType.ENTITY.getTypeWithSpilt()); .endsWith(DictWordType.ENTITY.getType());
} }
public static Integer getDataSetByNature(Nature nature) { public static Integer getDataSetByNature(Nature nature) {
@@ -134,8 +128,8 @@ public class NatureHelper {
if (split.length <= 1) { if (split.length <= 1) {
return false; return false;
} }
return !nature.endsWith(DictWordType.METRIC.getTypeWithSpilt()) && !nature.endsWith( return !nature.endsWith(DictWordType.METRIC.getType()) && !nature.endsWith(
DictWordType.DIMENSION.getTypeWithSpilt()) DictWordType.DIMENSION.getType())
&& StringUtils.isNumeric(split[1]); && StringUtils.isNumeric(split[1]);
} }
@@ -158,12 +152,12 @@ public class NatureHelper {
private static long getDimensionCount(List<S2Term> terms) { private static long getDimensionCount(List<S2Term> terms) {
return terms.stream().filter(term -> term.nature.startsWith(DictWordType.NATURE_SPILT) && term.nature.toString() return terms.stream().filter(term -> term.nature.startsWith(DictWordType.NATURE_SPILT) && term.nature.toString()
.endsWith(DictWordType.DIMENSION.getTypeWithSpilt())).count(); .endsWith(DictWordType.DIMENSION.getType())).count();
} }
private static long getMetricCount(List<S2Term> terms) { private static long getMetricCount(List<S2Term> terms) {
return terms.stream().filter(term -> term.nature.startsWith(DictWordType.NATURE_SPILT) && term.nature.toString() return terms.stream().filter(term -> term.nature.startsWith(DictWordType.NATURE_SPILT) && term.nature.toString()
.endsWith(DictWordType.METRIC.getTypeWithSpilt())).count(); .endsWith(DictWordType.METRIC.getType())).count();
} }
/** /**

View File

@@ -33,10 +33,8 @@ public class EntityMapper extends BaseMapper {
continue; continue;
} }
List<SchemaElementMatch> valueSchemaElements = schemaElementMatchList.stream() List<SchemaElementMatch> valueSchemaElements = schemaElementMatchList.stream()
.filter(schemaElementMatch -> .filter(schemaElementMatch -> SchemaElementType.VALUE.equals(
SchemaElementType.VALUE.equals(schemaElementMatch.getElement().getType()) schemaElementMatch.getElement().getType()))
|| SchemaElementType.TAG_VALUE.equals(schemaElementMatch.getElement().getType()
))
.collect(Collectors.toList()); .collect(Collectors.toList());
for (SchemaElementMatch schemaElementMatch : valueSchemaElements) { for (SchemaElementMatch schemaElementMatch : valueSchemaElements) {
if (!entity.getId().equals(schemaElementMatch.getElement().getId())) { if (!entity.getId().equals(schemaElementMatch.getElement().getId())) {

View File

@@ -71,8 +71,7 @@ public class KeywordMapper extends BaseMapper {
if (element == null) { if (element == null) {
continue; continue;
} }
if (element.getType().equals(SchemaElementType.VALUE) || element.getType() if (element.getType().equals(SchemaElementType.VALUE)) {
.equals(SchemaElementType.TAG_VALUE)) {
element.setName(hanlpMapResult.getName()); element.setName(hanlpMapResult.getName());
} }
Long frequency = wordNatureToFrequency.get(hanlpMapResult.getName() + nature); Long frequency = wordNatureToFrequency.get(hanlpMapResult.getName() + nature);

View File

@@ -19,12 +19,12 @@ import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@Slf4j @Slf4j
public class QueryFilterMapper implements SchemaMapper { public class QueryFilterMapper extends BaseMapper {
private double similarity = 1.0; private double similarity = 1.0;
@Override @Override
public void map(QueryContext queryContext) { public void doMap(QueryContext queryContext) {
Set<Long> dataSetIds = queryContext.getDataSetIds(); Set<Long> dataSetIds = queryContext.getDataSetIds();
if (CollectionUtils.isEmpty(dataSetIds)) { if (CollectionUtils.isEmpty(dataSetIds)) {
return; return;

View File

@@ -65,7 +65,7 @@ public class SearchMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
// remove entity name where search // remove entity name where search
hanlpMapResults = hanlpMapResults.stream().filter(entry -> { hanlpMapResults = hanlpMapResults.stream().filter(entry -> {
List<String> natures = entry.getNatures().stream() List<String> natures = entry.getNatures().stream()
.filter(nature -> !nature.endsWith(DictWordType.ENTITY.getTypeWithSpilt())) .filter(nature -> !nature.endsWith(DictWordType.ENTITY.getType()))
.collect(Collectors.toList()); .collect(Collectors.toList());
if (CollectionUtils.isEmpty(natures)) { if (CollectionUtils.isEmpty(natures)) {
return false; return false;

View File

@@ -1,13 +1,25 @@
package com.tencent.supersonic.headless.core.chat.parser; package com.tencent.supersonic.headless.core.chat.parser;
import com.tencent.supersonic.auth.api.authentication.pojo.User; import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.common.pojo.enums.QueryType;
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
import com.tencent.supersonic.common.util.jsqlparser.SqlSelectHelper;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo; import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.headless.api.pojo.SemanticSchema;
import com.tencent.supersonic.headless.api.pojo.SqlInfo;
import com.tencent.supersonic.headless.core.chat.query.SemanticQuery; import com.tencent.supersonic.headless.core.chat.query.SemanticQuery;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMSqlQuery;
import com.tencent.supersonic.headless.core.chat.query.rule.RuleSemanticQuery;
import com.tencent.supersonic.headless.core.pojo.ChatContext; import com.tencent.supersonic.headless.core.pojo.ChatContext;
import com.tencent.supersonic.headless.core.pojo.QueryContext; import com.tencent.supersonic.headless.core.pojo.QueryContext;
import lombok.extern.slf4j.Slf4j;
import java.util.List; import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
/** /**
* QueryTypeParser resolves query type as either METRIC or TAG, or ID. * QueryTypeParser resolves query type as either METRIC or TAG, or ID.
@@ -25,9 +37,50 @@ public class QueryTypeParser implements SemanticParser {
// 1.init S2SQL // 1.init S2SQL
semanticQuery.initS2Sql(queryContext.getSemanticSchema(), user); semanticQuery.initS2Sql(queryContext.getSemanticSchema(), user);
// 2.set queryType // 2.set queryType
SemanticParseInfo parseInfo = semanticQuery.getParseInfo(); QueryType queryType = getQueryType(queryContext, semanticQuery);
parseInfo.setQueryType(queryContext.getQueryType(parseInfo.getDataSetId())); semanticQuery.getParseInfo().setQueryType(queryType);
} }
} }
/**
 * Derives the query type from the S2SQL attached to the query's parse info.
 *
 * <ul>
 *   <li>No SQL info or blank S2SQL: {@link QueryType#ID}.</li>
 *   <li>For rule-based or LLM SQL queries, the WHERE fields (time dimensions excluded)
 *       are inspected: if any of them is an entity name the result is {@link QueryType#ID};
 *       otherwise, if all of them are tag names, the result is {@link QueryType#TAG}.</li>
 *   <li>Otherwise, if any SELECT field is a metric name of the data set, the result is
 *       {@link QueryType#METRIC}.</li>
 *   <li>In every remaining case: {@link QueryType#ID}.</li>
 * </ul>
 *
 * @param queryContext supplies the semantic schema used to look up entities, tags and metrics
 * @param semanticQuery the query whose parse info and S2SQL are examined
 * @return the resolved query type
 */
private QueryType getQueryType(QueryContext queryContext, SemanticQuery semanticQuery) {
    SemanticParseInfo parseInfo = semanticQuery.getParseInfo();
    SqlInfo sqlInfo = parseInfo.getSqlInfo();
    if (sqlInfo == null || StringUtils.isBlank(sqlInfo.getS2SQL())) {
        return QueryType.ID;
    }

    Long dataSetId = parseInfo.getDataSetId();
    SemanticSchema semanticSchema = queryContext.getSemanticSchema();

    // 1. ID / TAG detection, based on the WHERE fields of rule and LLM queries.
    boolean ruleOrLlmQuery = semanticQuery instanceof RuleSemanticQuery
            || semanticQuery instanceof LLMSqlQuery;
    if (ruleOrLlmQuery) {
        List<String> whereFields = SqlSelectHelper.getWhereFields(sqlInfo.getS2SQL())
                .stream()
                .filter(field -> !TimeDimensionEnum.containsTimeDimension(field))
                .collect(Collectors.toList());
        if (CollectionUtils.isNotEmpty(whereFields)) {
            // Any WHERE field that names an entity makes this an ID query.
            Set<String> entityNames = semanticSchema.getEntities(dataSetId).stream()
                    .map(SchemaElement::getName)
                    .collect(Collectors.toSet());
            boolean filtersOnEntity = entityNames.stream().anyMatch(whereFields::contains);
            if (filtersOnEntity) {
                return QueryType.ID;
            }
            // Every WHERE field must be a tag name for a TAG query.
            Set<String> tagNames = semanticSchema.getTags(dataSetId).stream()
                    .map(SchemaElement::getName)
                    .collect(Collectors.toSet());
            if (!tagNames.isEmpty() && tagNames.containsAll(whereFields)) {
                return QueryType.TAG;
            }
        }
    }

    // 2. METRIC detection, based on the SELECT fields.
    List<String> selectFields = SqlSelectHelper.getSelectFields(sqlInfo.getS2SQL());
    List<SchemaElement> metrics = semanticSchema.getMetrics(dataSetId);
    if (CollectionUtils.isEmpty(metrics)) {
        return QueryType.ID;
    }
    Set<String> metricNames = metrics.stream()
            .map(SchemaElement::getName)
            .collect(Collectors.toSet());
    return selectFields.stream().anyMatch(metricNames::contains)
            ? QueryType.METRIC
            : QueryType.ID;
}
} }

View File

@@ -1,7 +1,6 @@
package com.tencent.supersonic.headless.core.chat.parser.llm; package com.tencent.supersonic.headless.core.chat.parser.llm;
import com.tencent.supersonic.common.pojo.enums.DataFormatTypeEnum; import com.tencent.supersonic.common.pojo.enums.DataFormatTypeEnum;
import com.tencent.supersonic.common.pojo.enums.QueryType;
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum; import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
import com.tencent.supersonic.common.util.DateUtils; import com.tencent.supersonic.common.util.DateUtils;
import com.tencent.supersonic.headless.api.pojo.SchemaElement; import com.tencent.supersonic.headless.api.pojo.SchemaElement;
@@ -17,13 +16,6 @@ import com.tencent.supersonic.headless.core.config.OptimizationConfig;
import com.tencent.supersonic.headless.core.pojo.QueryContext; import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.core.utils.ComponentFactory; import com.tencent.supersonic.headless.core.utils.ComponentFactory;
import com.tencent.supersonic.headless.core.utils.S2SqlDateHelper; import com.tencent.supersonic.headless.core.utils.S2SqlDateHelper;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashSet; import java.util.HashSet;
@@ -32,10 +24,17 @@ import java.util.Map;
import java.util.Objects; import java.util.Objects;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
@Slf4j @Slf4j
@Service @Service
public class LLMRequestService { public class LLMRequestService {
@Autowired @Autowired
private LLMParserConfig llmParserConfig; private LLMParserConfig llmParserConfig;
@Autowired @Autowired
@@ -62,7 +61,7 @@ public class LLMRequestService {
} }
public LLMReq getLlmReq(QueryContext queryCtx, Long dataSetId, public LLMReq getLlmReq(QueryContext queryCtx, Long dataSetId,
SemanticSchema semanticSchema, List<LLMReq.ElementValue> linkingValues) { SemanticSchema semanticSchema, List<LLMReq.ElementValue> linkingValues) {
Map<Long, String> dataSetIdToName = semanticSchema.getDataSetIdToName(); Map<Long, String> dataSetIdToName = semanticSchema.getDataSetIdToName();
String queryText = queryCtx.getQueryText(); String queryText = queryCtx.getQueryText();
@@ -154,8 +153,7 @@ public class LLMRequestService {
.filter(elementMatch -> !elementMatch.isInherited()) .filter(elementMatch -> !elementMatch.isInherited())
.filter(schemaElementMatch -> { .filter(schemaElementMatch -> {
SchemaElementType type = schemaElementMatch.getElement().getType(); SchemaElementType type = schemaElementMatch.getElement().getType();
return SchemaElementType.VALUE.equals(type) || SchemaElementType.TAG_VALUE.equals(type) return SchemaElementType.VALUE.equals(type) || SchemaElementType.ID.equals(type);
|| SchemaElementType.ID.equals(type);
}) })
.map(elementMatch -> { .map(elementMatch -> {
ElementValue elementValue = new ElementValue(); ElementValue elementValue = new ElementValue();
@@ -169,9 +167,6 @@ public class LLMRequestService {
protected Map<Long, String> getItemIdToName(QueryContext queryCtx, Long dataSetId) { protected Map<Long, String> getItemIdToName(QueryContext queryCtx, Long dataSetId) {
SemanticSchema semanticSchema = queryCtx.getSemanticSchema(); SemanticSchema semanticSchema = queryCtx.getSemanticSchema();
List<SchemaElement> elements = semanticSchema.getDimensions(dataSetId); List<SchemaElement> elements = semanticSchema.getDimensions(dataSetId);
if (QueryType.TAG.equals(queryCtx.getQueryType(dataSetId))) {
elements = semanticSchema.getTags(dataSetId);
}
return elements.stream() return elements.stream()
.collect(Collectors.toMap(SchemaElement::getId, SchemaElement::getName, (value1, value2) -> value2)); .collect(Collectors.toMap(SchemaElement::getId, SchemaElement::getName, (value1, value2) -> value2));
} }
@@ -179,27 +174,18 @@ public class LLMRequestService {
private Set<String> getTopNFieldNames(QueryContext queryCtx, Long dataSetId, LLMParserConfig llmParserConfig) { private Set<String> getTopNFieldNames(QueryContext queryCtx, Long dataSetId, LLMParserConfig llmParserConfig) {
SemanticSchema semanticSchema = queryCtx.getSemanticSchema(); SemanticSchema semanticSchema = queryCtx.getSemanticSchema();
Set<String> results = new HashSet<>(); Set<String> results = new HashSet<>();
if (QueryType.TAG.equals(queryCtx.getQueryType(dataSetId))) { Set<String> dimensions = semanticSchema.getDimensions(dataSetId).stream()
Set<String> tags = semanticSchema.getTags(dataSetId).stream() .sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed())
.sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed()) .limit(llmParserConfig.getDimensionTopN())
.limit(llmParserConfig.getDimensionTopN()) .map(entry -> entry.getName())
.map(entry -> entry.getName()) .collect(Collectors.toSet());
.collect(Collectors.toSet()); results.addAll(dimensions);
results.addAll(tags); Set<String> metrics = semanticSchema.getMetrics(dataSetId).stream()
} else { .sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed())
Set<String> dimensions = semanticSchema.getDimensions(dataSetId).stream() .limit(llmParserConfig.getMetricTopN())
.sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed()) .map(entry -> entry.getName())
.limit(llmParserConfig.getDimensionTopN()) .collect(Collectors.toSet());
.map(entry -> entry.getName()) results.addAll(metrics);
.collect(Collectors.toSet());
results.addAll(dimensions);
Set<String> metrics = semanticSchema.getMetrics(dataSetId).stream()
.sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed())
.limit(llmParserConfig.getMetricTopN())
.map(entry -> entry.getName())
.collect(Collectors.toSet());
results.addAll(metrics);
}
return results; return results;
} }
@@ -214,15 +200,12 @@ public class LLMRequestService {
SchemaElementType elementType = schemaElementMatch.getElement().getType(); SchemaElementType elementType = schemaElementMatch.getElement().getType();
return SchemaElementType.METRIC.equals(elementType) return SchemaElementType.METRIC.equals(elementType)
|| SchemaElementType.DIMENSION.equals(elementType) || SchemaElementType.DIMENSION.equals(elementType)
|| SchemaElementType.VALUE.equals(elementType) || SchemaElementType.VALUE.equals(elementType);
|| SchemaElementType.TAG.equals(elementType)
|| SchemaElementType.TAG_VALUE.equals(elementType);
}) })
.map(schemaElementMatch -> { .map(schemaElementMatch -> {
SchemaElement element = schemaElementMatch.getElement(); SchemaElement element = schemaElementMatch.getElement();
SchemaElementType elementType = element.getType(); SchemaElementType elementType = element.getType();
if (SchemaElementType.VALUE.equals(elementType) || SchemaElementType.TAG_VALUE.equals( if (SchemaElementType.VALUE.equals(elementType)) {
elementType)) {
return itemIdToName.get(element.getId()); return itemIdToName.get(element.getId());
} }
return schemaElementMatch.getWord(); return schemaElementMatch.getWord();

View File

@@ -38,7 +38,6 @@ public class ContextInheritParser implements SemanticParser {
new AbstractMap.SimpleEntry<>( new AbstractMap.SimpleEntry<>(
SchemaElementType.VALUE, Arrays.asList(SchemaElementType.VALUE, SchemaElementType.DIMENSION)), SchemaElementType.VALUE, Arrays.asList(SchemaElementType.VALUE, SchemaElementType.DIMENSION)),
new AbstractMap.SimpleEntry<>(SchemaElementType.ENTITY, Arrays.asList(SchemaElementType.ENTITY)), new AbstractMap.SimpleEntry<>(SchemaElementType.ENTITY, Arrays.asList(SchemaElementType.ENTITY)),
new AbstractMap.SimpleEntry<>(SchemaElementType.TAG, Arrays.asList(SchemaElementType.TAG)),
new AbstractMap.SimpleEntry<>(SchemaElementType.DATASET, Arrays.asList(SchemaElementType.DATASET)), new AbstractMap.SimpleEntry<>(SchemaElementType.DATASET, Arrays.asList(SchemaElementType.DATASET)),
new AbstractMap.SimpleEntry<>(SchemaElementType.ID, Arrays.asList(SchemaElementType.ID)) new AbstractMap.SimpleEntry<>(SchemaElementType.ID, Arrays.asList(SchemaElementType.ID))
).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); ).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));

View File

@@ -1,11 +1,8 @@
package com.tencent.supersonic.headless.core.chat.parser.rule; package com.tencent.supersonic.headless.core.chat.parser.rule;
import com.tencent.supersonic.common.pojo.enums.QueryType;
import com.tencent.supersonic.headless.api.pojo.SchemaElementMatch; import com.tencent.supersonic.headless.api.pojo.SchemaElementMatch;
import com.tencent.supersonic.headless.api.pojo.SchemaMapInfo; import com.tencent.supersonic.headless.api.pojo.SchemaMapInfo;
import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.headless.core.chat.parser.SemanticParser; import com.tencent.supersonic.headless.core.chat.parser.SemanticParser;
import com.tencent.supersonic.headless.core.chat.query.QueryManager;
import com.tencent.supersonic.headless.core.chat.query.rule.RuleSemanticQuery; import com.tencent.supersonic.headless.core.chat.query.rule.RuleSemanticQuery;
import com.tencent.supersonic.headless.core.pojo.ChatContext; import com.tencent.supersonic.headless.core.pojo.ChatContext;
import com.tencent.supersonic.headless.core.pojo.QueryContext; import com.tencent.supersonic.headless.core.pojo.QueryContext;
@@ -35,24 +32,10 @@ public class RuleSqlParser implements SemanticParser {
List<RuleSemanticQuery> queries = RuleSemanticQuery.resolve(dataSetId, elementMatches, queryContext); List<RuleSemanticQuery> queries = RuleSemanticQuery.resolve(dataSetId, elementMatches, queryContext);
for (RuleSemanticQuery query : queries) { for (RuleSemanticQuery query : queries) {
query.fillParseInfo(queryContext, chatContext); query.fillParseInfo(queryContext, chatContext);
SemanticParseInfo parseInfo = query.getParseInfo(); queryContext.getCandidateQueries().add(query);
QueryType queryType = queryContext.getQueryType(parseInfo.getDataSetId());
if (isRightQuery(parseInfo, queryType)) {
queryContext.getCandidateQueries().add(query);
}
} }
} }
auxiliaryParsers.stream().forEach(p -> p.parse(queryContext, chatContext)); auxiliaryParsers.stream().forEach(p -> p.parse(queryContext, chatContext));
} }
private boolean isRightQuery(SemanticParseInfo parseInfo, QueryType queryType) {
if (QueryType.TAG.equals(queryType) && QueryManager.isTagQuery(parseInfo.getQueryMode())) {
return true;
}
if (QueryType.METRIC.equals(queryType) && QueryManager.isMetricQuery(parseInfo.getQueryMode())) {
return true;
}
return false;
}
} }

View File

@@ -3,7 +3,6 @@ package com.tencent.supersonic.headless.core.chat.query.rule;
import com.tencent.supersonic.auth.api.authentication.pojo.User; import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.common.pojo.enums.FilterOperatorEnum; import com.tencent.supersonic.common.pojo.enums.FilterOperatorEnum;
import com.tencent.supersonic.common.pojo.enums.QueryType;
import com.tencent.supersonic.headless.api.pojo.SchemaElement; import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.api.pojo.SchemaElementMatch; import com.tencent.supersonic.headless.api.pojo.SchemaElementMatch;
import com.tencent.supersonic.headless.api.pojo.SchemaElementType; import com.tencent.supersonic.headless.api.pojo.SchemaElementType;
@@ -18,10 +17,6 @@ import com.tencent.supersonic.headless.core.chat.query.QueryManager;
import com.tencent.supersonic.headless.core.pojo.ChatContext; import com.tencent.supersonic.headless.core.pojo.ChatContext;
import com.tencent.supersonic.headless.core.pojo.QueryContext; import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.core.utils.QueryReqBuilder; import com.tencent.supersonic.headless.core.utils.QueryReqBuilder;
import lombok.ToString;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
@@ -30,6 +25,9 @@ import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import lombok.ToString;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
@Slf4j @Slf4j
@ToString @ToString
@@ -42,7 +40,7 @@ public abstract class RuleSemanticQuery extends BaseSemanticQuery {
} }
public List<SchemaElementMatch> match(List<SchemaElementMatch> candidateElementMatches, public List<SchemaElementMatch> match(List<SchemaElementMatch> candidateElementMatches,
QueryContext queryCtx) { QueryContext queryCtx) {
return queryMatcher.match(candidateElementMatches); return queryMatcher.match(candidateElementMatches);
} }
@@ -101,22 +99,31 @@ public abstract class RuleSemanticQuery extends BaseSemanticQuery {
parseInfo.setDataSet(semanticSchema.getDataSet(dataSetId)); parseInfo.setDataSet(semanticSchema.getDataSet(dataSetId));
Map<Long, List<SchemaElementMatch>> dim2Values = new HashMap<>(); Map<Long, List<SchemaElementMatch>> dim2Values = new HashMap<>();
Map<Long, List<SchemaElementMatch>> id2Values = new HashMap<>(); Map<Long, List<SchemaElementMatch>> id2Values = new HashMap<>();
Map<Long, List<SchemaElementMatch>> tag2Values = new HashMap<>();
for (SchemaElementMatch schemaMatch : parseInfo.getElementMatches()) { for (SchemaElementMatch schemaMatch : parseInfo.getElementMatches()) {
SchemaElement element = schemaMatch.getElement(); SchemaElement element = schemaMatch.getElement();
element.setOrder(1 - schemaMatch.getSimilarity()); element.setOrder(1 - schemaMatch.getSimilarity());
switch (element.getType()) { switch (element.getType()) {
case ID: case ID:
addToValues(semanticSchema, SchemaElementType.ENTITY, id2Values, schemaMatch); SchemaElement entityElement = semanticSchema.getElement(SchemaElementType.ENTITY, element.getId());
break; if (entityElement != null) {
case TAG_VALUE: if (id2Values.containsKey(element.getId())) {
addToValues(semanticSchema, SchemaElementType.TAG, tag2Values, schemaMatch); id2Values.get(element.getId()).add(schemaMatch);
} else {
id2Values.put(element.getId(), new ArrayList<>(Arrays.asList(schemaMatch)));
}
}
break; break;
case VALUE: case VALUE:
addToValues(semanticSchema, SchemaElementType.DIMENSION, dim2Values, schemaMatch); SchemaElement dimElement = semanticSchema.getElement(SchemaElementType.DIMENSION, element.getId());
if (dimElement != null) {
if (dim2Values.containsKey(element.getId())) {
dim2Values.get(element.getId()).add(schemaMatch);
} else {
dim2Values.put(element.getId(), new ArrayList<>(Arrays.asList(schemaMatch)));
}
}
break; break;
case TAG:
case DIMENSION: case DIMENSION:
parseInfo.getDimensions().add(element); parseInfo.getDimensions().add(element);
break; break;
@@ -129,10 +136,8 @@ public abstract class RuleSemanticQuery extends BaseSemanticQuery {
default: default:
} }
} }
addToFilters(id2Values, parseInfo, semanticSchema, SchemaElementType.ENTITY); addToFilters(id2Values, parseInfo, semanticSchema, SchemaElementType.ENTITY);
addToFilters(dim2Values, parseInfo, semanticSchema, SchemaElementType.DIMENSION); addToFilters(dim2Values, parseInfo, semanticSchema, SchemaElementType.DIMENSION);
addToFilters(tag2Values, parseInfo, semanticSchema, SchemaElementType.TAG);
} }
private void addToFilters(Map<Long, List<SchemaElementMatch>> id2Values, SemanticParseInfo parseInfo, private void addToFilters(Map<Long, List<SchemaElementMatch>> id2Values, SemanticParseInfo parseInfo,
@@ -220,8 +225,6 @@ public abstract class RuleSemanticQuery extends BaseSemanticQuery {
public static List<RuleSemanticQuery> resolve(Long dataSetId, List<SchemaElementMatch> candidateElementMatches, public static List<RuleSemanticQuery> resolve(Long dataSetId, List<SchemaElementMatch> candidateElementMatches,
QueryContext queryContext) { QueryContext queryContext) {
List<RuleSemanticQuery> matchedQueries = new ArrayList<>(); List<RuleSemanticQuery> matchedQueries = new ArrayList<>();
candidateElementMatches = filterByQueryType(dataSetId, candidateElementMatches, queryContext);
for (RuleSemanticQuery semanticQuery : QueryManager.getRuleQueries()) { for (RuleSemanticQuery semanticQuery : QueryManager.getRuleQueries()) {
List<SchemaElementMatch> matches = semanticQuery.match(candidateElementMatches, queryContext); List<SchemaElementMatch> matches = semanticQuery.match(candidateElementMatches, queryContext);
@@ -231,30 +234,9 @@ public abstract class RuleSemanticQuery extends BaseSemanticQuery {
matchedQueries.add(query); matchedQueries.add(query);
} }
} }
return matchedQueries; return matchedQueries;
} }
private static List<SchemaElementMatch> filterByQueryType(Long dataSetId,
List<SchemaElementMatch> candidateElementMatches, QueryContext queryContext) {
QueryType queryType = queryContext.getQueryType(dataSetId);
if (QueryType.TAG.equals(queryType)) {
candidateElementMatches = candidateElementMatches.stream()
.filter(elementMatch -> !(SchemaElementType.METRIC.equals(elementMatch.getElement().getType())
|| SchemaElementType.DIMENSION.equals(elementMatch.getElement().getType())
|| SchemaElementType.VALUE.equals(elementMatch.getElement().getType()))
)
.collect(Collectors.toList());
}
if (QueryType.METRIC.equals(queryType)) {
candidateElementMatches = candidateElementMatches.stream()
.filter(elementMatch -> !(SchemaElementType.TAG.equals(elementMatch.getElement().getType())
|| SchemaElementType.TAG_VALUE.equals(elementMatch.getElement().getType())))
.collect(Collectors.toList());
}
return candidateElementMatches;
}
protected QueryStructReq convertQueryStruct() { protected QueryStructReq convertQueryStruct() {
return QueryReqBuilder.buildStructReq(parseInfo); return QueryReqBuilder.buildStructReq(parseInfo);
} }

View File

@@ -1,11 +1,10 @@
package com.tencent.supersonic.headless.core.chat.query.rule.tag; package com.tencent.supersonic.headless.core.chat.query.rule.tag;
import org.springframework.stereotype.Component; import static com.tencent.supersonic.headless.api.pojo.SchemaElementType.ID;
import static com.tencent.supersonic.headless.api.pojo.SchemaElementType.TAG;
import static com.tencent.supersonic.headless.core.chat.query.rule.QueryMatchOption.OptionType.REQUIRED; import static com.tencent.supersonic.headless.core.chat.query.rule.QueryMatchOption.OptionType.REQUIRED;
import static com.tencent.supersonic.headless.core.chat.query.rule.QueryMatchOption.RequireNumberType.AT_LEAST; import static com.tencent.supersonic.headless.core.chat.query.rule.QueryMatchOption.RequireNumberType.AT_LEAST;
import static com.tencent.supersonic.headless.api.pojo.SchemaElementType.ID;
import org.springframework.stereotype.Component;
@Component @Component
public class TagDetailQuery extends TagSemanticQuery { public class TagDetailQuery extends TagSemanticQuery {
@@ -14,8 +13,7 @@ public class TagDetailQuery extends TagSemanticQuery {
public TagDetailQuery() { public TagDetailQuery() {
super(); super();
queryMatcher.addOption(TAG, REQUIRED, AT_LEAST, 1) queryMatcher.addOption(ID, REQUIRED, AT_LEAST, 1);
.addOption(ID, REQUIRED, AT_LEAST, 1);
} }
@Override @Override

View File

@@ -1,14 +1,12 @@
package com.tencent.supersonic.headless.core.chat.query.rule.tag; package com.tencent.supersonic.headless.core.chat.query.rule.tag;
import static com.tencent.supersonic.headless.api.pojo.SchemaElementType.VALUE;
import static com.tencent.supersonic.headless.core.chat.query.rule.QueryMatchOption.OptionType.REQUIRED;
import static com.tencent.supersonic.headless.core.chat.query.rule.QueryMatchOption.RequireNumberType.AT_LEAST;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import static com.tencent.supersonic.headless.api.pojo.SchemaElementType.TAG;
import static com.tencent.supersonic.headless.api.pojo.SchemaElementType.TAG_VALUE;
import static com.tencent.supersonic.headless.core.chat.query.rule.QueryMatchOption.OptionType.OPTIONAL;
import static com.tencent.supersonic.headless.core.chat.query.rule.QueryMatchOption.OptionType.REQUIRED;
import static com.tencent.supersonic.headless.core.chat.query.rule.QueryMatchOption.RequireNumberType.AT_LEAST;
@Slf4j @Slf4j
@Component @Component
public class TagFilterQuery extends TagListQuery { public class TagFilterQuery extends TagListQuery {
@@ -17,8 +15,7 @@ public class TagFilterQuery extends TagListQuery {
public TagFilterQuery() { public TagFilterQuery() {
super(); super();
queryMatcher.addOption(TAG, OPTIONAL, AT_LEAST, 0); queryMatcher.addOption(VALUE, REQUIRED, AT_LEAST, 1);
queryMatcher.addOption(TAG_VALUE, REQUIRED, AT_LEAST, 1);
} }
@Override @Override

View File

@@ -146,11 +146,7 @@ public class QueryServiceImpl implements QueryService {
queryStatement.setModelIds(querySqlReq.getModelIds()); queryStatement.setModelIds(querySqlReq.getModelIds());
queryStatement.setEnableOptimize(queryUtils.enableOptimize()); queryStatement.setEnableOptimize(queryUtils.enableOptimize());
queryStatement.setSemanticSchemaResp(semanticSchemaResp); queryStatement.setSemanticSchemaResp(semanticSchemaResp);
if (QueryType.TAG.equals(semanticSchemaResp.getQueryType())) { queryStatement.setSemanticModel(semanticSchemaManager.getSemanticModel(semanticSchemaResp));
queryStatement.setSemanticModel(semanticSchemaManager.getTagSemanticModel(semanticSchemaResp));
} else {
queryStatement.setSemanticModel(semanticSchemaManager.getSemanticModel(semanticSchemaResp));
}
return queryStatement; return queryStatement;
} }

View File

@@ -148,7 +148,6 @@ public class TagMetaServiceImpl implements TagMetaService {
@Override @Override
public TagResp getTag(Long id, User user) { public TagResp getTag(Long id, User user) {
// return convert(tagRepository.getTagById(id));
TagDO tagDO = tagRepository.getTagById(id); TagDO tagDO = tagRepository.getTagById(id);
TagResp tagResp = fillCollectAndAdminInfo(tagDO, user); TagResp tagResp = fillCollectAndAdminInfo(tagDO, user);
tagResp = fillModelInfo(tagResp); tagResp = fillModelInfo(tagResp);

View File

@@ -60,7 +60,7 @@ public class TagQueryServiceImpl implements TagQueryService {
public ItemValueResp queryTagValue(ItemValueReq itemValueReq, User user) throws Exception { public ItemValueResp queryTagValue(ItemValueReq itemValueReq, User user) throws Exception {
ItemValueResp itemValueResp = new ItemValueResp(); ItemValueResp itemValueResp = new ItemValueResp();
itemValueResp.setItemId(itemValueReq.getItemId()); itemValueResp.setItemId(itemValueReq.getItemId());
itemValueResp.setType(SchemaElementType.TAG); itemValueResp.setType(SchemaElementType.DIMENSION);
TagResp tag = tagMetaService.getTag(itemValueReq.getItemId(), user); TagResp tag = tagMetaService.getTag(itemValueReq.getItemId(), user);
checkTag(tag); checkTag(tag);
itemValueResp.setName(tag.getName()); itemValueResp.setName(tag.getName());

View File

@@ -34,7 +34,6 @@ public class WordService {
addWordsByType(DictWordType.METRIC, semanticSchema.getMetrics(), words); addWordsByType(DictWordType.METRIC, semanticSchema.getMetrics(), words);
addWordsByType(DictWordType.ENTITY, semanticSchema.getEntities(), words); addWordsByType(DictWordType.ENTITY, semanticSchema.getEntities(), words);
addWordsByType(DictWordType.VALUE, semanticSchema.getDimensionValues(), words); addWordsByType(DictWordType.VALUE, semanticSchema.getDimensionValues(), words);
addWordsByType(DictWordType.TAG, semanticSchema.getTags(), words);
return words; return words;
} }

View File

@@ -12,11 +12,6 @@ import com.tencent.supersonic.headless.api.pojo.SchemaValueMap;
import com.tencent.supersonic.headless.api.pojo.response.DataSetSchemaResp; import com.tencent.supersonic.headless.api.pojo.response.DataSetSchemaResp;
import com.tencent.supersonic.headless.api.pojo.response.DimSchemaResp; import com.tencent.supersonic.headless.api.pojo.response.DimSchemaResp;
import com.tencent.supersonic.headless.api.pojo.response.MetricSchemaResp; import com.tencent.supersonic.headless.api.pojo.response.MetricSchemaResp;
import com.tencent.supersonic.headless.api.pojo.response.TagResp;
import org.apache.logging.log4j.util.Strings;
import org.springframework.beans.BeanUtils;
import org.springframework.util.CollectionUtils;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashSet; import java.util.HashSet;
@@ -24,6 +19,9 @@ import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.logging.log4j.util.Strings;
import org.springframework.beans.BeanUtils;
import org.springframework.util.CollectionUtils;
public class DataSetSchemaBuilder { public class DataSetSchemaBuilder {
@@ -43,18 +41,18 @@ public class DataSetSchemaBuilder {
Set<SchemaElement> metrics = getMetrics(resp); Set<SchemaElement> metrics = getMetrics(resp);
dataSetSchema.getMetrics().addAll(metrics); dataSetSchema.getMetrics().addAll(metrics);
Set<SchemaElement> metricTags = getMetricTags(resp);
dataSetSchema.getTags().addAll(metricTags);
Set<SchemaElement> dimensions = getDimensions(resp); Set<SchemaElement> dimensions = getDimensions(resp);
dataSetSchema.getDimensions().addAll(dimensions); dataSetSchema.getDimensions().addAll(dimensions);
Set<SchemaElement> dimensionTags = getDimensionTags(resp);
dataSetSchema.getTags().addAll(dimensionTags);
Set<SchemaElement> dimensionValues = getDimensionValues(resp); Set<SchemaElement> dimensionValues = getDimensionValues(resp);
dataSetSchema.getDimensionValues().addAll(dimensionValues); dataSetSchema.getDimensionValues().addAll(dimensionValues);
Set<SchemaElement> tags = getTags(resp);
dataSetSchema.getTags().addAll(tags);
Set<SchemaElement> tagValues = getTagValues(resp);
dataSetSchema.getTagValues().addAll(tagValues);
SchemaElement entity = getEntity(resp); SchemaElement entity = getEntity(resp);
if (Objects.nonNull(entity)) { if (Objects.nonNull(entity)) {
dataSetSchema.setEntity(entity); dataSetSchema.setEntity(entity);
@@ -62,6 +60,58 @@ public class DataSetSchemaBuilder {
return dataSetSchema; return dataSetSchema;
} }
private static Set<SchemaElement> getMetricTags(DataSetSchemaResp resp) {
Set<SchemaElement> tags = new HashSet<>();
for (MetricSchemaResp metric : resp.getMetrics()) {
List<String> alias = SchemaItem.getAliasList(metric.getAlias());
if (metric.getIsTag() == 1) {
SchemaElement tagToAdd = SchemaElement.builder()
.dataSet(resp.getId())
.model(metric.getModelId())
.id(metric.getId())
.name(metric.getName())
.bizName(metric.getBizName())
.type(SchemaElementType.TAG)
.useCnt(metric.getUseCnt())
.alias(alias)
.build();
tags.add(tagToAdd);
}
}
return tags;
}
private static Set<SchemaElement> getDimensionTags(DataSetSchemaResp resp) {
Set<SchemaElement> tags = new HashSet<>();
for (DimSchemaResp dim : resp.getDimensions()) {
List<String> alias = SchemaItem.getAliasList(dim.getAlias());
List<DimValueMap> dimValueMaps = dim.getDimValueMaps();
List<SchemaValueMap> schemaValueMaps = new ArrayList<>();
if (!CollectionUtils.isEmpty(dimValueMaps)) {
for (DimValueMap dimValueMap : dimValueMaps) {
SchemaValueMap schemaValueMap = new SchemaValueMap();
BeanUtils.copyProperties(dimValueMap, schemaValueMap);
schemaValueMaps.add(schemaValueMap);
}
}
if (dim.getIsTag() == 1) {
SchemaElement tagToAdd = SchemaElement.builder()
.dataSet(resp.getId())
.model(dim.getModelId())
.id(dim.getId())
.name(dim.getName())
.bizName(dim.getBizName())
.type(SchemaElementType.TAG)
.useCnt(dim.getUseCnt())
.alias(alias)
.schemaValueMaps(schemaValueMaps)
.build();
tags.add(tagToAdd);
}
}
return tags;
}
private static SchemaElement getEntity(DataSetSchemaResp resp) { private static SchemaElement getEntity(DataSetSchemaResp resp) {
DimSchemaResp dim = resp.getPrimaryKey(); DimSchemaResp dim = resp.getPrimaryKey();
if (Objects.isNull(dim)) { if (Objects.isNull(dim)) {
@@ -79,38 +129,6 @@ public class DataSetSchemaBuilder {
.build(); .build();
} }
private static Set<SchemaElement> getTags(DataSetSchemaResp resp) {
Set<SchemaElement> tags = new HashSet<>();
for (TagResp tagResp : resp.getTags()) {
SchemaElement element = SchemaElement.builder()
.dataSet(resp.getId())
.model(tagResp.getModelId())
.id(tagResp.getId())
.name(tagResp.getName())
.bizName(tagResp.getBizName())
.type(SchemaElementType.TAG)
.build();
tags.add(element);
}
return tags;
}
private static Set<SchemaElement> getTagValues(DataSetSchemaResp resp) {
Set<SchemaElement> dimensionValues = new HashSet<>();
for (TagResp tagResp : resp.getTags()) {
SchemaElement element = SchemaElement.builder()
.dataSet(resp.getId())
.model(tagResp.getModelId())
.id(tagResp.getId())
.name(tagResp.getName())
.bizName(tagResp.getBizName())
.type(SchemaElementType.TAG_VALUE)
.build();
dimensionValues.add(element);
}
return dimensionValues;
}
private static Set<SchemaElement> getDimensions(DataSetSchemaResp resp) { private static Set<SchemaElement> getDimensions(DataSetSchemaResp resp) {
Set<SchemaElement> dimensions = new HashSet<>(); Set<SchemaElement> dimensions = new HashSet<>();
for (DimSchemaResp dim : resp.getDimensions()) { for (DimSchemaResp dim : resp.getDimensions()) {

View File

@@ -1,5 +1,11 @@
package com.tencent.supersonic.headless.server.utils; package com.tencent.supersonic.headless.server.utils;
import static com.tencent.supersonic.common.pojo.Constants.AND_UPPER;
import static com.tencent.supersonic.common.pojo.Constants.APOSTROPHE;
import static com.tencent.supersonic.common.pojo.Constants.COMMA;
import static com.tencent.supersonic.common.pojo.Constants.POUND;
import static com.tencent.supersonic.common.pojo.Constants.SPACE;
import com.google.common.base.Strings; import com.google.common.base.Strings;
import com.tencent.supersonic.auth.api.authentication.pojo.User; import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.common.pojo.Aggregator; import com.tencent.supersonic.common.pojo.Aggregator;
@@ -35,12 +41,6 @@ import com.tencent.supersonic.headless.server.service.MetricService;
import com.tencent.supersonic.headless.server.service.ModelService; import com.tencent.supersonic.headless.server.service.ModelService;
import com.tencent.supersonic.headless.server.service.QueryService; import com.tencent.supersonic.headless.server.service.QueryService;
import com.tencent.supersonic.headless.server.service.TagMetaService; import com.tencent.supersonic.headless.server.service.TagMetaService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Lazy;
import org.springframework.stereotype.Component;
import org.springframework.util.CollectionUtils;
import java.time.LocalDate; import java.time.LocalDate;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
import java.util.ArrayList; import java.util.ArrayList;
@@ -53,11 +53,12 @@ import java.util.Map;
import java.util.Objects; import java.util.Objects;
import java.util.Set; import java.util.Set;
import java.util.StringJoiner; import java.util.StringJoiner;
import static com.tencent.supersonic.common.pojo.Constants.AND_UPPER; import lombok.extern.slf4j.Slf4j;
import static com.tencent.supersonic.common.pojo.Constants.APOSTROPHE; import org.springframework.beans.BeanUtils;
import static com.tencent.supersonic.common.pojo.Constants.COMMA; import org.springframework.beans.factory.annotation.Value;
import static com.tencent.supersonic.common.pojo.Constants.POUND; import org.springframework.context.annotation.Lazy;
import static com.tencent.supersonic.common.pojo.Constants.SPACE; import org.springframework.stereotype.Component;
import org.springframework.util.CollectionUtils;
@Slf4j @Slf4j
@Component @Component
@@ -185,7 +186,7 @@ public class DictUtils {
mergeMultivaluedValue(valueAndFrequencyPair, dimValue, metric); mergeMultivaluedValue(valueAndFrequencyPair, dimValue, metric);
} }
} }
String nature = dictItemResp.generateNature(); String nature = dictItemResp.getNature();
constructDictLines(valueAndFrequencyPair, lines, nature); constructDictLines(valueAndFrequencyPair, lines, nature);
addWhiteValueLines(dictItemResp, lines, nature); addWhiteValueLines(dictItemResp, lines, nature);
} catch (Exception e) { } catch (Exception e) {

View File

@@ -110,7 +110,6 @@ public class ModelDemoDataLoader {
updateDimension(); updateDimension();
updateMetric(); updateMetric();
updateMetric_pv(); updateMetric_pv();
addTags();
addDataSet_1(); addDataSet_1();
addDataSet_2(); addDataSet_2();
addAuthGroup_1(); addAuthGroup_1();

View File

@@ -1,3 +0,0 @@
内地 _4_1_tv 100
欧美 _4_1_tv 100
港台 _4_1_tv 100

View File

@@ -1,2 +0,0 @@
流行 _4_2_tv 100
国风 _4_2_tv 100

View File

@@ -1,6 +0,0 @@
周杰伦 _4_4_tv 100
陈奕迅 _4_4_tv 100
林俊杰 _4_4_tv 100
张碧晨 _4_4_tv 100
程响 _4_4_tv 100
Taylor#Swift _4_4_tv 100

View File

@@ -27,7 +27,7 @@ public class TagTest extends BaseTest {
expectedParseInfo.setAggType(AggregateTypeEnum.NONE); expectedParseInfo.setAggType(AggregateTypeEnum.NONE);
QueryFilter dimensionFilter = DataUtils.getFilter("genre", FilterOperatorEnum.EQUALS, QueryFilter dimensionFilter = DataUtils.getFilter("genre", FilterOperatorEnum.EQUALS,
"流行", "风格", 2L); "流行", "风格", 6L);
expectedParseInfo.getDimensionFilters().add(dimensionFilter); expectedParseInfo.getDimensionFilters().add(dimensionFilter);
SchemaElement metric = SchemaElement.builder().name("播放量").build(); SchemaElement metric = SchemaElement.builder().name("播放量").build();

View File

@@ -1,3 +0,0 @@
内地 _4_1_tv 100
欧美 _4_1_tv 100
港台 _4_1_tv 100

View File

@@ -1,2 +0,0 @@
流行 _4_2_tv 100
国风 _4_2_tv 100

View File

@@ -1,6 +0,0 @@
周杰伦 _4_4_tv 100
陈奕迅 _4_4_tv 100
林俊杰 _4_4_tv 100
张碧晨 _4_4_tv 100
程响 _4_4_tv 100
Taylor#Swift _4_4_tv 100