mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-11 03:58:14 +00:00
(improvement)(headless) Add semantic retrieval to term descriptions and extract relevant semantic information (#1468)
Co-authored-by: lxwcodemonkey
This commit is contained in:
@@ -33,6 +33,7 @@ public class SchemaElement implements Serializable {
|
||||
private double order;
|
||||
private int isTag;
|
||||
private String description;
|
||||
private boolean descriptionMapped;
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
|
||||
@@ -20,4 +20,8 @@ public class SchemaElementMatch {
|
||||
Long frequency;
|
||||
boolean isInherited;
|
||||
|
||||
public boolean isFullMatched() {
|
||||
return 1.0 == similarity;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
package com.tencent.supersonic.headless.api.pojo;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.google.common.collect.Lists;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
@@ -26,4 +28,25 @@ public class SchemaMapInfo {
|
||||
public void setMatchedElements(Long dataSet, List<SchemaElementMatch> elementMatches) {
|
||||
dataSetElementMatches.put(dataSet, elementMatches);
|
||||
}
|
||||
|
||||
@JsonIgnore
|
||||
public List<SchemaElement> getTermDescriptionToMap() {
|
||||
List<SchemaElement> termElements = Lists.newArrayList();
|
||||
for (Long dataSetId : getDataSetElementMatches().keySet()) {
|
||||
List<SchemaElementMatch> matchedElements = getMatchedElements(dataSetId);
|
||||
for (SchemaElementMatch schemaElementMatch : matchedElements) {
|
||||
if (SchemaElementType.TERM.equals(schemaElementMatch.getElement().getType())
|
||||
&& schemaElementMatch.isFullMatched()
|
||||
&& !schemaElementMatch.getElement().isDescriptionMapped()) {
|
||||
termElements.add(schemaElementMatch.getElement());
|
||||
}
|
||||
}
|
||||
}
|
||||
return termElements;
|
||||
}
|
||||
|
||||
public boolean needContinueMap() {
|
||||
return CollectionUtils.isNotEmpty(getTermDescriptionToMap());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -35,6 +35,7 @@ import java.util.stream.Collectors;
|
||||
public class ChatQueryContext {
|
||||
|
||||
private String queryText;
|
||||
private String oriQueryText;
|
||||
private Set<Long> dataSetIds;
|
||||
private Map<Long, List<Long>> modelIdToDataSetIds;
|
||||
private User user;
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
package com.tencent.supersonic.headless.chat.mapper;
|
||||
|
||||
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.headless.chat.ChatQueryContext;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import java.util.List;
|
||||
|
||||
/***
|
||||
* A mapper that map the description of the term.
|
||||
*/
|
||||
@Slf4j
|
||||
public class TermDescMapper extends BaseMapper {
|
||||
|
||||
@Override
|
||||
public void doMap(ChatQueryContext chatQueryContext) {
|
||||
List<SchemaElement> termDescriptionToMap = chatQueryContext.getMapInfo().getTermDescriptionToMap();
|
||||
if (CollectionUtils.isEmpty(termDescriptionToMap)) {
|
||||
if (StringUtils.isNotBlank(chatQueryContext.getOriQueryText())) {
|
||||
chatQueryContext.setQueryText(chatQueryContext.getOriQueryText());
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (StringUtils.isBlank(chatQueryContext.getOriQueryText())) {
|
||||
chatQueryContext.setOriQueryText(chatQueryContext.getQueryText());
|
||||
}
|
||||
for (SchemaElement schemaElement : termDescriptionToMap) {
|
||||
if (chatQueryContext.getQueryText().equals(schemaElement.getDescription())) {
|
||||
schemaElement.setDescriptionMapped(true);
|
||||
continue;
|
||||
}
|
||||
chatQueryContext.setQueryText(schemaElement.getDescription());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -48,6 +48,8 @@ public class ChatWorkflowEngine {
|
||||
parseResult.setState(ParseResp.ParseState.FAILED);
|
||||
parseResult.setErrorMsg("No semantic entities can be mapped against user question.");
|
||||
queryCtx.setChatWorkflowState(ChatWorkflowState.FINISHED);
|
||||
} else if (queryCtx.getMapInfo().needContinueMap()) {
|
||||
queryCtx.setChatWorkflowState(ChatWorkflowState.MAPPING);
|
||||
} else {
|
||||
queryCtx.setChatWorkflowState(ChatWorkflowState.PARSING);
|
||||
}
|
||||
@@ -89,7 +91,8 @@ public class ChatWorkflowEngine {
|
||||
|
||||
private void performMapping(ChatQueryContext queryCtx) {
|
||||
if (Objects.isNull(queryCtx.getMapInfo())
|
||||
|| MapUtils.isEmpty(queryCtx.getMapInfo().getDataSetElementMatches())) {
|
||||
|| MapUtils.isEmpty(queryCtx.getMapInfo().getDataSetElementMatches())
|
||||
|| queryCtx.getMapInfo().needContinueMap()) {
|
||||
schemaMappers.forEach(mapper -> mapper.map(queryCtx));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,8 @@ com.tencent.supersonic.headless.chat.mapper.SchemaMapper=\
|
||||
com.tencent.supersonic.headless.chat.mapper.EmbeddingMapper, \
|
||||
com.tencent.supersonic.headless.chat.mapper.KeywordMapper, \
|
||||
com.tencent.supersonic.headless.chat.mapper.QueryFilterMapper, \
|
||||
com.tencent.supersonic.headless.chat.mapper.EntityMapper
|
||||
com.tencent.supersonic.headless.chat.mapper.EntityMapper, \
|
||||
com.tencent.supersonic.headless.chat.mapper.TermDescMapper
|
||||
|
||||
com.tencent.supersonic.headless.chat.parser.SemanticParser=\
|
||||
com.tencent.supersonic.headless.chat.parser.rule.RuleSqlParser, \
|
||||
|
||||
Reference in New Issue
Block a user