(improvement)(headless) Add semantic retrieval to term descriptions and extract relevant semantic information (#1468)

Co-authored-by: lxwcodemonkey
This commit is contained in:
LXW
2024-07-29 09:15:18 +08:00
committed by GitHub
parent ccd79e4830
commit 26f682cc45
7 changed files with 72 additions and 2 deletions

View File

@@ -33,6 +33,7 @@ public class SchemaElement implements Serializable {
private double order;
private int isTag;
private String description;
private boolean descriptionMapped;
@Override
public boolean equals(Object o) {

View File

@@ -20,4 +20,8 @@ public class SchemaElementMatch {
Long frequency;
boolean isInherited;
/**
 * Tells whether this match counts as a full match.
 *
 * <p>The check is an exact equality test of {@code similarity} against
 * {@code 1.0}; no tolerance is applied.
 *
 * @return {@code true} when {@code similarity} equals {@code 1.0}
 */
public boolean isFullMatched() {
    final double fullMatchScore = 1.0;
    return similarity == fullMatchScore;
}
}

View File

@@ -1,6 +1,8 @@
package com.tencent.supersonic.headless.api.pojo;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.google.common.collect.Lists;
import org.apache.commons.collections4.CollectionUtils;
import java.util.HashMap;
import java.util.List;
@@ -26,4 +28,25 @@ public class SchemaMapInfo {
/**
 * Registers the matches for one data set by putting {@code elementMatches}
 * into {@code dataSetElementMatches} under the key {@code dataSet}.
 *
 * @param dataSet        key under which the matches are stored
 * @param elementMatches matches to associate with that key; stored as passed
 *                       in (no copy is made here)
 */
public void setMatchedElements(Long dataSet, List<SchemaElementMatch> elementMatches) {
dataSetElementMatches.put(dataSet, elementMatches);
}
/**
 * Gathers the elements whose descriptions have not been mapped yet.
 *
 * <p>Every key of {@link #getDataSetElementMatches()} is visited, and from the
 * matches returned by {@code getMatchedElements} for that key an element is
 * kept only when all of the following hold:
 * <ul>
 *   <li>its type is {@code SchemaElementType.TERM},</li>
 *   <li>the match reports {@code isFullMatched()},</li>
 *   <li>the element is not flagged as {@code descriptionMapped}.</li>
 * </ul>
 *
 * @return a newly created list of the qualifying elements; empty when none qualify
 */
@JsonIgnore
public List<SchemaElement> getTermDescriptionToMap() {
    List<SchemaElement> pendingTerms = Lists.newArrayList();
    for (Long id : getDataSetElementMatches().keySet()) {
        for (SchemaElementMatch match : getMatchedElements(id)) {
            // Guard clauses keep the same evaluation order as a single
            // short-circuit condition: type, then full match, then mapped flag.
            if (!SchemaElementType.TERM.equals(match.getElement().getType())) {
                continue;
            }
            if (!match.isFullMatched()) {
                continue;
            }
            if (match.getElement().isDescriptionMapped()) {
                continue;
            }
            pendingTerms.add(match.getElement());
        }
    }
    return pendingTerms;
}
/**
 * Reports whether another mapping pass is needed.
 *
 * @return {@code true} when {@link #getTermDescriptionToMap()} yields at least
 *         one element, {@code false} otherwise
 */
public boolean needContinueMap() {
    List<SchemaElement> pendingTerms = getTermDescriptionToMap();
    return !CollectionUtils.isEmpty(pendingTerms);
}
}