diff --git a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/mapper/HanlpDictMatchStrategy.java b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/mapper/HanlpDictMatchStrategy.java index 139c23351..29a29b9b8 100644 --- a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/mapper/HanlpDictMatchStrategy.java +++ b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/mapper/HanlpDictMatchStrategy.java @@ -9,6 +9,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import java.util.ArrayList; +import java.util.Comparator; import java.util.LinkedHashSet; import java.util.List; import java.util.Set; @@ -50,8 +51,11 @@ public class HanlpDictMatchStrategy extends SingleMatchStrategy return new ArrayList<>(); } // step3. merge pre/suffix result + // sort by similarity (desc) first, then name length (desc), so that + // higher-similarity records are inserted first and survive LinkedHashSet dedup hanlpMapResults = hanlpMapResults.stream() - .sorted((a, b) -> -(b.getName().length() - a.getName().length())) + .sorted(Comparator.comparingDouble(HanlpMapResult::getSimilarity).reversed() + .thenComparing((a, b) -> Integer.compare(b.getName().length(), a.getName().length()))) .collect(Collectors.toCollection(LinkedHashSet::new)); // step4. filter by similarity diff --git a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/mapper/MapFilter.java b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/mapper/MapFilter.java index aad8d10c4..5851fccfd 100644 --- a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/mapper/MapFilter.java +++ b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/mapper/MapFilter.java @@ -123,15 +123,9 @@ public class MapFilter { .filter(SchemaElementMatch::isFullMatched).collect(Collectors.toList()); if (!fullMatches.isEmpty()) { - // If there are objects with similarity=1.0, choose the one with the longest - // detectWord and smallest offset - SchemaElementMatch bestMatch = fullMatches.stream() - .max(Comparator.comparing( - (SchemaElementMatch match) -> match.getDetectWord().length())) - .orElse(null); - if (bestMatch != null) { - result.add(bestMatch); - } + // Keep all records with similarity=1.0, as they may correspond to different + // elementIds with the same detectWord + result.addAll(fullMatches); } else { // If there are no objects with similarity=1.0, keep all objects with similarity<1.0 result.addAll(group);