fix(mapper): 优先按相似度排序并保留所有完全匹配项

2026-06-26 06:39:20 +08:00 · 2026-06-09 19:48:02 +08:00
parent ef2f07a59e
commit 6d41ce4c5b
2 changed files with 8 additions and 10 deletions
--- a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/mapper/HanlpDictMatchStrategy.java
+++ b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/mapper/HanlpDictMatchStrategy.java
@@ -9,6 +9,7 @@ import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
 import java.util.ArrayList;
 import java.util.Comparator;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Set;
@@ -50,8 +51,11 @@ public class HanlpDictMatchStrategy extends SingleMatchStrategy<HanlpMapResult>
            return new ArrayList<>();
        }
        // step3. merge pre/suffix result
+        // sort by similarity (desc) first, then name length (desc), so that
+        // higher-similarity records are inserted first and survive LinkedHashSet dedup
        hanlpMapResults = hanlpMapResults.stream()
-                .sorted((a, b) -> -(b.getName().length() - a.getName().length()))
+                .sorted(Comparator.comparingDouble(HanlpMapResult::getSimilarity).reversed()
+                        .thenComparing((a, b) -> Integer.compare(b.getName().length(), a.getName().length())))
                .collect(Collectors.toCollection(LinkedHashSet::new));
        // step4. filter by similarity
--- a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/mapper/MapFilter.java
+++ b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/mapper/MapFilter.java
@@ -123,15 +123,9 @@ public class MapFilter {
                    .filter(SchemaElementMatch::isFullMatched).collect(Collectors.toList());
            if (!fullMatches.isEmpty()) {
-                // If there are objects with similarity=1.0, choose the one with the longest
+                // Keep all records with similarity=1.0, as they may correspond to different
-                // detectWord and smallest offset
+                // elementIds with the same detectWord
-                SchemaElementMatch bestMatch = fullMatches.stream()
+                result.addAll(fullMatches);
-                        .max(Comparator.comparing(
-                                (SchemaElementMatch match) -> match.getDetectWord().length()))
-                        .orElse(null);
-                if (bestMatch != null) {
-                    result.add(bestMatch);
-                }
            } else {
                // If there are no objects with similarity=1.0, keep all objects with similarity<1.0
                result.addAll(group);