fix(mapper): 优先按相似度排序并保留所有完全匹配项
Some checks failed
supersonic CentOS CI / build (21) (push) Has been cancelled
supersonic mac CI / build (21) (push) Has been cancelled
supersonic ubuntu CI / build (21) (push) Has been cancelled
supersonic windows CI / build (21) (push) Has been cancelled

This commit is contained in:
jerryjzhang
2026-06-09 19:48:02 +08:00
parent ef2f07a59e
commit 6d41ce4c5b
2 changed files with 8 additions and 10 deletions

View File

@@ -9,6 +9,7 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
@@ -50,8 +51,11 @@ public class HanlpDictMatchStrategy extends SingleMatchStrategy<HanlpMapResult>
return new ArrayList<>();
}
// step3. merge prefix/suffix results
// sort by similarity (desc) first, then name length (desc), so that
// higher-similarity records are inserted first and survive LinkedHashSet dedup
hanlpMapResults = hanlpMapResults.stream()
.sorted((a, b) -> -(b.getName().length() - a.getName().length()))
.sorted(Comparator.comparingDouble(HanlpMapResult::getSimilarity).reversed()
.thenComparing((a, b) -> Integer.compare(b.getName().length(), a.getName().length())))
.collect(Collectors.toCollection(LinkedHashSet::new));
// step4. filter by similarity

View File

@@ -123,15 +123,9 @@ public class MapFilter {
.filter(SchemaElementMatch::isFullMatched).collect(Collectors.toList());
if (!fullMatches.isEmpty()) {
// If there are objects with similarity=1.0, choose the one with the longest
// detectWord and smallest offset
SchemaElementMatch bestMatch = fullMatches.stream()
.max(Comparator.comparing(
(SchemaElementMatch match) -> match.getDetectWord().length()))
.orElse(null);
if (bestMatch != null) {
result.add(bestMatch);
}
// Keep all records with similarity=1.0, as they may correspond to different
// elementIds with the same detectWord
result.addAll(fullMatches);
} else {
// If there are no objects with similarity=1.0, keep all objects with similarity<1.0
result.addAll(group);