mirror of
https://github.com/tencentmusic/supersonic.git
synced 2026-06-25 22:29:22 +08:00
fix(mapper): 优先按相似度排序并保留所有完全匹配项
This commit is contained in:
@@ -9,6 +9,7 @@ import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
@@ -50,8 +51,11 @@ public class HanlpDictMatchStrategy extends SingleMatchStrategy<HanlpMapResult>
|
||||
return new ArrayList<>();
|
||||
}
|
||||
// step3. merge pre/suffix result
|
||||
// sort by similarity (desc) first, then name length (desc), so that
|
||||
// higher-similarity records are inserted first and survive LinkedHashSet dedup
|
||||
hanlpMapResults = hanlpMapResults.stream()
|
||||
.sorted((a, b) -> -(b.getName().length() - a.getName().length()))
|
||||
.sorted(Comparator.comparingDouble(HanlpMapResult::getSimilarity).reversed()
|
||||
.thenComparing((a, b) -> Integer.compare(b.getName().length(), a.getName().length())))
|
||||
.collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
|
||||
// step4. filter by similarity
|
||||
|
||||
@@ -123,15 +123,9 @@ public class MapFilter {
|
||||
.filter(SchemaElementMatch::isFullMatched).collect(Collectors.toList());
|
||||
|
||||
if (!fullMatches.isEmpty()) {
|
||||
// If there are objects with similarity=1.0, choose the one with the longest
|
||||
// detectWord (only the detectWord length is compared; offset is not considered)
|
||||
SchemaElementMatch bestMatch = fullMatches.stream()
|
||||
.max(Comparator.comparing(
|
||||
(SchemaElementMatch match) -> match.getDetectWord().length()))
|
||||
.orElse(null);
|
||||
if (bestMatch != null) {
|
||||
result.add(bestMatch);
|
||||
}
|
||||
// Keep all records with similarity=1.0, as they may correspond to different
|
||||
// elementIds with the same detectWord
|
||||
result.addAll(fullMatches);
|
||||
} else {
|
||||
// If there are no objects with similarity=1.0, keep all objects with similarity<1.0
|
||||
result.addAll(group);
|
||||
|
||||
Reference in New Issue
Block a user