mirror of
https://github.com/tencentmusic/supersonic.git
synced 2026-04-19 13:04:21 +08:00
修复同一模型被多个数据集引用时出现的两个问题:
1. 浅拷贝导致 meta 中 map 的元数据被更改,新对象通过 copy 后因数据集 key 已存在,导致其余数据集 id 未能正确创建。 2. 因为 copy 后相似度值相等,小于判断会导致不同数据集 id 的对象被移除,改为小于等于,防止不同数据集 id 的 EmbeddingResult 被移除。
This commit is contained in:
@@ -22,4 +22,9 @@ public abstract class MapResult implements Serializable {
|
||||
return this.getMapKey().equals(otherResult.getMapKey())
|
||||
&& this.similarity < otherResult.similarity;
|
||||
}
|
||||
|
||||
public Boolean lessOrEqualSimilar(MapResult otherResult) {
|
||||
return this.getMapKey().equals(otherResult.getMapKey())
|
||||
&& this.similarity <= otherResult.similarity;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,6 +75,8 @@ public class MetaEmbeddingService {
|
||||
return dataSetIds.stream().map(dataSetId -> {
|
||||
Retrieval newRetrieval = new Retrieval();
|
||||
BeanUtils.copyProperties(retrieval, newRetrieval);
|
||||
HashMap<String, Object> newMetadata = new HashMap<>(retrieval.getMetadata());
|
||||
newRetrieval.setMetadata(newMetadata);
|
||||
newRetrieval.getMetadata().putIfAbsent("dataSetId",
|
||||
dataSetId + Constants.UNDERLINE);
|
||||
return newRetrieval;
|
||||
|
||||
@@ -56,7 +56,8 @@ public abstract class BaseMatchStrategy<T extends MapResult> implements MatchStr
|
||||
for (T oneRoundResult : oneRoundResults) {
|
||||
if (existResults.contains(oneRoundResult)) {
|
||||
boolean isDeleted = existResults.removeIf(existResult -> {
|
||||
boolean delete = existResult.lessSimilar(oneRoundResult);
|
||||
// boolean delete = existResult.lessSimilar(oneRoundResult);
|
||||
boolean delete = existResult.lessOrEqualSimilar(oneRoundResult);
|
||||
if (delete) {
|
||||
log.debug("deleted existResult:{}", existResult);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user