修复同一模型被多个数据集引用时出现的以下问题:

1. BeanUtils.copyProperties 为浅拷贝,各个新对象与原对象共享同一个 metadata Map,导致 Map 中的元数据被更改;拷贝得到的新对象因 dataSetId 这个 key 已存在,putIfAbsent 不再写入,导致其余数据集 id 未能正确创建。
2. 由于拷贝得到的对象相似度值相等,使用“小于”判断会导致不同数据集 id 的对象被移除;改为“小于等于”判断,防止不同数据集 id 的 EmbeddingResult 被移除。
This commit is contained in:
iridescentpeo
2025-03-21 15:13:15 +08:00
parent 35b835172b
commit 313718fa6a
3 changed files with 9 additions and 1 deletions

View File

@@ -22,4 +22,9 @@ public abstract class MapResult implements Serializable {
return this.getMapKey().equals(otherResult.getMapKey())
&& this.similarity < otherResult.similarity;
}
/**
 * Tells whether this result refers to the same map key as {@code otherResult}
 * and has a similarity that is less than or equal to the other's.
 *
 * @param otherResult the result to compare against
 * @return {@code true} when the map keys are equal and this result's
 *         similarity does not exceed {@code otherResult}'s; {@code false} otherwise
 */
public Boolean lessOrEqualSimilar(MapResult otherResult) {
    // Results with different map keys are never comparable here.
    if (!this.getMapKey().equals(otherResult.getMapKey())) {
        return false;
    }
    // Same key: an equal similarity also counts (non-strict comparison).
    return this.similarity <= otherResult.similarity;
}
}

View File

@@ -75,6 +75,8 @@ public class MetaEmbeddingService {
return dataSetIds.stream().map(dataSetId -> {
Retrieval newRetrieval = new Retrieval();
BeanUtils.copyProperties(retrieval, newRetrieval);
HashMap<String, Object> newMetadata = new HashMap<>(retrieval.getMetadata());
newRetrieval.setMetadata(newMetadata);
newRetrieval.getMetadata().putIfAbsent("dataSetId",
dataSetId + Constants.UNDERLINE);
return newRetrieval;

View File

@@ -56,7 +56,8 @@ public abstract class BaseMatchStrategy<T extends MapResult> implements MatchStr
for (T oneRoundResult : oneRoundResults) {
if (existResults.contains(oneRoundResult)) {
boolean isDeleted = existResults.removeIf(existResult -> {
boolean delete = existResult.lessSimilar(oneRoundResult);
// boolean delete = existResult.lessSimilar(oneRoundResult);
boolean delete = existResult.lessOrEqualSimilar(oneRoundResult);
if (delete) {
log.debug("deleted existResult:{}", existResult);
}