mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-14 13:47:09 +00:00
[improvement][headless]Refactor headless infra to support advanced semantic modelling.
This commit is contained in:
@@ -19,19 +19,6 @@ public class Term {
|
||||
this.nature = nature;
|
||||
}
|
||||
|
||||
public Term(String word, Nature nature, int offset) {
|
||||
this.word = word;
|
||||
this.nature = nature;
|
||||
this.offset = offset;
|
||||
}
|
||||
|
||||
public Term(String word, Nature nature, int offset, int frequency) {
|
||||
this.word = word;
|
||||
this.nature = nature;
|
||||
this.offset = offset;
|
||||
this.frequency = frequency;
|
||||
}
|
||||
|
||||
public int length() {
|
||||
return this.word.length();
|
||||
}
|
||||
|
||||
@@ -8,7 +8,6 @@ import lombok.Data;
|
||||
@Builder
|
||||
public class DataItem {
|
||||
|
||||
/** This field's value ends with an underscore (_). */
|
||||
private String id;
|
||||
|
||||
private String bizName;
|
||||
@@ -19,9 +18,10 @@ public class DataItem {
|
||||
|
||||
private TypeEnums type;
|
||||
|
||||
/** This field's value ends with an underscore (_). */
|
||||
private String modelId;
|
||||
|
||||
private String domainId;
|
||||
|
||||
private String defaultAgg;
|
||||
|
||||
public String getNewName() {
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package com.tencent.supersonic.common.pojo;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.List;
|
||||
@@ -18,5 +19,5 @@ public class ModelRela extends RecordInfo {
|
||||
// left join, inner join, right join, outer join
|
||||
private String joinType;
|
||||
|
||||
private List<JoinCondition> joinConditions;
|
||||
private List<JoinCondition> joinConditions = Lists.newArrayList();
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package dev.langchain4j.store.embedding;
|
||||
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import com.tencent.supersonic.common.pojo.DataItem;
|
||||
import dev.langchain4j.data.document.Metadata;
|
||||
import dev.langchain4j.data.segment.TextSegment;
|
||||
@@ -17,10 +18,18 @@ public class TextSegmentConvert {
|
||||
public static final String QUERY_ID = "queryId";
|
||||
|
||||
public static List<TextSegment> convertToEmbedding(List<DataItem> dataItems) {
|
||||
return dataItems.stream().map(dataItem -> {
|
||||
Map meta = JSONObject.parseObject(JSONObject.toJSONString(dataItem), Map.class);
|
||||
TextSegment textSegment = TextSegment.from(dataItem.getName(), new Metadata(meta));
|
||||
addQueryId(textSegment, dataItem.getId() + dataItem.getType().name().toLowerCase());
|
||||
return dataItems.stream().map(item -> {
|
||||
// suffix with underscore to avoid embedding issue
|
||||
DataItem newItem = DataItem.builder().domainId(item.getDomainId())
|
||||
.bizName(item.getBizName()).type(item.getType()).newName(item.getNewName())
|
||||
.defaultAgg(item.getDefaultAgg()).name(item.getName())
|
||||
.id(item.getId() + Constants.UNDERLINE)
|
||||
.modelId(item.getModelId() + Constants.UNDERLINE)
|
||||
.domainId(item.getDomainId() + Constants.UNDERLINE).build();
|
||||
|
||||
Map meta = JSONObject.parseObject(JSONObject.toJSONString(newItem), Map.class);
|
||||
TextSegment textSegment = TextSegment.from(newItem.getName(), new Metadata(meta));
|
||||
addQueryId(textSegment, newItem.getId() + newItem.getType().name().toLowerCase());
|
||||
return textSegment;
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user