[improvement]Use QueryWrapper in place of hard-coded SQLs (#1944)
Some checks are pending
supersonic CentOS CI / build (11) (push) Waiting to run
supersonic CentOS CI / build (21) (push) Waiting to run
supersonic CentOS CI / build (8) (push) Waiting to run
supersonic mac CI / build (11) (push) Waiting to run
supersonic mac CI / build (21) (push) Waiting to run
supersonic mac CI / build (8) (push) Waiting to run
supersonic ubuntu CI / build (11) (push) Waiting to run
supersonic ubuntu CI / build (21) (push) Waiting to run
supersonic ubuntu CI / build (8) (push) Waiting to run
supersonic windows CI / build (11) (push) Waiting to run
supersonic windows CI / build (21) (push) Waiting to run
supersonic windows CI / build (8) (push) Waiting to run

* [improvement][launcher]Use API to get element ID instead of hard-coding it.

* [fix][launcher]Fix mysql scripts.

* [improvement][launcher]Support DuckDB database and refactor translator code structure.

* [improvement][headless-fe] Revamped the interaction for semantic modeling routing and successfully implemented the switching between dimension and dataset management.

* [improvement][Headless] Add table ddl in Dbschema

* [improvement][Headless] Add get database by type

* [improvement][Headless] Supports automatic batch creation of models based on db table names.

* [improvement][Headless] Supports getting domain by bizName

* [improvement][launcher]Refactor unit tests and demo data.

* [fix][launcher]Change default vector dimension to 512.

* [improvement](Dict) add dimValueAliasMap info for KnowledgeBaseService

* [improvement][headless]Use QueryWrapper to replace hard-coded SQL in mapper xml.

* [improvement][chat]Introduce ChatMemory to delegate ChatMemoryDO.

* [fix][common]Fix embedding store sys configs.

* [fix][common]Fix postgres schema, using varchar instead of char.

* [improvement][launcher]Change supersonic docker deployment from mysql to postgres.

* [Fix][launcher]Fix a number of issues related to semantic modeling.

* [Fix][headless]Fix the evaluation logic of agg type.

* [fix][assembly]Fix Dockerfile and add docker compose run script.

* [fix][chat]Fix 'multiple assignments to same column "similar_queries"' error.

* [improvement][headless]Use LambdaQueryWrapper to avoid hard-coded column names.

* [improvement][headless]Refactor headless infra to support advanced semantic modelling.

* [improvement][headless]Change class name `Dim` to `Dimension`.

* [improvement][chat]Introduce `TimeFieldMapper` to always map time field.

* [fix][headless]Remove unnecessary dimension existence check.

* [fix][chat]Fix adjusted filters not taking effect.

---------
This commit is contained in:
Jun Zhang
2024-12-08 13:32:29 +08:00
committed by GitHub
parent 0fc29304a8
commit e55f43c737
120 changed files with 844 additions and 5810 deletions

View File

@@ -26,7 +26,7 @@ public class PgvectorEmbeddingStoreFactory extends BaseEmbeddingStoreFactory {
embeddingStore.setPort(storeConfig.getPost());
embeddingStore.setDatabase(storeConfig.getDatabaseName());
embeddingStore.setUser(storeConfig.getUser());
embeddingStore.setPassword(storeConfig.getApiKey());
embeddingStore.setPassword(storeConfig.getPassword());
return embeddingStore;
}

View File

@@ -1,6 +1,7 @@
package dev.langchain4j.store.embedding;
import com.alibaba.fastjson.JSONObject;
import com.tencent.supersonic.common.pojo.Constants;
import com.tencent.supersonic.common.pojo.DataItem;
import dev.langchain4j.data.document.Metadata;
import dev.langchain4j.data.segment.TextSegment;
@@ -17,10 +18,18 @@ public class TextSegmentConvert {
public static final String QUERY_ID = "queryId";
/**
 * Converts every {@link DataItem} into a {@link TextSegment}: the segment text is the item's
 * name, and the segment metadata is the item serialized to JSON and parsed back into a Map.
 * Each segment is then passed to {@code addQueryId} together with a string built from the
 * item's id followed by its lower-cased type name.
 *
 * NOTE(review): this method is shown inside a diff hunk rendered without +/- markers. The
 * lambda using {@code dataItem} appears to be the pre-change body and the lambda using
 * {@code item} the post-change body -- confirm against the actual file before relying on it.
 *
 * @param dataItems the items to convert
 * @return one TextSegment per input item, collected into a List
 */
public static List<TextSegment> convertToEmbedding(List<DataItem> dataItems) {
// Presumably the removed (pre-change) body: the item is serialized as-is, with no id suffixing.
return dataItems.stream().map(dataItem -> {
Map meta = JSONObject.parseObject(JSONObject.toJSONString(dataItem), Map.class);
TextSegment textSegment = TextSegment.from(dataItem.getName(), new Metadata(meta));
addQueryId(textSegment, dataItem.getId() + dataItem.getType().name().toLowerCase());
// Presumably the added (post-change) body: works on a rebuilt copy of the item.
return dataItems.stream().map(item -> {
// suffix with underscore to avoid embedding issue
// Copy the item, appending Constants.UNDERLINE to id, modelId and domainId; the other
// fields (bizName, type, newName, defaultAgg, name) are carried over unchanged.
// NOTE(review): domainId is set twice on this builder (here without the suffix, and again
// on the last line with it); assuming a last-write-wins builder, this first call looks
// redundant -- confirm.
DataItem newItem = DataItem.builder().domainId(item.getDomainId())
.bizName(item.getBizName()).type(item.getType()).newName(item.getNewName())
.defaultAgg(item.getDefaultAgg()).name(item.getName())
.id(item.getId() + Constants.UNDERLINE)
.modelId(item.getModelId() + Constants.UNDERLINE)
.domainId(item.getDomainId() + Constants.UNDERLINE).build();
// Round-trip through JSON so the metadata map carries the copy's (suffixed) field values.
Map meta = JSONObject.parseObject(JSONObject.toJSONString(newItem), Map.class);
TextSegment textSegment = TextSegment.from(newItem.getName(), new Metadata(meta));
// The id used here is the already-suffixed one, so the result is id + suffix + type name.
addQueryId(textSegment, newItem.getId() + newItem.getType().name().toLowerCase());
return textSegment;
}).collect(Collectors.toList());
}