mirror of
https://github.com/tencentmusic/supersonic.git
synced 2026-04-19 13:04:21 +08:00
Merge 16afcbc95a into d2a43a99c8
This commit is contained in:
@@ -34,8 +34,8 @@
|
||||
</dependencies>
|
||||
|
||||
<properties>
|
||||
<maven.compiler.source>8</maven.compiler.source>
|
||||
<maven.compiler.target>8</maven.compiler.target>
|
||||
<maven.compiler.source>21</maven.compiler.source>
|
||||
<maven.compiler.target>21</maven.compiler.target>
|
||||
</properties>
|
||||
|
||||
</project>
|
||||
@@ -77,11 +77,6 @@ public class SemanticSqlConformance implements SqlConformance {
|
||||
return SqlConformanceEnum.BIG_QUERY.isMinusAllowed();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isRegexReplaceCaptureGroupDollarIndexed() {
|
||||
return SqlConformanceEnum.BIG_QUERY.isRegexReplaceCaptureGroupDollarIndexed();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isApplyAllowed() {
|
||||
return SqlConformanceEnum.BIG_QUERY.isApplyAllowed();
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
package dev.langchain4j.inmemory.spring;
|
||||
|
||||
import dev.langchain4j.model.embedding.AllMiniLmL6V2QuantizedEmbeddingModel;
|
||||
import dev.langchain4j.model.embedding.BgeSmallZhEmbeddingModel;
|
||||
import dev.langchain4j.model.embedding.EmbeddingModel;
|
||||
import dev.langchain4j.model.embedding.S2OnnxEmbeddingModel;
|
||||
import dev.langchain4j.model.embedding.onnx.allminilml6v2q.AllMiniLmL6V2QuantizedEmbeddingModel;
|
||||
import dev.langchain4j.model.embedding.onnx.bgesmallzh.BgeSmallZhEmbeddingModel;
|
||||
import dev.langchain4j.provider.EmbeddingModelConstant;
|
||||
import dev.langchain4j.store.embedding.EmbeddingStoreFactory;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
package dev.langchain4j.model.embedding;
|
||||
|
||||
import dev.langchain4j.model.embedding.onnx.AbstractInProcessEmbeddingModel;
|
||||
import dev.langchain4j.model.embedding.onnx.OnnxBertBiEncoder;
|
||||
import dev.langchain4j.model.embedding.onnx.PoolingMode;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
@@ -9,6 +12,7 @@ import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.Executors;
|
||||
|
||||
/**
|
||||
* An embedding model that runs within your Java application's process. Any BERT-based model (e.g.,
|
||||
@@ -25,6 +29,7 @@ public class S2OnnxEmbeddingModel extends AbstractInProcessEmbeddingModel {
|
||||
private static volatile String cachedVocabularyPath;
|
||||
|
||||
public S2OnnxEmbeddingModel(String pathToModel, String vocabularyPath) {
|
||||
super(Executors.newSingleThreadExecutor());
|
||||
if (shouldReloadModel(pathToModel, vocabularyPath)) {
|
||||
synchronized (S2OnnxEmbeddingModel.class) {
|
||||
if (shouldReloadModel(pathToModel, vocabularyPath)) {
|
||||
@@ -61,7 +66,7 @@ public class S2OnnxEmbeddingModel extends AbstractInProcessEmbeddingModel {
|
||||
|
||||
static OnnxBertBiEncoder loadFromFileSystem(Path pathToModel, URL vocabularyFile) {
|
||||
try {
|
||||
return new OnnxBertBiEncoder(Files.newInputStream(pathToModel), vocabularyFile,
|
||||
return new OnnxBertBiEncoder(Files.newInputStream(pathToModel), vocabularyFile.openStream(),
|
||||
PoolingMode.MEAN);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
package dev.langchain4j.provider;
|
||||
|
||||
import dev.langchain4j.model.embedding.AllMiniLmL6V2QuantizedEmbeddingModel;
|
||||
import dev.langchain4j.model.embedding.BgeSmallZhEmbeddingModel;
|
||||
import dev.langchain4j.model.embedding.EmbeddingModel;
|
||||
import dev.langchain4j.model.embedding.onnx.allminilml6v2q.AllMiniLmL6V2QuantizedEmbeddingModel;
|
||||
import dev.langchain4j.model.embedding.onnx.bgesmallzh.BgeSmallZhEmbeddingModel;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Service
|
||||
|
||||
@@ -57,6 +57,7 @@ public class MilvusEmbeddingStore implements EmbeddingStore<TextSegment> {
|
||||
private final ConsistencyLevelEnum consistencyLevel;
|
||||
private final boolean retrieveEmbeddingsOnSearch;
|
||||
private final boolean autoFlushOnInsert;
|
||||
private final FieldDefinition fieldDefinition;
|
||||
|
||||
public MilvusEmbeddingStore(String host, Integer port, String collectionName, Integer dimension,
|
||||
IndexType indexType, MetricType metricType, String uri, String token, String username,
|
||||
@@ -78,11 +79,15 @@ public class MilvusEmbeddingStore implements EmbeddingStore<TextSegment> {
|
||||
this.retrieveEmbeddingsOnSearch = getOrDefault(retrieveEmbeddingsOnSearch, false);
|
||||
this.autoFlushOnInsert = getOrDefault(autoFlushOnInsert, false);
|
||||
|
||||
// Define the field structure for the collection
|
||||
this.fieldDefinition = new FieldDefinition(ID_FIELD_NAME, TEXT_FIELD_NAME,
|
||||
METADATA_FIELD_NAME, VECTOR_FIELD_NAME);
|
||||
|
||||
if (!hasCollection(this.milvusClient, this.collectionName)) {
|
||||
createCollection(this.milvusClient, this.collectionName,
|
||||
ensureNotNull(dimension, "dimension"));
|
||||
createIndex(this.milvusClient, this.collectionName, getOrDefault(indexType, FLAT),
|
||||
this.metricType);
|
||||
createCollection(this.milvusClient, this.collectionName, fieldDefinition,
|
||||
ensureNotNull(dimension, "dimension"));
|
||||
createIndex(this.milvusClient, this.collectionName, VECTOR_FIELD_NAME,
|
||||
getOrDefault(indexType, FLAT), this.metricType);
|
||||
}
|
||||
|
||||
loadCollectionInMemory(this.milvusClient, collectionName);
|
||||
@@ -128,7 +133,7 @@ public class MilvusEmbeddingStore implements EmbeddingStore<TextSegment> {
|
||||
public EmbeddingSearchResult<TextSegment> search(
|
||||
EmbeddingSearchRequest embeddingSearchRequest) {
|
||||
|
||||
SearchParam searchParam = buildSearchRequest(collectionName,
|
||||
SearchParam searchParam = buildSearchRequest(collectionName, fieldDefinition,
|
||||
embeddingSearchRequest.queryEmbedding().vectorAsList(),
|
||||
embeddingSearchRequest.filter(), embeddingSearchRequest.maxResults(), metricType,
|
||||
consistencyLevel);
|
||||
@@ -137,7 +142,7 @@ public class MilvusEmbeddingStore implements EmbeddingStore<TextSegment> {
|
||||
CollectionOperationsExecutor.search(milvusClient, searchParam);
|
||||
|
||||
List<EmbeddingMatch<TextSegment>> matches = toEmbeddingMatches(milvusClient, resultsWrapper,
|
||||
collectionName, consistencyLevel, retrieveEmbeddingsOnSearch);
|
||||
collectionName, fieldDefinition, consistencyLevel, retrieveEmbeddingsOnSearch);
|
||||
|
||||
List<EmbeddingMatch<TextSegment>> result =
|
||||
matches.stream().filter(match -> match.score() >= embeddingSearchRequest.minScore())
|
||||
@@ -226,7 +231,7 @@ public class MilvusEmbeddingStore implements EmbeddingStore<TextSegment> {
|
||||
@Override
|
||||
public void removeAll(Filter filter) {
|
||||
ensureNotNull(filter, "filter");
|
||||
removeForVector(this.milvusClient, this.collectionName, map(filter));
|
||||
removeForVector(this.milvusClient, this.collectionName, map(filter, METADATA_FIELD_NAME));
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -64,7 +64,7 @@
|
||||
<dependency>
|
||||
<groupId>com.alibaba</groupId>
|
||||
<artifactId>transmittable-thread-local</artifactId>
|
||||
<version>${transmittable.thread.local.version}</version>
|
||||
<version>${transmittable.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.aspectj</groupId>
|
||||
|
||||
@@ -44,10 +44,8 @@ public class MetaEmbeddingTask implements CommandLineRunner {
|
||||
|
||||
private void embeddingStorePersistFile() {
|
||||
EmbeddingStoreFactory embeddingStoreFactory = EmbeddingStoreFactoryProvider.getFactory();
|
||||
if (embeddingStoreFactory instanceof InMemoryEmbeddingStoreFactory) {
|
||||
if (embeddingStoreFactory instanceof InMemoryEmbeddingStoreFactory inMemoryFactory) {
|
||||
long startTime = System.currentTimeMillis();
|
||||
InMemoryEmbeddingStoreFactory inMemoryFactory =
|
||||
(InMemoryEmbeddingStoreFactory) embeddingStoreFactory;
|
||||
inMemoryFactory.persistFile();
|
||||
long duration = System.currentTimeMillis() - startTime;
|
||||
log.info("Embedding file has been regularly persisted in {} milliseconds", duration);
|
||||
|
||||
@@ -60,7 +60,7 @@
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<version>2.4</version>
|
||||
<version>3.4.2</version>
|
||||
<configuration>
|
||||
<excludes>
|
||||
<exclude>*.*</exclude>
|
||||
@@ -70,7 +70,7 @@
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<version>2.4</version>
|
||||
<version>3.7.1</version>
|
||||
<configuration>
|
||||
<tarLongFileMode>gnu</tarLongFileMode>
|
||||
<skipAssembly>false</skipAssembly>
|
||||
|
||||
@@ -12,8 +12,8 @@
|
||||
<artifactId>launchers-headless</artifactId>
|
||||
|
||||
<properties>
|
||||
<maven.compiler.source>8</maven.compiler.source>
|
||||
<maven.compiler.target>8</maven.compiler.target>
|
||||
<maven.compiler.source>21</maven.compiler.source>
|
||||
<maven.compiler.target>21</maven.compiler.target>
|
||||
<start-class>com.tencent.supersonic.HeadlessLauncher</start-class>
|
||||
</properties>
|
||||
|
||||
@@ -71,7 +71,7 @@
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<version>2.4</version>
|
||||
<version>3.4.2</version>
|
||||
<configuration>
|
||||
<excludes>
|
||||
<exclude>*.*</exclude>
|
||||
@@ -81,7 +81,7 @@
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<version>2.4</version>
|
||||
<version>3.7.1</version>
|
||||
<configuration>
|
||||
<tarLongFileMode>gnu</tarLongFileMode>
|
||||
<skipAssembly>false</skipAssembly>
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
</modules>
|
||||
|
||||
<properties>
|
||||
<maven.compiler.source>8</maven.compiler.source>
|
||||
<maven.compiler.target>8</maven.compiler.target>
|
||||
<maven.compiler.source>21</maven.compiler.source>
|
||||
<maven.compiler.target>21</maven.compiler.target>
|
||||
</properties>
|
||||
</project>
|
||||
|
||||
@@ -55,8 +55,8 @@
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>mysql</groupId>
|
||||
<artifactId>mysql-connector-java</artifactId>
|
||||
<groupId>com.mysql</groupId>
|
||||
<artifactId>mysql-connector-j</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
@@ -149,7 +149,7 @@
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<version>2.4</version>
|
||||
<version>3.4.2</version>
|
||||
<configuration>
|
||||
<excludes>
|
||||
<exclude>*.*</exclude>
|
||||
@@ -159,7 +159,7 @@
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<version>2.4</version>
|
||||
<version>3.7.1</version>
|
||||
<configuration>
|
||||
<tarLongFileMode>gnu</tarLongFileMode>
|
||||
<skipAssembly>false</skipAssembly>
|
||||
|
||||
43
pom.xml
43
pom.xml
@@ -22,7 +22,7 @@
|
||||
<parent>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-parent</artifactId>
|
||||
<version>3.2.4</version>
|
||||
<version>3.3.9</version>
|
||||
</parent>
|
||||
|
||||
<properties>
|
||||
@@ -32,55 +32,54 @@
|
||||
<maven.compiler.source>21</maven.compiler.source>
|
||||
<maven.compiler.target>21</maven.compiler.target>
|
||||
<file.encoding>UTF-8</file.encoding>
|
||||
<jsqlparser.version>4.7</jsqlparser.version>
|
||||
<jsqlparser.version>4.9</jsqlparser.version>
|
||||
<pagehelper.version>6.1.0</pagehelper.version>
|
||||
<pagehelper.spring.version>2.1.0</pagehelper.spring.version>
|
||||
<mybatis.version>3.5.3</mybatis.version>
|
||||
<mybatis.version>3.5.19</mybatis.version>
|
||||
<guava.version>32.0.0-jre</guava.version>
|
||||
<hanlp.version>portable-1.8.3</hanlp.version>
|
||||
<hanlp.version>portable-1.8.4</hanlp.version>
|
||||
<hadoop.version>2.7.2</hadoop.version>
|
||||
<commons.lang.version>2.6</commons.lang.version>
|
||||
<commons.lang3.version>3.7</commons.lang3.version>
|
||||
<org.testng.version>6.13.1</org.testng.version>
|
||||
<yaml.utils.version>2.14.1</yaml.utils.version>
|
||||
<transmittable.thread.local.version>2.12.1</transmittable.thread.local.version>
|
||||
<jjwt.version>0.12.3</jjwt.version>
|
||||
<jjwt.version>0.12.6</jjwt.version>
|
||||
<alibaba.druid.version>1.2.24</alibaba.druid.version>
|
||||
<mysql.connector.java.version>5.1.46</mysql.connector.java.version>
|
||||
<mysql.connector.java.version>9.2.0</mysql.connector.java.version>
|
||||
<kyuubi.version>1.10.1</kyuubi.version>
|
||||
<presto.version>0.291</presto.version>
|
||||
<trino.version>471</trino.version>
|
||||
<mybatis.plus.version>3.5.7</mybatis.plus.version>
|
||||
<httpclient5.version>5.4.1</httpclient5.version>
|
||||
<mybatis.plus.version>3.5.10.1</mybatis.plus.version>
|
||||
<httpclient5.version>5.4.2</httpclient5.version>
|
||||
<!-- <httpcore.version>4.4.16</httpcore.version>-->
|
||||
<httpcore5.version>5.3.1</httpcore5.version>
|
||||
<httpcore5.version>5.3.3</httpcore5.version>
|
||||
<clickhouse.jdbc.version>0.4.6</clickhouse.jdbc.version>
|
||||
<fastjson.version>2.0.40</fastjson.version>
|
||||
<fastjson.version>2.0.56</fastjson.version>
|
||||
<dozer.verson>7.0.0</dozer.verson>
|
||||
<!-- <httpmime.version>4.5.6</httpmime.version>-->
|
||||
<transmittable.version>2.12.1</transmittable.version>
|
||||
<commons.compress.version>1.26.0</commons.compress.version>
|
||||
<transmittable.version>2.14.5</transmittable.version>
|
||||
<commons.compress.version>1.27.1</commons.compress.version>
|
||||
<jetty.util.version>6.1.26</jetty.util.version>
|
||||
<!--<spring.version>2.7.2</spring.version>-->
|
||||
<jsonpath.version>2.8.0</jsonpath.version>
|
||||
<calcite.version>1.37.0</calcite.version>
|
||||
<calcite.avatica.version>1.23.0</calcite.avatica.version>
|
||||
<calcite.version>1.38.0</calcite.version>
|
||||
<calcite.avatica.version>1.26.0</calcite.avatica.version>
|
||||
<xk.time.version>3.2.4</xk.time.version>
|
||||
<mockito-inline.version>4.5.1</mockito-inline.version>
|
||||
<easyexcel.version>2.2.6</easyexcel.version>
|
||||
<easyexcel.version>2.2.11</easyexcel.version>
|
||||
<poi.version>3.17</poi.version>
|
||||
<langchain4j.version>0.35.0</langchain4j.version>
|
||||
<langchain4j.embedding.version>0.27.1</langchain4j.embedding.version>
|
||||
<langchain4j.version>0.36.2</langchain4j.version>
|
||||
<langchain4j.embedding.version>0.36.2</langchain4j.embedding.version>
|
||||
<!-- <postgresql.version>42.7.1</postgresql.version>-->
|
||||
<st.version>4.0.8</st.version>
|
||||
<duckdb_jdbc.version>0.10.0</duckdb_jdbc.version>
|
||||
<flight-sql.version>15.0.2</flight-sql.version>
|
||||
<arrow-jdbc.version>15.0.2</arrow-jdbc.version>
|
||||
<flight-sql-jdbc-driver.version>15.0.2</flight-sql-jdbc-driver.version>
|
||||
<gson.version>2.10.1</gson.version>
|
||||
<gson.version>2.12.1</gson.version>
|
||||
<spotless.version>2.27.1</spotless.version>
|
||||
<spotless.skip>false</spotless.skip>
|
||||
<stax2.version>4.2.1</stax2.version>
|
||||
<stax2.version>4.2.2</stax2.version>
|
||||
<aws-java-sdk.version>1.12.780</aws-java-sdk.version>
|
||||
<jgrapht.version>1.5.2</jgrapht.version>
|
||||
</properties>
|
||||
@@ -208,8 +207,8 @@
|
||||
<version>${stax2.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>mysql</groupId>
|
||||
<artifactId>mysql-connector-java</artifactId>
|
||||
<groupId>com.mysql</groupId>
|
||||
<artifactId>mysql-connector-j</artifactId>
|
||||
<version>${mysql.connector.java.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
|
||||
Reference in New Issue
Block a user