(improvement)(headless)Introduce headless-chat. #1155

This commit is contained in:
jerryjzhang
2024-06-15 18:56:39 +08:00
parent f2a12e56b7
commit c3ecc05715
158 changed files with 771 additions and 692 deletions

121
headless/chat/pom.xml Normal file
View File

@@ -0,0 +1,121 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the headless-chat module, a child of the headless parent project. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Inherits configuration from the headless parent; the revision property is defined outside this file. -->
<parent>
<groupId>com.tencent.supersonic</groupId>
<artifactId>headless</artifactId>
<version>${revision}</version>
</parent>
<artifactId>headless-chat</artifactId>
<!-- Compile for Java 8 source and bytecode level, and read source files as UTF-8. -->
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<!--
  Dependencies declared without a version element are presumably pinned by a
  dependencyManagement section in an ancestor POM (not visible here); confirm before
  adding an explicit version in this file.
-->
<dependencies>
<!-- Lombok is scope "provided": on the compile classpath only, not passed on to consumers of this module. -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>${lombok.version}</version>
<scope>provided</scope>
</dependency>
<!-- Test-only: both the junit:junit artifact and JUnit Jupiter are on the test classpath. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-context</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Sibling module; project.version keeps it on the same version as this module. -->
<dependency>
<groupId>com.tencent.supersonic</groupId>
<artifactId>headless-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-test</artifactId>
<scope>test</scope>
</dependency>
<!-- Version is taken from the xk.time.version property, which is not defined in this file. -->
<dependency>
<groupId>com.github.xkzhangsan</groupId>
<artifactId>xk-time</artifactId>
<version>${xk.time.version}</version>
</dependency>
<!--
  Hadoop common library. The exclusions drop the slf4j-log4j12 binding, log4j, zookeeper,
  every org.apache.curator artifact, and the javax servlet-api from its transitive
  dependencies. NOTE(review): presumably this avoids clashes with the logging and servlet
  libraries brought in by spring-boot-starter-web; confirm. The hadoop-hdfs dependency
  below carries the same exclusion list; keep the two in sync.
-->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.curator</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Hadoop HDFS library, same version property and same exclusion list as hadoop-common above. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.curator</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Test-only mocking library. -->
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.pojo;
package com.tencent.supersonic.headless.chat;
import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.pojo;
package com.tencent.supersonic.headless.chat;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
@@ -11,8 +11,8 @@ import com.tencent.supersonic.headless.api.pojo.SemanticSchema;
import com.tencent.supersonic.headless.api.pojo.enums.MapModeEnum;
import com.tencent.supersonic.headless.api.pojo.enums.WorkflowState;
import com.tencent.supersonic.headless.api.pojo.request.QueryFilters;
import com.tencent.supersonic.headless.core.chat.query.SemanticQuery;
import com.tencent.supersonic.headless.core.config.ParserConfig;
import com.tencent.supersonic.headless.chat.parser.ParserConfig;
import com.tencent.supersonic.headless.chat.query.SemanticQuery;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
@@ -26,8 +26,6 @@ import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import static com.tencent.supersonic.headless.core.config.ParserConfig.PARSER_SHOW_COUNT;
@Data
@Builder
@NoArgsConstructor
@@ -54,7 +52,7 @@ public class QueryContext {
public List<SemanticQuery> getCandidateQueries() {
ParserConfig parserConfig = ContextUtils.getBean(ParserConfig.class);
int parseShowCount = Integer.valueOf(parserConfig.getParameterValue(PARSER_SHOW_COUNT));
int parseShowCount = Integer.valueOf(parserConfig.getParameterValue(ParserConfig.PARSER_SHOW_COUNT));
candidateQueries = candidateQueries.stream()
.sorted(Comparator.comparing(semanticQuery -> semanticQuery.getParseInfo().getScore(),
Comparator.reverseOrder()))

View File

@@ -1,9 +1,9 @@
package com.tencent.supersonic.headless.core.chat.corrector;
package com.tencent.supersonic.headless.chat.corrector;
import com.tencent.supersonic.common.util.jsqlparser.SqlSelectHelper;
import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.chat.QueryContext;
import lombok.extern.slf4j.Slf4j;
import org.springframework.util.CollectionUtils;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.corrector;
package com.tencent.supersonic.headless.chat.corrector;
import com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum;
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
@@ -8,7 +8,7 @@ import com.tencent.supersonic.common.util.jsqlparser.SqlSelectHelper;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.headless.api.pojo.SemanticSchema;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.chat.QueryContext;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;

View File

@@ -1,8 +1,8 @@
package com.tencent.supersonic.headless.core.chat.corrector;
package com.tencent.supersonic.headless.chat.corrector;
import com.tencent.supersonic.common.util.jsqlparser.SqlRemoveHelper;
import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.chat.QueryContext;
import lombok.extern.slf4j.Slf4j;
import java.util.ArrayList;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.corrector;
package com.tencent.supersonic.headless.chat.corrector;
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
import com.tencent.supersonic.common.util.ContextUtils;
@@ -7,7 +7,7 @@ import com.tencent.supersonic.common.util.jsqlparser.SqlSelectHelper;
import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.headless.api.pojo.SemanticSchema;
import com.tencent.supersonic.headless.api.pojo.SqlInfo;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.chat.QueryContext;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.core.env.Environment;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.corrector;
package com.tencent.supersonic.headless.chat.corrector;
import com.tencent.supersonic.common.util.ContextUtils;
import com.tencent.supersonic.common.util.jsqlparser.SqlAddHelper;
@@ -6,7 +6,7 @@ import com.tencent.supersonic.common.util.jsqlparser.SqlSelectFunctionHelper;
import com.tencent.supersonic.common.util.jsqlparser.SqlSelectHelper;
import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.headless.api.pojo.SemanticSchema;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.chat.QueryContext;
import lombok.extern.slf4j.Slf4j;
import net.sf.jsqlparser.expression.Expression;
import org.apache.commons.lang3.StringUtils;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.corrector;
package com.tencent.supersonic.headless.chat.corrector;
import com.tencent.supersonic.common.pojo.Constants;
import com.tencent.supersonic.common.pojo.enums.FilterOperatorEnum;
@@ -13,9 +13,9 @@ import com.tencent.supersonic.common.util.jsqlparser.SqlSelectHelper;
import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.headless.api.pojo.SemanticSchema;
import com.tencent.supersonic.headless.api.pojo.SqlInfo;
import com.tencent.supersonic.headless.core.chat.parser.llm.ParseResult;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq.ElementValue;
import com.tencent.supersonic.headless.chat.QueryContext;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.chat.parser.llm.ParseResult;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.util.CollectionUtils;
@@ -68,14 +68,14 @@ public class SchemaCorrector extends BaseSemanticCorrector {
}
private void updateFieldNameByLinkingValue(SemanticParseInfo semanticParseInfo) {
List<ElementValue> linking = getLinkingValues(semanticParseInfo);
List<LLMReq.ElementValue> linking = getLinkingValues(semanticParseInfo);
if (CollectionUtils.isEmpty(linking)) {
return;
}
Map<String, Set<String>> fieldValueToFieldNames = linking.stream().collect(
Collectors.groupingBy(ElementValue::getFieldValue,
Collectors.mapping(ElementValue::getFieldName, Collectors.toSet())));
Collectors.groupingBy(LLMReq.ElementValue::getFieldValue,
Collectors.mapping(LLMReq.ElementValue::getFieldName, Collectors.toSet())));
SqlInfo sqlInfo = semanticParseInfo.getSqlInfo();
@@ -83,7 +83,7 @@ public class SchemaCorrector extends BaseSemanticCorrector {
sqlInfo.setCorrectS2SQL(sql);
}
private List<ElementValue> getLinkingValues(SemanticParseInfo semanticParseInfo) {
private List<LLMReq.ElementValue> getLinkingValues(SemanticParseInfo semanticParseInfo) {
Object context = semanticParseInfo.getProperties().get(Constants.CONTEXT);
if (Objects.isNull(context)) {
return null;
@@ -97,14 +97,14 @@ public class SchemaCorrector extends BaseSemanticCorrector {
}
private void updateFieldValueByLinkingValue(SemanticParseInfo semanticParseInfo) {
List<ElementValue> linking = getLinkingValues(semanticParseInfo);
List<LLMReq.ElementValue> linking = getLinkingValues(semanticParseInfo);
if (CollectionUtils.isEmpty(linking)) {
return;
}
Map<String, Map<String, String>> filedNameToValueMap = linking.stream().collect(
Collectors.groupingBy(ElementValue::getFieldName,
Collectors.mapping(ElementValue::getFieldValue, Collectors.toMap(
Collectors.groupingBy(LLMReq.ElementValue::getFieldName,
Collectors.mapping(LLMReq.ElementValue::getFieldValue, Collectors.toMap(
oldValue -> oldValue,
newValue -> newValue,
(existingValue, newValue) -> newValue)
@@ -122,7 +122,7 @@ public class SchemaCorrector extends BaseSemanticCorrector {
if (CollectionUtils.isEmpty(whereExpressionList)) {
return;
}
List<ElementValue> linkingValues = getLinkingValues(semanticParseInfo);
List<LLMReq.ElementValue> linkingValues = getLinkingValues(semanticParseInfo);
SemanticSchema semanticSchema = queryContext.getSemanticSchema();
Set<String> dimensions = getDimensions(semanticParseInfo.getDataSetId(), semanticSchema);

View File

@@ -1,10 +1,10 @@
package com.tencent.supersonic.headless.core.chat.corrector;
package com.tencent.supersonic.headless.chat.corrector;
import com.tencent.supersonic.common.util.jsqlparser.SqlReplaceHelper;
import com.tencent.supersonic.common.util.jsqlparser.SqlSelectHelper;
import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.chat.QueryContext;
import lombok.extern.slf4j.Slf4j;
import org.springframework.util.CollectionUtils;

View File

@@ -1,8 +1,8 @@
package com.tencent.supersonic.headless.core.chat.corrector;
package com.tencent.supersonic.headless.chat.corrector;
import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.chat.QueryContext;
/**
* A semantic corrector checks validity of extracted semantic information and

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.corrector;
package com.tencent.supersonic.headless.chat.corrector;
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
@@ -10,8 +10,8 @@ import com.tencent.supersonic.common.util.jsqlparser.SqlSelectHelper;
import com.tencent.supersonic.common.util.jsqlparser.SqlRemoveHelper;
import com.tencent.supersonic.common.util.jsqlparser.DateVisitor.DateBoundInfo;
import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.core.utils.S2SqlDateHelper;
import com.tencent.supersonic.headless.chat.QueryContext;
import com.tencent.supersonic.headless.chat.utils.S2SqlDateHelper;
import lombok.extern.slf4j.Slf4j;
import net.sf.jsqlparser.JSQLParserException;
import net.sf.jsqlparser.expression.Expression;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.corrector;
package com.tencent.supersonic.headless.chat.corrector;
import com.tencent.supersonic.common.pojo.Constants;
@@ -10,7 +10,7 @@ import com.tencent.supersonic.headless.api.pojo.SchemaValueMap;
import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.headless.api.pojo.SemanticSchema;
import com.tencent.supersonic.headless.api.pojo.request.QueryFilters;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.chat.QueryContext;
import lombok.extern.slf4j.Slf4j;
import net.sf.jsqlparser.JSQLParserException;
import net.sf.jsqlparser.expression.Expression;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.knowledge;
package com.tencent.supersonic.headless.chat.knowledge;
import lombok.Builder;
import lombok.Data;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.knowledge;
package com.tencent.supersonic.headless.chat.knowledge;
import com.google.common.base.Objects;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;

View File

@@ -1,5 +1,5 @@
package com.tencent.supersonic.headless.core.chat.knowledge;
package com.tencent.supersonic.headless.chat.knowledge;
public enum DictUpdateMode {

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.knowledge;
package com.tencent.supersonic.headless.chat.knowledge;
import java.util.Objects;
import lombok.Data;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.knowledge;
package com.tencent.supersonic.headless.chat.knowledge;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.CoreDictionary;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.knowledge;
package com.tencent.supersonic.headless.chat.knowledge;
import com.google.common.base.Objects;
import java.util.Map;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.knowledge;
package com.tencent.supersonic.headless.chat.knowledge;
import com.hankcs.hanlp.corpus.io.IIOAdapter;
import java.io.IOException;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.knowledge;
package com.tencent.supersonic.headless.chat.knowledge;
import com.google.common.base.Objects;
import java.util.List;

View File

@@ -1,8 +1,8 @@
package com.tencent.supersonic.headless.core.chat.knowledge;
package com.tencent.supersonic.headless.chat.knowledge;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
import com.tencent.supersonic.headless.core.chat.knowledge.helper.HanlpHelper;
import com.tencent.supersonic.headless.chat.knowledge.helper.HanlpHelper;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.knowledge;
package com.tencent.supersonic.headless.chat.knowledge;
import java.io.Serializable;
import lombok.Data;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.knowledge;
package com.tencent.supersonic.headless.chat.knowledge;
import com.tencent.supersonic.common.config.EmbeddingConfig;
import com.tencent.supersonic.common.pojo.Constants;
@@ -7,7 +7,7 @@ import com.tencent.supersonic.common.util.embedding.Retrieval;
import com.tencent.supersonic.common.util.embedding.RetrieveQuery;
import com.tencent.supersonic.common.util.embedding.RetrieveQueryResult;
import com.tencent.supersonic.common.util.embedding.S2EmbeddingStore;
import com.tencent.supersonic.headless.core.chat.knowledge.helper.NatureHelper;
import com.tencent.supersonic.headless.chat.knowledge.helper.NatureHelper;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.knowledge;
package com.tencent.supersonic.headless.chat.knowledge;
import static com.hankcs.hanlp.utility.Predefine.logger;
@@ -13,9 +13,8 @@ import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
import com.hankcs.hanlp.dictionary.other.CharTable;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.utility.LexiconUtility;
import com.hankcs.hanlp.utility.Predefine;
import com.hankcs.hanlp.utility.TextUtility;
import com.tencent.supersonic.headless.core.chat.knowledge.helper.HanlpHelper;
import com.tencent.supersonic.headless.chat.knowledge.helper.HanlpHelper;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
@@ -146,10 +145,10 @@ public class MultiCustomDictionary extends DynamicCustomDictionary {
this.path = path;
long start = System.currentTimeMillis();
if (!this.loadMainDictionary(path[0])) {
Predefine.logger.warning("自定义词典" + Arrays.toString(path) + "加载失败");
logger.warning("自定义词典" + Arrays.toString(path) + "加载失败");
return false;
} else {
Predefine.logger.info(
logger.info(
"自定义词典加载成功:" + this.dat.size() + "个词条,耗时" + (System.currentTimeMillis() - start) + "ms");
this.path = path;
return true;
@@ -168,7 +167,7 @@ public class MultiCustomDictionary extends DynamicCustomDictionary {
public static boolean loadMainDictionary(String mainPath, String[] path,
DoubleArrayTrie<CoreDictionary.Attribute> dat, boolean isCache,
boolean addToSuggestTrie) {
Predefine.logger.info("自定义词典开始加载:" + mainPath);
logger.info("自定义词典开始加载:" + mainPath);
if (loadDat(mainPath, dat)) {
return true;
} else {
@@ -188,20 +187,20 @@ public class MultiCustomDictionary extends DynamicCustomDictionary {
try {
defaultNature = LexiconUtility.convertStringToNature(nature, customNatureCollector);
} catch (Exception var16) {
Predefine.logger.severe("配置文件【" + p + "】写错了!" + var16);
logger.severe("配置文件【" + p + "】写错了!" + var16);
continue;
}
}
Predefine.logger.info("以默认词性[" + defaultNature + "]加载自定义词典" + p + "中……");
logger.info("以默认词性[" + defaultNature + "]加载自定义词典" + p + "中……");
boolean success = load(p, defaultNature, map, customNatureCollector, addToSuggestTrie);
if (!success) {
Predefine.logger.warning("失败:" + p);
logger.warning("失败:" + p);
}
}
if (map.size() == 0) {
Predefine.logger.warning("没有加载到任何词条");
logger.warning("没有加载到任何词条");
map.put("未##它", null);
}
@@ -264,7 +263,8 @@ public class MultiCustomDictionary extends DynamicCustomDictionary {
public static boolean loadDat(String path, String[] customDicPath, DoubleArrayTrie<CoreDictionary.Attribute> dat) {
try {
if (HanLP.Config.CustomDictionaryAutoRefreshCache && isDicNeedUpdate(path, customDicPath)) {
if (HanLP.Config.CustomDictionaryAutoRefreshCache
&& DynamicCustomDictionary.isDicNeedUpdate(path, customDicPath)) {
return false;
} else {
ByteArray byteArray = ByteArray.createByteArray(path + ".bin");

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.knowledge;
package com.tencent.supersonic.headless.chat.knowledge;
import com.hankcs.hanlp.collection.trie.bintrie.BaseNode;
import com.hankcs.hanlp.collection.trie.bintrie.BinTrie;
@@ -7,7 +7,7 @@ import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.seg.common.Term;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.request.DimensionValueReq;
import com.tencent.supersonic.headless.core.chat.knowledge.helper.NatureHelper;
import com.tencent.supersonic.headless.chat.knowledge.helper.NatureHelper;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.util.CollectionUtils;

View File

@@ -1,7 +1,8 @@
package com.tencent.supersonic.headless.core.chat.knowledge.builder;
package com.tencent.supersonic.headless.chat.knowledge.builder;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.core.chat.knowledge.DictWord;
import com.tencent.supersonic.headless.chat.knowledge.DictWord;
import java.util.ArrayList;
import java.util.List;
import lombok.extern.slf4j.Slf4j;

View File

@@ -1,7 +1,7 @@
package com.tencent.supersonic.headless.core.chat.knowledge.builder;
package com.tencent.supersonic.headless.chat.knowledge.builder;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.core.chat.knowledge.DictWord;
import com.tencent.supersonic.headless.chat.knowledge.DictWord;
import java.util.ArrayList;
import java.util.List;

View File

@@ -1,9 +1,10 @@
package com.tencent.supersonic.headless.core.chat.knowledge.builder;
package com.tencent.supersonic.headless.chat.knowledge.builder;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.core.chat.knowledge.DictWord;
import com.tencent.supersonic.headless.chat.knowledge.DictWord;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;

View File

@@ -1,10 +1,11 @@
package com.tencent.supersonic.headless.core.chat.knowledge.builder;
package com.tencent.supersonic.headless.chat.knowledge.builder;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.core.chat.knowledge.DictWord;
import com.tencent.supersonic.headless.chat.knowledge.DictWord;
import java.util.List;
import java.util.Objects;
import lombok.extern.slf4j.Slf4j;

View File

@@ -1,9 +1,10 @@
package com.tencent.supersonic.headless.core.chat.knowledge.builder;
package com.tencent.supersonic.headless.chat.knowledge.builder;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.core.chat.knowledge.DictWord;
import com.tencent.supersonic.headless.chat.knowledge.DictWord;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;

View File

@@ -1,9 +1,10 @@
package com.tencent.supersonic.headless.core.chat.knowledge.builder;
package com.tencent.supersonic.headless.chat.knowledge.builder;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.core.chat.knowledge.DictWord;
import com.tencent.supersonic.headless.chat.knowledge.DictWord;
import java.util.List;
import java.util.Objects;
import lombok.extern.slf4j.Slf4j;

View File

@@ -1,9 +1,9 @@
package com.tencent.supersonic.headless.core.chat.knowledge.builder;
package com.tencent.supersonic.headless.chat.knowledge.builder;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.core.chat.knowledge.DictWord;
import com.tencent.supersonic.headless.chat.knowledge.DictWord;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;

View File

@@ -1,10 +1,11 @@
package com.tencent.supersonic.headless.core.chat.knowledge.builder;
package com.tencent.supersonic.headless.chat.knowledge.builder;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.core.chat.knowledge.DictWord;
import com.tencent.supersonic.headless.chat.knowledge.DictWord;
import java.util.List;
import java.util.Objects;
import lombok.extern.slf4j.Slf4j;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.knowledge.builder;
package com.tencent.supersonic.headless.chat.knowledge.builder;
import com.tencent.supersonic.common.pojo.enums.DictWordType;

View File

@@ -1,6 +1,6 @@
package com.tencent.supersonic.headless.core.config;
package com.tencent.supersonic.headless.chat.knowledge.file;
import com.tencent.supersonic.headless.core.chat.knowledge.helper.HanlpHelper;
import com.tencent.supersonic.headless.chat.knowledge.helper.HanlpHelper;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.file;
package com.tencent.supersonic.headless.chat.knowledge.file;
import java.util.List;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.file;
package com.tencent.supersonic.headless.chat.knowledge.file;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;

View File

@@ -1,6 +1,6 @@
package com.tencent.supersonic.headless.core.file;
package com.tencent.supersonic.headless.chat.knowledge.file;
import com.tencent.supersonic.headless.core.chat.knowledge.helper.HanlpHelper;
import com.tencent.supersonic.headless.chat.knowledge.helper.HanlpHelper;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;

View File

@@ -1,7 +1,6 @@
package com.tencent.supersonic.headless.core.chat.knowledge.helper;
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
package com.tencent.supersonic.headless.chat.knowledge.helper;
import com.hankcs.hanlp.HanLP.Config;
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
import java.io.File;
@@ -56,7 +55,7 @@ public class FileHelper {
* @param customDictionary
*/
public static void resetCustomPath(DynamicCustomDictionary customDictionary) {
String[] path = CustomDictionaryPath;
String[] path = Config.CustomDictionaryPath;
String customPath = getCustomPath(path);
File customFolder = new File(customPath);
@@ -72,11 +71,11 @@ public class FileHelper {
}
log.debug("CustomDictionaryPath:{}", fileList);
CustomDictionaryPath = fileList.toArray(new String[0]);
customDictionary.path = (CustomDictionaryPath == null || CustomDictionaryPath.length == 0) ? path
: CustomDictionaryPath;
if (CustomDictionaryPath == null || CustomDictionaryPath.length == 0) {
CustomDictionaryPath = path;
Config.CustomDictionaryPath = fileList.toArray(new String[0]);
customDictionary.path = (Config.CustomDictionaryPath == null || Config.CustomDictionaryPath.length == 0) ? path
: Config.CustomDictionaryPath;
if (Config.CustomDictionaryPath == null || Config.CustomDictionaryPath.length == 0) {
Config.CustomDictionaryPath = path;
}
}
}

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.knowledge.helper;
package com.tencent.supersonic.headless.chat.knowledge.helper;
import com.google.common.collect.Lists;
import com.hankcs.hanlp.HanLP;
@@ -9,11 +9,11 @@ import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
import com.tencent.supersonic.headless.core.chat.knowledge.DictWord;
import com.tencent.supersonic.headless.core.chat.knowledge.HadoopFileIOAdapter;
import com.tencent.supersonic.headless.core.chat.knowledge.MapResult;
import com.tencent.supersonic.headless.core.chat.knowledge.MultiCustomDictionary;
import com.tencent.supersonic.headless.core.chat.knowledge.SearchService;
import com.tencent.supersonic.headless.chat.knowledge.DictWord;
import com.tencent.supersonic.headless.chat.knowledge.HadoopFileIOAdapter;
import com.tencent.supersonic.headless.chat.knowledge.MapResult;
import com.tencent.supersonic.headless.chat.knowledge.MultiCustomDictionary;
import com.tencent.supersonic.headless.chat.knowledge.SearchService;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.BeanUtils;
@@ -30,8 +30,6 @@ import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
/**
* HanLP helper
*/
@@ -74,7 +72,7 @@ public class HanlpHelper {
if (CustomDictionary == null) {
synchronized (HanlpHelper.class) {
if (CustomDictionary == null) {
CustomDictionary = new MultiCustomDictionary(CustomDictionaryPath);
CustomDictionary = new MultiCustomDictionary(HanLP.Config.CustomDictionaryPath);
}
}
}
@@ -90,7 +88,7 @@ public class HanlpHelper {
final long startTime = System.currentTimeMillis();
if (CustomDictionaryPath == null || CustomDictionaryPath.length == 0) {
if (HanLP.Config.CustomDictionaryPath == null || HanLP.Config.CustomDictionaryPath.length == 0) {
return false;
}
if (HanLP.Config.IOAdapter instanceof HadoopFileIOAdapter) {

View File

@@ -1,8 +1,6 @@
package com.tencent.supersonic.headless.core.chat.knowledge.helper;
package com.tencent.supersonic.headless.chat.knowledge.helper;
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.HanLP.Config;
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
import com.hankcs.hanlp.utility.Predefine;
import java.io.IOException;
@@ -57,7 +55,7 @@ public class HdfsFileHelper {
* @throws IOException
*/
public static void resetCustomPath(DynamicCustomDictionary customDictionary) throws IOException {
String[] path = HanLP.Config.CustomDictionaryPath;
String[] path = Config.CustomDictionaryPath;
FileSystem fs = FileSystem.get(URI.create(path[0]), new Configuration());
String cacheFilePath = path[0] + Predefine.BIN_EXT;
int customBase = cacheFilePath.lastIndexOf(FileHelper.FILE_SPILT);
@@ -65,11 +63,11 @@ public class HdfsFileHelper {
log.info("customPath:{}", customPath);
List<String> fileList = getFileList(fs, new Path(customPath));
log.info("CustomDictionaryPath:{}", fileList);
CustomDictionaryPath = fileList.toArray(new String[0]);
customDictionary.path = (CustomDictionaryPath == null || CustomDictionaryPath.length == 0) ? path
: CustomDictionaryPath;
if (CustomDictionaryPath == null || CustomDictionaryPath.length == 0) {
CustomDictionaryPath = path;
Config.CustomDictionaryPath = fileList.toArray(new String[0]);
customDictionary.path = (Config.CustomDictionaryPath == null || Config.CustomDictionaryPath.length == 0) ? path
: Config.CustomDictionaryPath;
if (Config.CustomDictionaryPath == null || Config.CustomDictionaryPath.length == 0) {
Config.CustomDictionaryPath = path;
}
}

View File

@@ -1,11 +1,11 @@
package com.tencent.supersonic.headless.core.chat.knowledge.helper;
package com.tencent.supersonic.headless.chat.knowledge.helper;
import com.google.common.collect.Lists;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.SchemaElementType;
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
import com.tencent.supersonic.headless.core.chat.knowledge.DataSetInfoStat;
import com.tencent.supersonic.headless.chat.knowledge.DataSetInfoStat;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.util.CollectionUtils;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.mapper;
package com.tencent.supersonic.headless.chat.mapper;
import com.tencent.supersonic.headless.api.pojo.DataSetSchema;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
@@ -6,7 +6,7 @@ import com.tencent.supersonic.headless.api.pojo.SchemaElementMatch;
import com.tencent.supersonic.headless.api.pojo.SchemaElementType;
import com.tencent.supersonic.headless.api.pojo.SchemaMapInfo;
import com.tencent.supersonic.headless.api.pojo.SemanticSchema;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.chat.QueryContext;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.BeanUtils;

View File

@@ -1,11 +1,10 @@
package com.tencent.supersonic.headless.core.chat.mapper;
package com.tencent.supersonic.headless.chat.mapper;
import com.tencent.supersonic.headless.api.pojo.enums.MapModeEnum;
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
import com.tencent.supersonic.headless.core.config.MapperConfig;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.core.chat.knowledge.helper.NatureHelper;
import com.tencent.supersonic.headless.chat.QueryContext;
import com.tencent.supersonic.headless.chat.knowledge.helper.NatureHelper;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;

View File

@@ -1,12 +1,12 @@
package com.tencent.supersonic.headless.core.chat.mapper;
package com.tencent.supersonic.headless.chat.mapper;
import com.tencent.supersonic.common.pojo.Constants;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.api.pojo.SchemaElementMatch;
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.core.chat.knowledge.DatabaseMapResult;
import com.tencent.supersonic.headless.chat.QueryContext;
import com.tencent.supersonic.headless.chat.knowledge.DatabaseMapResult;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
@@ -20,9 +20,6 @@ import java.util.Map.Entry;
import java.util.Set;
import java.util.stream.Collectors;
import static com.tencent.supersonic.headless.core.config.MapperConfig.MAPPER_NAME_THRESHOLD;
import static com.tencent.supersonic.headless.core.config.MapperConfig.MAPPER_NAME_THRESHOLD_MIN;
/**
* DatabaseMatchStrategy uses SQL LIKE operator to match schema elements.
* It currently supports fuzzy matching against names and aliases.
@@ -91,8 +88,8 @@ public class DatabaseMatchStrategy extends BaseMatchStrategy<DatabaseMapResult>
}
private Double getThreshold(QueryContext queryContext) {
Double threshold = Double.valueOf(mapperConfig.getParameterValue(MAPPER_NAME_THRESHOLD));
Double minThreshold = Double.valueOf(mapperConfig.getParameterValue(MAPPER_NAME_THRESHOLD_MIN));
Double threshold = Double.valueOf(mapperConfig.getParameterValue(MapperConfig.MAPPER_NAME_THRESHOLD));
Double minThreshold = Double.valueOf(mapperConfig.getParameterValue(MapperConfig.MAPPER_NAME_THRESHOLD_MIN));
Map<Long, List<SchemaElementMatch>> modelElementMatches = queryContext.getMapInfo().getDataSetElementMatches();

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.mapper;
package com.tencent.supersonic.headless.chat.mapper;
import com.tencent.supersonic.common.util.ContextUtils;
@@ -7,10 +7,10 @@ import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.api.pojo.SchemaElementMatch;
import com.tencent.supersonic.headless.api.pojo.SchemaElementType;
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.core.chat.knowledge.EmbeddingResult;
import com.tencent.supersonic.headless.core.chat.knowledge.builder.BaseWordBuilder;
import com.tencent.supersonic.headless.core.chat.knowledge.helper.HanlpHelper;
import com.tencent.supersonic.headless.chat.knowledge.EmbeddingResult;
import com.tencent.supersonic.headless.chat.knowledge.builder.BaseWordBuilder;
import com.tencent.supersonic.headless.chat.knowledge.helper.HanlpHelper;
import com.tencent.supersonic.headless.chat.QueryContext;
import lombok.extern.slf4j.Slf4j;
import java.util.List;

View File

@@ -1,13 +1,13 @@
package com.tencent.supersonic.headless.core.chat.mapper;
package com.tencent.supersonic.headless.chat.mapper;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.Constants;
import com.tencent.supersonic.common.util.embedding.Retrieval;
import com.tencent.supersonic.common.util.embedding.RetrieveQuery;
import com.tencent.supersonic.common.util.embedding.RetrieveQueryResult;
import com.tencent.supersonic.headless.core.chat.knowledge.EmbeddingResult;
import com.tencent.supersonic.headless.core.chat.knowledge.MetaEmbeddingService;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.chat.QueryContext;
import com.tencent.supersonic.headless.chat.knowledge.EmbeddingResult;
import com.tencent.supersonic.headless.chat.knowledge.MetaEmbeddingService;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
@@ -21,13 +21,10 @@ import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import static com.tencent.supersonic.headless.core.config.MapperConfig.EMBEDDING_MAPPER_BATCH;
import static com.tencent.supersonic.headless.core.config.MapperConfig.EMBEDDING_MAPPER_MAX;
import static com.tencent.supersonic.headless.core.config.MapperConfig.EMBEDDING_MAPPER_MIN;
import static com.tencent.supersonic.headless.core.config.MapperConfig.EMBEDDING_MAPPER_NUMBER;
import static com.tencent.supersonic.headless.core.config.MapperConfig.EMBEDDING_MAPPER_ROUND_NUMBER;
import static com.tencent.supersonic.headless.core.config.MapperConfig.EMBEDDING_MAPPER_THRESHOLD;
import static com.tencent.supersonic.headless.core.config.MapperConfig.EMBEDDING_MAPPER_THRESHOLD_MIN;
import static com.tencent.supersonic.headless.chat.mapper.MapperConfig.EMBEDDING_MAPPER_NUMBER;
import static com.tencent.supersonic.headless.chat.mapper.MapperConfig.EMBEDDING_MAPPER_ROUND_NUMBER;
import static com.tencent.supersonic.headless.chat.mapper.MapperConfig.EMBEDDING_MAPPER_THRESHOLD;
import static com.tencent.supersonic.headless.chat.mapper.MapperConfig.EMBEDDING_MAPPER_THRESHOLD_MIN;
/**
* EmbeddingMatchStrategy uses vector database to perform
@@ -60,9 +57,9 @@ public class EmbeddingMatchStrategy extends BaseMatchStrategy<EmbeddingResult> {
@Override
protected void detectByBatch(QueryContext queryContext, Set<EmbeddingResult> results,
Set<Long> detectDataSetIds, Set<String> detectSegments) {
int embedddingMapperMin = Integer.valueOf(mapperConfig.getParameterValue(EMBEDDING_MAPPER_MIN));
int embedddingMapperMax = Integer.valueOf(mapperConfig.getParameterValue(EMBEDDING_MAPPER_MAX));
int embeddingMapperBatch = Integer.valueOf(mapperConfig.getParameterValue(EMBEDDING_MAPPER_BATCH));
int embedddingMapperMin = Integer.valueOf(mapperConfig.getParameterValue(MapperConfig.EMBEDDING_MAPPER_MIN));
int embedddingMapperMax = Integer.valueOf(mapperConfig.getParameterValue(MapperConfig.EMBEDDING_MAPPER_MAX));
int embeddingMapperBatch = Integer.valueOf(mapperConfig.getParameterValue(MapperConfig.EMBEDDING_MAPPER_BATCH));
List<String> queryTextsList = detectSegments.stream()
.map(detectSegment -> detectSegment.trim())

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.mapper;
package com.tencent.supersonic.headless.chat.mapper;
import com.tencent.supersonic.headless.api.pojo.DataSetSchema;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
@@ -6,7 +6,7 @@ import com.tencent.supersonic.headless.api.pojo.SchemaElementMatch;
import com.tencent.supersonic.headless.api.pojo.SchemaElementType;
import com.tencent.supersonic.headless.api.pojo.SchemaMapInfo;
import com.tencent.supersonic.headless.api.pojo.SemanticSchema;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.chat.QueryContext;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.BeanUtils;
import org.springframework.util.CollectionUtils;

View File

@@ -1,10 +1,10 @@
package com.tencent.supersonic.headless.core.chat.mapper;
package com.tencent.supersonic.headless.chat.mapper;
import com.tencent.supersonic.common.pojo.Constants;
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
import com.tencent.supersonic.headless.core.chat.knowledge.HanlpMapResult;
import com.tencent.supersonic.headless.core.chat.knowledge.KnowledgeBaseService;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.chat.QueryContext;
import com.tencent.supersonic.headless.chat.knowledge.HanlpMapResult;
import com.tencent.supersonic.headless.chat.knowledge.KnowledgeBaseService;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
@@ -20,13 +20,9 @@ import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import static com.tencent.supersonic.headless.core.config.MapperConfig.MAPPER_DETECTION_MAX_SIZE;
import static com.tencent.supersonic.headless.core.config.MapperConfig.MAPPER_DETECTION_SIZE;
import static com.tencent.supersonic.headless.core.config.MapperConfig.MAPPER_DIMENSION_VALUE_SIZE;
import static com.tencent.supersonic.headless.core.config.MapperConfig.MAPPER_NAME_THRESHOLD;
import static com.tencent.supersonic.headless.core.config.MapperConfig.MAPPER_NAME_THRESHOLD_MIN;
import static com.tencent.supersonic.headless.core.config.MapperConfig.MAPPER_VALUE_THRESHOLD;
import static com.tencent.supersonic.headless.core.config.MapperConfig.MAPPER_VALUE_THRESHOLD_MIN;
import static com.tencent.supersonic.headless.chat.mapper.MapperConfig.MAPPER_DETECTION_MAX_SIZE;
import static com.tencent.supersonic.headless.chat.mapper.MapperConfig.MAPPER_DETECTION_SIZE;
import static com.tencent.supersonic.headless.chat.mapper.MapperConfig.MAPPER_DIMENSION_VALUE_SIZE;
/**
* HanlpDictMatchStrategy uses <a href="https://www.hanlp.com/">HanLP</a> to
@@ -42,7 +38,7 @@ public class HanlpDictMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
@Override
public Map<MatchText, List<HanlpMapResult>> match(QueryContext queryContext, List<S2Term> terms,
Set<Long> detectDataSetIds) {
Set<Long> detectDataSetIds) {
String text = queryContext.getQueryText();
if (Objects.isNull(terms) || StringUtils.isEmpty(text)) {
return null;
@@ -131,11 +127,11 @@ public class HanlpDictMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
}
public double getThresholdMatch(List<String> natures, QueryContext queryContext) {
Double threshold = Double.valueOf(mapperConfig.getParameterValue(MAPPER_NAME_THRESHOLD));
Double minThreshold = Double.valueOf(mapperConfig.getParameterValue(MAPPER_NAME_THRESHOLD_MIN));
Double threshold = Double.valueOf(mapperConfig.getParameterValue(MapperConfig.MAPPER_NAME_THRESHOLD));
Double minThreshold = Double.valueOf(mapperConfig.getParameterValue(MapperConfig.MAPPER_NAME_THRESHOLD_MIN));
if (mapperHelper.existDimensionValues(natures)) {
threshold = Double.valueOf(mapperConfig.getParameterValue(MAPPER_VALUE_THRESHOLD));
minThreshold = Double.valueOf(mapperConfig.getParameterValue(MAPPER_VALUE_THRESHOLD_MIN));
threshold = Double.valueOf(mapperConfig.getParameterValue(MapperConfig.MAPPER_VALUE_THRESHOLD));
minThreshold = Double.valueOf(mapperConfig.getParameterValue(MapperConfig.MAPPER_VALUE_THRESHOLD_MIN));
}
return getThreshold(threshold, minThreshold, queryContext.getMapModeEnum());

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.mapper;
package com.tencent.supersonic.headless.chat.mapper;
import com.tencent.supersonic.common.util.ContextUtils;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
@@ -6,12 +6,12 @@ import com.tencent.supersonic.headless.api.pojo.SchemaElementMatch;
import com.tencent.supersonic.headless.api.pojo.SchemaElementType;
import com.tencent.supersonic.headless.api.pojo.SchemaMapInfo;
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
import com.tencent.supersonic.headless.core.chat.knowledge.builder.BaseWordBuilder;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.core.chat.knowledge.DatabaseMapResult;
import com.tencent.supersonic.headless.core.chat.knowledge.HanlpMapResult;
import com.tencent.supersonic.headless.core.chat.knowledge.helper.HanlpHelper;
import com.tencent.supersonic.headless.core.chat.knowledge.helper.NatureHelper;
import com.tencent.supersonic.headless.chat.knowledge.DatabaseMapResult;
import com.tencent.supersonic.headless.chat.knowledge.HanlpMapResult;
import com.tencent.supersonic.headless.chat.knowledge.builder.BaseWordBuilder;
import com.tencent.supersonic.headless.chat.knowledge.helper.HanlpHelper;
import com.tencent.supersonic.headless.chat.knowledge.helper.NatureHelper;
import com.tencent.supersonic.headless.chat.QueryContext;
import lombok.extern.slf4j.Slf4j;
import org.springframework.util.CollectionUtils;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.config;
package com.tencent.supersonic.headless.chat.mapper;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.config.ParameterConfig;

View File

@@ -1,8 +1,8 @@
package com.tencent.supersonic.headless.core.chat.mapper;
package com.tencent.supersonic.headless.chat.mapper;
import com.hankcs.hanlp.algorithm.EditDistance;
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
import com.tencent.supersonic.headless.core.chat.knowledge.helper.NatureHelper;
import com.tencent.supersonic.headless.chat.knowledge.helper.NatureHelper;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;

View File

@@ -1,8 +1,8 @@
package com.tencent.supersonic.headless.core.chat.mapper;
package com.tencent.supersonic.headless.chat.mapper;
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.chat.QueryContext;
import java.util.List;
import java.util.Map;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.mapper;
package com.tencent.supersonic.headless.chat.mapper;
import lombok.Builder;
import lombok.Data;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.mapper;
package com.tencent.supersonic.headless.chat.mapper;
import com.tencent.supersonic.headless.api.pojo.SchemaElementType;
import lombok.Data;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.mapper;
package com.tencent.supersonic.headless.chat.mapper;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.Constants;
@@ -8,8 +8,8 @@ import com.tencent.supersonic.headless.api.pojo.SchemaElementType;
import com.tencent.supersonic.headless.api.pojo.SchemaMapInfo;
import com.tencent.supersonic.headless.api.pojo.request.QueryFilter;
import com.tencent.supersonic.headless.api.pojo.request.QueryFilters;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.core.chat.knowledge.builder.BaseWordBuilder;
import com.tencent.supersonic.headless.chat.knowledge.builder.BaseWordBuilder;
import com.tencent.supersonic.headless.chat.QueryContext;
import lombok.extern.slf4j.Slf4j;
import org.springframework.util.CollectionUtils;

View File

@@ -1,7 +1,7 @@
package com.tencent.supersonic.headless.core.chat.mapper;
package com.tencent.supersonic.headless.chat.mapper;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.chat.QueryContext;
/**
* A schema mapper identifies references to schema elements(metrics/dimensions/entities/values)

View File

@@ -1,12 +1,12 @@
package com.tencent.supersonic.headless.core.chat.mapper;
package com.tencent.supersonic.headless.chat.mapper;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.core.chat.knowledge.HanlpMapResult;
import com.tencent.supersonic.headless.core.chat.knowledge.KnowledgeBaseService;
import com.tencent.supersonic.headless.core.chat.knowledge.SearchService;
import com.tencent.supersonic.headless.chat.QueryContext;
import com.tencent.supersonic.headless.chat.knowledge.HanlpMapResult;
import com.tencent.supersonic.headless.chat.knowledge.KnowledgeBaseService;
import com.tencent.supersonic.headless.chat.knowledge.SearchService;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
@@ -33,7 +33,7 @@ public class SearchMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
@Override
public Map<MatchText, List<HanlpMapResult>> match(QueryContext queryContext, List<S2Term> originals,
Set<Long> detectDataSetIds) {
Set<Long> detectDataSetIds) {
String text = queryContext.getQueryText();
Map<Integer, Integer> regOffsetToLength = getRegOffsetToLength(originals);

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.config;
package com.tencent.supersonic.headless.chat.parser;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.config.ParameterConfig;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.parser;
package com.tencent.supersonic.headless.chat.parser;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.common.pojo.enums.QueryType;
@@ -8,11 +8,11 @@ import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.headless.api.pojo.SemanticSchema;
import com.tencent.supersonic.headless.api.pojo.SqlInfo;
import com.tencent.supersonic.headless.core.chat.query.SemanticQuery;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMSqlQuery;
import com.tencent.supersonic.headless.core.chat.query.rule.RuleSemanticQuery;
import com.tencent.supersonic.headless.core.pojo.ChatContext;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.chat.query.SemanticQuery;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMSqlQuery;
import com.tencent.supersonic.headless.chat.query.rule.RuleSemanticQuery;
import com.tencent.supersonic.headless.chat.ChatContext;
import com.tencent.supersonic.headless.chat.QueryContext;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;

View File

@@ -1,16 +1,16 @@
package com.tencent.supersonic.headless.core.chat.parser;
package com.tencent.supersonic.headless.chat.parser;
import com.tencent.supersonic.common.util.ContextUtils;
import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.headless.core.config.ParserConfig;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.core.chat.query.SemanticQuery;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMSqlQuery;
import com.tencent.supersonic.headless.chat.QueryContext;
import com.tencent.supersonic.headless.chat.query.SemanticQuery;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMSqlQuery;
import lombok.extern.slf4j.Slf4j;
import static com.tencent.supersonic.headless.core.config.ParserConfig.PARSER_TEXT_LENGTH_THRESHOLD;
import static com.tencent.supersonic.headless.core.config.ParserConfig.PARSER_TEXT_LENGTH_THRESHOLD_LONG;
import static com.tencent.supersonic.headless.core.config.ParserConfig.PARSER_TEXT_LENGTH_THRESHOLD_SHORT;
import static com.tencent.supersonic.headless.chat.parser.ParserConfig.PARSER_TEXT_LENGTH_THRESHOLD;
import static com.tencent.supersonic.headless.chat.parser.ParserConfig.PARSER_TEXT_LENGTH_THRESHOLD_LONG;
import static com.tencent.supersonic.headless.chat.parser.ParserConfig.PARSER_TEXT_LENGTH_THRESHOLD_SHORT;
/**
* This checker can be used by semantic parsers to check if query intent

View File

@@ -1,7 +1,7 @@
package com.tencent.supersonic.headless.core.chat.parser;
package com.tencent.supersonic.headless.chat.parser;
import com.tencent.supersonic.headless.core.pojo.ChatContext;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.chat.ChatContext;
import com.tencent.supersonic.headless.chat.QueryContext;
/**
* A semantic parser understands user queries and generates semantic query statement.

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.parser.llm;
package com.tencent.supersonic.headless.chat.parser.llm;
import lombok.Data;

View File

@@ -1,6 +1,7 @@
package com.tencent.supersonic.headless.core.chat.parser.llm;
package com.tencent.supersonic.headless.chat.parser.llm;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.chat.QueryContext;
import java.util.Set;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.parser.llm;
package com.tencent.supersonic.headless.chat.parser.llm;
import lombok.Data;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.parser.llm;
package com.tencent.supersonic.headless.chat.parser.llm;
import com.fasterxml.jackson.core.type.TypeReference;

View File

@@ -1,10 +1,10 @@
package com.tencent.supersonic.headless.core.chat.parser.llm;
package com.tencent.supersonic.headless.chat.parser.llm;
import com.tencent.supersonic.headless.api.pojo.SchemaElementMatch;
import com.tencent.supersonic.headless.api.pojo.SchemaElementType;
import com.tencent.supersonic.headless.api.pojo.SchemaMapInfo;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.core.chat.query.SemanticQuery;
import com.tencent.supersonic.headless.chat.query.SemanticQuery;
import com.tencent.supersonic.headless.chat.QueryContext;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import java.util.ArrayList;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.parser.llm;
package com.tencent.supersonic.headless.chat.parser.llm;
import lombok.extern.slf4j.Slf4j;

View File

@@ -1,8 +1,8 @@
package com.tencent.supersonic.headless.core.chat.parser.llm;
package com.tencent.supersonic.headless.chat.parser.llm;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMResp;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMResp;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.config;
package com.tencent.supersonic.headless.chat.parser.llm;
import lombok.Data;

View File

@@ -1,8 +1,8 @@
package com.tencent.supersonic.headless.core.chat.parser.llm;
package com.tencent.supersonic.headless.chat.parser.llm;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMResp;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMResp;
/**
* LLMProxy encapsulates functions performed by LLMs so that multiple

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.parser.llm;
package com.tencent.supersonic.headless.chat.parser.llm;
import com.tencent.supersonic.common.pojo.enums.DataFormatTypeEnum;
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
@@ -7,15 +7,13 @@ import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.api.pojo.SchemaElementMatch;
import com.tencent.supersonic.headless.api.pojo.SchemaElementType;
import com.tencent.supersonic.headless.api.pojo.SemanticSchema;
import com.tencent.supersonic.headless.core.chat.parser.SatisfactionChecker;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq.ElementValue;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMResp;
import com.tencent.supersonic.headless.core.config.ParserConfig;
import com.tencent.supersonic.headless.core.config.LLMParserConfig;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.core.utils.ComponentFactory;
import com.tencent.supersonic.headless.core.utils.S2SqlDateHelper;
import com.tencent.supersonic.headless.chat.utils.ComponentFactory;
import com.tencent.supersonic.headless.chat.parser.ParserConfig;
import com.tencent.supersonic.headless.chat.QueryContext;
import com.tencent.supersonic.headless.chat.utils.S2SqlDateHelper;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMResp;
import com.tencent.supersonic.headless.chat.parser.SatisfactionChecker;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
@@ -32,8 +30,8 @@ import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import static com.tencent.supersonic.headless.core.config.ParserConfig.PARSER_LINKING_VALUE_ENABLE;
import static com.tencent.supersonic.headless.core.config.ParserConfig.PARSER_STRATEGY_TYPE;
import static com.tencent.supersonic.headless.chat.parser.ParserConfig.PARSER_LINKING_VALUE_ENABLE;
import static com.tencent.supersonic.headless.chat.parser.ParserConfig.PARSER_STRATEGY_TYPE;
@Slf4j
@Service
@@ -92,7 +90,7 @@ public class LLMRequestService {
String priorExts = getPriorExts(queryCtx, fieldNameList);
llmReq.setPriorExts(priorExts);
List<ElementValue> linking = new ArrayList<>();
List<LLMReq.ElementValue> linking = new ArrayList<>();
boolean linkingValueEnabled = Boolean.valueOf(parserConfig.getParameterValue(PARSER_LINKING_VALUE_ENABLE));
if (linkingValueEnabled) {
@@ -173,13 +171,13 @@ public class LLMRequestService {
return extraInfoSb.toString();
}
public List<ElementValue> getValues(QueryContext queryCtx, Long dataSetId) {
public List<LLMReq.ElementValue> getValues(QueryContext queryCtx, Long dataSetId) {
Map<Long, String> itemIdToName = getItemIdToName(queryCtx, dataSetId);
List<SchemaElementMatch> matchedElements = queryCtx.getMapInfo().getMatchedElements(dataSetId);
if (CollectionUtils.isEmpty(matchedElements)) {
return new ArrayList<>();
}
Set<ElementValue> valueMatches = matchedElements
Set<LLMReq.ElementValue> valueMatches = matchedElements
.stream()
.filter(elementMatch -> !elementMatch.isInherited())
.filter(schemaElementMatch -> {
@@ -187,7 +185,7 @@ public class LLMRequestService {
return SchemaElementType.VALUE.equals(type) || SchemaElementType.ID.equals(type);
})
.map(elementMatch -> {
ElementValue elementValue = new ElementValue();
LLMReq.ElementValue elementValue = new LLMReq.ElementValue();
elementValue.setFieldName(itemIdToName.get(elementMatch.getElement().getId()));
elementValue.setFieldValue(elementMatch.getWord());
return elementValue;

View File

@@ -1,14 +1,14 @@
package com.tencent.supersonic.headless.core.chat.parser.llm;
package com.tencent.supersonic.headless.chat.parser.llm;
import com.tencent.supersonic.common.pojo.Constants;
import com.tencent.supersonic.common.util.jsqlparser.SqlEqualHelper;
import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.core.chat.query.QueryManager;
import com.tencent.supersonic.headless.core.chat.query.llm.LLMSemanticQuery;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMResp;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMSqlQuery;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMSqlResp;
import com.tencent.supersonic.headless.chat.query.QueryManager;
import com.tencent.supersonic.headless.chat.query.llm.LLMSemanticQuery;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMResp;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMSqlQuery;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMSqlResp;
import com.tencent.supersonic.headless.chat.QueryContext;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.MapUtils;
import org.springframework.stereotype.Service;

View File

@@ -1,14 +1,14 @@
package com.tencent.supersonic.headless.core.chat.parser.llm;
package com.tencent.supersonic.headless.chat.parser.llm;
import com.tencent.supersonic.common.util.ContextUtils;
import com.tencent.supersonic.headless.api.pojo.SemanticSchema;
import com.tencent.supersonic.headless.core.chat.parser.SemanticParser;
import com.tencent.supersonic.headless.core.pojo.ChatContext;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMResp;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMSqlResp;
import com.tencent.supersonic.headless.chat.QueryContext;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMResp;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMSqlResp;
import com.tencent.supersonic.headless.chat.parser.SemanticParser;
import com.tencent.supersonic.headless.chat.ChatContext;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.MapUtils;
import org.apache.commons.lang3.StringUtils;

View File

@@ -1,8 +1,8 @@
package com.tencent.supersonic.headless.core.chat.parser.llm;
package com.tencent.supersonic.headless.chat.parser.llm;
import com.google.common.collect.Lists;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMResp;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMResp;
import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.model.chat.ChatLanguageModel;
import dev.langchain4j.model.input.Prompt;

View File

@@ -1,6 +1,6 @@
package com.tencent.supersonic.headless.core.chat.parser.llm;
package com.tencent.supersonic.headless.chat.parser.llm;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMSqlResp;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMSqlResp;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.tuple.Pair;

View File

@@ -1,9 +1,8 @@
package com.tencent.supersonic.headless.core.chat.parser.llm;
package com.tencent.supersonic.headless.chat.parser.llm;
import com.tencent.supersonic.headless.api.pojo.request.QueryReq;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq.ElementValue;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMResp;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMResp;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
@@ -25,5 +24,5 @@ public class ParseResult {
private QueryReq request;
private List<ElementValue> linkingValues;
private List<LLMReq.ElementValue> linkingValues;
}

View File

@@ -1,8 +1,7 @@
package com.tencent.supersonic.headless.core.chat.parser.llm;
package com.tencent.supersonic.headless.chat.parser.llm;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq.ElementValue;
import com.tencent.supersonic.headless.core.config.ParserConfig;
import com.tencent.supersonic.headless.chat.parser.ParserConfig;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMReq;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
@@ -15,9 +14,9 @@ import java.util.Collections;
import java.util.List;
import java.util.Map;
import static com.tencent.supersonic.headless.core.config.ParserConfig.PARSER_EXEMPLAR_RECALL_NUMBER;
import static com.tencent.supersonic.headless.core.config.ParserConfig.PARSER_FEW_SHOT_NUMBER;
import static com.tencent.supersonic.headless.core.config.ParserConfig.PARSER_SELF_CONSISTENCY_NUMBER;
import static com.tencent.supersonic.headless.chat.parser.ParserConfig.PARSER_EXEMPLAR_RECALL_NUMBER;
import static com.tencent.supersonic.headless.chat.parser.ParserConfig.PARSER_FEW_SHOT_NUMBER;
import static com.tencent.supersonic.headless.chat.parser.ParserConfig.PARSER_SELF_CONSISTENCY_NUMBER;
@Component
@Slf4j
@@ -58,7 +57,7 @@ public class PromptHelper {
String dbSchema = "Table: " + tableName + ", Columns = " + fieldNameList;
List<String> priorLinkingList = new ArrayList<>();
for (ElementValue value : linkedValues) {
for (LLMReq.ElementValue value : linkedValues) {
String fieldName = value.getFieldName();
String fieldValue = value.getFieldValue();
priorLinkingList.add("" + fieldValue + "‘是一个‘" + fieldName + "");

View File

@@ -1,10 +1,9 @@
package com.tencent.supersonic.headless.core.chat.parser.llm;
package com.tencent.supersonic.headless.chat.parser.llm;
import com.tencent.supersonic.common.util.ContextUtils;
import com.tencent.supersonic.common.util.JsonUtil;
import com.tencent.supersonic.headless.core.config.LLMParserConfig;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMResp;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMResp;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.MapUtils;
import org.slf4j.Logger;

View File

@@ -1,10 +1,7 @@
package com.tencent.supersonic.headless.server.listener;
package com.tencent.supersonic.headless.chat.parser.llm;
import com.tencent.supersonic.common.config.EmbeddingConfig;
import com.tencent.supersonic.headless.core.chat.parser.llm.JavaLLMProxy;
import com.tencent.supersonic.headless.core.chat.parser.llm.ExemplarManager;
import com.tencent.supersonic.headless.core.chat.parser.llm.Exemplar;
import com.tencent.supersonic.headless.core.utils.ComponentFactory;
import com.tencent.supersonic.headless.chat.utils.ComponentFactory;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.CommandLineRunner;

View File

@@ -1,8 +1,8 @@
package com.tencent.supersonic.headless.core.chat.parser.llm;
package com.tencent.supersonic.headless.chat.parser.llm;
import com.tencent.supersonic.common.config.LLMConfig;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMResp;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMResp;
import com.tencent.supersonic.common.util.S2ChatModelProvider;
import dev.langchain4j.model.chat.ChatLanguageModel;
import org.slf4j.Logger;

View File

@@ -1,6 +1,6 @@
package com.tencent.supersonic.headless.core.chat.parser.llm;
package com.tencent.supersonic.headless.chat.parser.llm;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMReq;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

View File

@@ -1,9 +1,8 @@
package com.tencent.supersonic.headless.core.chat.parser.llm;
package com.tencent.supersonic.headless.chat.parser.llm;
import com.tencent.supersonic.common.util.JsonUtil;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq.SqlGenType;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMResp;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMReq;
import com.tencent.supersonic.headless.chat.query.llm.s2sql.LLMResp;
import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.model.chat.ChatLanguageModel;
import dev.langchain4j.model.input.Prompt;
@@ -115,6 +114,6 @@ public class TwoPassSCSqlGenStrategy extends SqlGenStrategy {
@Override
public void afterPropertiesSet() {
SqlGenStrategyFactory.addSqlGenerationForFactory(SqlGenType.TWO_PASS_AUTO_COT_SELF_CONSISTENCY, this);
SqlGenStrategyFactory.addSqlGenerationForFactory(LLMReq.SqlGenType.TWO_PASS_AUTO_COT_SELF_CONSISTENCY, this);
}
}

View File

@@ -1,10 +1,10 @@
package com.tencent.supersonic.headless.core.chat.parser.rule;
package com.tencent.supersonic.headless.chat.parser.rule;
import com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum;
import com.tencent.supersonic.headless.core.chat.parser.SemanticParser;
import com.tencent.supersonic.headless.core.pojo.ChatContext;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.core.chat.query.SemanticQuery;
import com.tencent.supersonic.headless.chat.ChatContext;
import com.tencent.supersonic.headless.chat.QueryContext;
import com.tencent.supersonic.headless.chat.query.SemanticQuery;
import com.tencent.supersonic.headless.chat.parser.SemanticParser;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
@@ -17,12 +17,8 @@ import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum.AVG;
import static com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum.MAX;
import static com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum.MIN;
import static com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum.NONE;
import static com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum.SUM;
import static com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum.TOPN;
import static com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum.COUNT;
import static com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum.DISTINCT;
/**
* AggregateTypeParser extracts aggregation type specified in the user query
@@ -34,12 +30,14 @@ import static com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum.TOPN;
public class AggregateTypeParser implements SemanticParser {
private static final Map<AggregateTypeEnum, Pattern> REGX_MAP = Stream.of(
new AbstractMap.SimpleEntry<>(MAX, Pattern.compile("(?i)(最大值|最大|max|峰值|最高|最多)")),
new AbstractMap.SimpleEntry<>(MIN, Pattern.compile("(?i)(最小值|最小|min|最低|最少)")),
new AbstractMap.SimpleEntry<>(SUM, Pattern.compile("(?i)(汇总|总和|sum)")),
new AbstractMap.SimpleEntry<>(AVG, Pattern.compile("(?i)(平均值|日均|平均|avg)")),
new AbstractMap.SimpleEntry<>(TOPN, Pattern.compile("(?i)(top)")),
new AbstractMap.SimpleEntry<>(NONE, Pattern.compile("(?i)(明细)"))
new AbstractMap.SimpleEntry<>(AggregateTypeEnum.MAX, Pattern.compile("(?i)(最大值|最大|max|峰值|最高|最多)")),
new AbstractMap.SimpleEntry<>(AggregateTypeEnum.MIN, Pattern.compile("(?i)(最小值|最小|min|最低|最少)")),
new AbstractMap.SimpleEntry<>(AggregateTypeEnum.SUM, Pattern.compile("(?i)(汇总|总和|sum)")),
new AbstractMap.SimpleEntry<>(AggregateTypeEnum.AVG, Pattern.compile("(?i)(平均值|日均|平均|avg)")),
new AbstractMap.SimpleEntry<>(AggregateTypeEnum.TOPN, Pattern.compile("(?i)(top)")),
new AbstractMap.SimpleEntry<>(DISTINCT, Pattern.compile("(?i)(uv)")),
new AbstractMap.SimpleEntry<>(COUNT, Pattern.compile("(?i)(总数|pv)")),
new AbstractMap.SimpleEntry<>(AggregateTypeEnum.NONE, Pattern.compile("(?i)(明细)"))
).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (k1, k2) -> k2));
@Override
@@ -85,7 +83,7 @@ public class AggregateTypeParser implements SemanticParser {
}
AggregateTypeEnum type = aggregateCount.entrySet().stream().max(Map.Entry.comparingByValue())
.map(entry -> entry.getKey()).orElse(NONE);
.map(entry -> entry.getKey()).orElse(AggregateTypeEnum.NONE);
String detectWord = aggregateWord.get(type);
return new AggregateConf(type, detectWord);
}

View File

@@ -1,16 +1,16 @@
package com.tencent.supersonic.headless.core.chat.parser.rule;
package com.tencent.supersonic.headless.chat.parser.rule;
import com.tencent.supersonic.headless.api.pojo.SchemaElementMatch;
import com.tencent.supersonic.headless.api.pojo.SchemaElementType;
import com.tencent.supersonic.headless.core.chat.parser.SemanticParser;
import com.tencent.supersonic.headless.core.pojo.ChatContext;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.core.chat.query.QueryManager;
import com.tencent.supersonic.headless.core.chat.query.SemanticQuery;
import com.tencent.supersonic.headless.core.chat.query.rule.RuleSemanticQuery;
import com.tencent.supersonic.headless.core.chat.query.rule.metric.MetricModelQuery;
import com.tencent.supersonic.headless.core.chat.query.rule.metric.MetricSemanticQuery;
import com.tencent.supersonic.headless.core.chat.query.rule.metric.MetricIdQuery;
import com.tencent.supersonic.headless.chat.query.QueryManager;
import com.tencent.supersonic.headless.chat.query.SemanticQuery;
import com.tencent.supersonic.headless.chat.query.rule.RuleSemanticQuery;
import com.tencent.supersonic.headless.chat.parser.SemanticParser;
import com.tencent.supersonic.headless.chat.ChatContext;
import com.tencent.supersonic.headless.chat.QueryContext;
import com.tencent.supersonic.headless.chat.query.rule.metric.MetricModelQuery;
import com.tencent.supersonic.headless.chat.query.rule.metric.MetricSemanticQuery;
import com.tencent.supersonic.headless.chat.query.rule.metric.MetricIdQuery;
import lombok.extern.slf4j.Slf4j;
import java.util.AbstractMap;

View File

@@ -1,11 +1,11 @@
package com.tencent.supersonic.headless.core.chat.parser.rule;
package com.tencent.supersonic.headless.chat.parser.rule;
import com.tencent.supersonic.headless.api.pojo.SchemaElementMatch;
import com.tencent.supersonic.headless.api.pojo.SchemaMapInfo;
import com.tencent.supersonic.headless.core.chat.parser.SemanticParser;
import com.tencent.supersonic.headless.core.chat.query.rule.RuleSemanticQuery;
import com.tencent.supersonic.headless.core.pojo.ChatContext;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.chat.query.rule.RuleSemanticQuery;
import com.tencent.supersonic.headless.chat.parser.SemanticParser;
import com.tencent.supersonic.headless.chat.ChatContext;
import com.tencent.supersonic.headless.chat.QueryContext;
import lombok.extern.slf4j.Slf4j;
import java.util.Arrays;
import java.util.List;

View File

@@ -1,13 +1,13 @@
package com.tencent.supersonic.headless.core.chat.parser.rule;
package com.tencent.supersonic.headless.chat.parser.rule;
import com.tencent.supersonic.common.pojo.Constants;
import com.tencent.supersonic.common.pojo.DateConf;
import com.tencent.supersonic.headless.core.chat.parser.SemanticParser;
import com.tencent.supersonic.headless.core.pojo.ChatContext;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.core.chat.query.QueryManager;
import com.tencent.supersonic.headless.core.chat.query.SemanticQuery;
import com.tencent.supersonic.headless.core.chat.query.rule.RuleSemanticQuery;
import com.tencent.supersonic.headless.chat.query.QueryManager;
import com.tencent.supersonic.headless.chat.query.SemanticQuery;
import com.tencent.supersonic.headless.chat.query.rule.RuleSemanticQuery;
import com.tencent.supersonic.headless.chat.parser.SemanticParser;
import com.tencent.supersonic.headless.chat.ChatContext;
import com.tencent.supersonic.headless.chat.QueryContext;
import com.xkzhangsan.time.nlp.TimeNLP;
import com.xkzhangsan.time.nlp.TimeNLPUtil;
import lombok.extern.slf4j.Slf4j;

View File

@@ -1,5 +1,5 @@
package com.tencent.supersonic.headless.core.chat.query;
package com.tencent.supersonic.headless.chat.query;
import com.tencent.supersonic.common.pojo.Aggregator;
import com.tencent.supersonic.common.pojo.Filter;
@@ -10,8 +10,8 @@ import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.headless.api.pojo.SemanticSchema;
import com.tencent.supersonic.headless.api.pojo.request.QuerySqlReq;
import com.tencent.supersonic.headless.api.pojo.request.QueryStructReq;
import com.tencent.supersonic.headless.core.config.ParserConfig;
import com.tencent.supersonic.headless.core.utils.QueryReqBuilder;
import com.tencent.supersonic.headless.chat.parser.ParserConfig;
import com.tencent.supersonic.headless.chat.utils.QueryReqBuilder;
import lombok.ToString;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
@@ -21,7 +21,7 @@ import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import static com.tencent.supersonic.headless.core.config.ParserConfig.PARSER_S2SQL_ENABLE;
import static com.tencent.supersonic.headless.chat.parser.ParserConfig.PARSER_S2SQL_ENABLE;
@Slf4j
@ToString

View File

@@ -1,9 +1,9 @@
package com.tencent.supersonic.headless.core.chat.query;
package com.tencent.supersonic.headless.chat.query;
import com.tencent.supersonic.headless.core.chat.query.llm.LLMSemanticQuery;
import com.tencent.supersonic.headless.core.chat.query.rule.RuleSemanticQuery;
import com.tencent.supersonic.headless.core.chat.query.rule.metric.MetricSemanticQuery;
import com.tencent.supersonic.headless.core.chat.query.rule.detail.DetailSemanticQuery;
import com.tencent.supersonic.headless.chat.query.llm.LLMSemanticQuery;
import com.tencent.supersonic.headless.chat.query.rule.RuleSemanticQuery;
import com.tencent.supersonic.headless.chat.query.rule.metric.MetricSemanticQuery;
import com.tencent.supersonic.headless.chat.query.rule.detail.DetailSemanticQuery;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.query;
package com.tencent.supersonic.headless.chat.query;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;

View File

@@ -0,0 +1,8 @@
package com.tencent.supersonic.headless.chat.query.llm;
import com.tencent.supersonic.headless.chat.query.BaseSemanticQuery;
import lombok.extern.slf4j.Slf4j;
/**
 * Abstract semantic query type living in the {@code chat.query.llm} package.
 *
 * <p>Declares no fields or methods of its own; all behavior is inherited from
 * {@link BaseSemanticQuery}. The Lombok {@code @Slf4j} annotation is present,
 * but nothing in this class body logs anything.
 *
 * <p>NOTE(review): presumably the common parent for LLM-backed query
 * implementations (as opposed to rule-based ones) — confirm against subclasses.
 */
@Slf4j
public abstract class LLMSemanticQuery extends BaseSemanticQuery {
}

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.query.llm.s2sql;
package com.tencent.supersonic.headless.chat.query.llm.s2sql;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.common.collect.Lists;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.query.llm.s2sql;
package com.tencent.supersonic.headless.chat.query.llm.s2sql;
import lombok.Data;

View File

@@ -1,12 +1,12 @@
package com.tencent.supersonic.headless.core.chat.query.llm.s2sql;
package com.tencent.supersonic.headless.chat.query.llm.s2sql;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.headless.api.pojo.SemanticSchema;
import com.tencent.supersonic.headless.api.pojo.SqlInfo;
import com.tencent.supersonic.headless.api.pojo.request.SemanticQueryReq;
import com.tencent.supersonic.headless.core.chat.query.QueryManager;
import com.tencent.supersonic.headless.core.chat.query.llm.LLMSemanticQuery;
import com.tencent.supersonic.headless.core.utils.QueryReqBuilder;
import com.tencent.supersonic.headless.chat.utils.QueryReqBuilder;
import com.tencent.supersonic.headless.chat.query.QueryManager;
import com.tencent.supersonic.headless.chat.query.llm.LLMSemanticQuery;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.query.llm.s2sql;
package com.tencent.supersonic.headless.chat.query.llm.s2sql;
import lombok.AllArgsConstructor;
import lombok.Builder;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.query.rule;
package com.tencent.supersonic.headless.chat.query.rule;
import lombok.Data;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.headless.core.chat.query.rule;
package com.tencent.supersonic.headless.chat.query.rule;
import com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum;

View File

@@ -1,5 +1,5 @@
package com.tencent.supersonic.headless.core.chat.query.rule;
package com.tencent.supersonic.headless.chat.query.rule;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.common.pojo.enums.FilterOperatorEnum;
@@ -12,11 +12,11 @@ import com.tencent.supersonic.headless.api.pojo.request.QueryFilter;
import com.tencent.supersonic.headless.api.pojo.request.QueryMultiStructReq;
import com.tencent.supersonic.headless.api.pojo.request.QueryStructReq;
import com.tencent.supersonic.headless.api.pojo.request.SemanticQueryReq;
import com.tencent.supersonic.headless.core.chat.query.BaseSemanticQuery;
import com.tencent.supersonic.headless.core.chat.query.QueryManager;
import com.tencent.supersonic.headless.core.pojo.ChatContext;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.core.utils.QueryReqBuilder;
import com.tencent.supersonic.headless.chat.QueryContext;
import com.tencent.supersonic.headless.chat.utils.QueryReqBuilder;
import com.tencent.supersonic.headless.chat.query.BaseSemanticQuery;
import com.tencent.supersonic.headless.chat.query.QueryManager;
import com.tencent.supersonic.headless.chat.ChatContext;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;

View File

@@ -1,11 +1,11 @@
package com.tencent.supersonic.headless.core.chat.query.rule.detail;
package com.tencent.supersonic.headless.chat.query.rule.detail;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import static com.tencent.supersonic.headless.api.pojo.SchemaElementType.VALUE;
import static com.tencent.supersonic.headless.core.chat.query.rule.QueryMatchOption.OptionType.REQUIRED;
import static com.tencent.supersonic.headless.core.chat.query.rule.QueryMatchOption.RequireNumberType.AT_LEAST;
import static com.tencent.supersonic.headless.chat.query.rule.QueryMatchOption.OptionType.REQUIRED;
import static com.tencent.supersonic.headless.chat.query.rule.QueryMatchOption.RequireNumberType.AT_LEAST;
@Slf4j
@Component

Some files were not shown because too many files have changed in this diff Show More