(improvement)(Headless) Refactor the SemanticModeller to apply the rule-based modeller first and then the LLM-based modeller, and automatically infer field types in the rule-based modeller. (#1900)

Co-authored-by: lxwcodemonkey
This commit is contained in:
LXW
2024-11-11 00:10:58 +08:00
committed by GitHub
parent ea6a9ebc5f
commit 87729956e8
12 changed files with 101 additions and 23 deletions

View File

@@ -46,6 +46,7 @@ com.tencent.supersonic.headless.core.cache.QueryCache=\
### headless-server SPIs
com.tencent.supersonic.headless.server.modeller.SemanticModeller=\
com.tencent.supersonic.headless.server.modeller.RuleSemanticModeller, \
com.tencent.supersonic.headless.server.modeller.LLMSemanticModeller
### chat-server SPIs

View File

@@ -20,7 +20,7 @@ import java.util.Map;
@Disabled
@TestPropertySource(properties = {"s2.model.building.exemplars.enabled = false"})
public class LLMSemanticModellerTest extends BaseTest {
public class SemanticModellerTest extends BaseTest {
private LLMConfigUtils.LLMType llmType = LLMConfigUtils.LLMType.OLLAMA_LLAMA3;
@@ -49,7 +49,7 @@ public class LLMSemanticModellerTest extends BaseTest {
Assertions.assertEquals(4, stayTimeModelSchema.getColumnSchemas().size());
Assertions.assertEquals(FieldType.foreign_key,
stayTimeModelSchema.getColumnByName("user_name").getFiledType());
Assertions.assertEquals(FieldType.data_time,
Assertions.assertEquals(FieldType.partition_time,
stayTimeModelSchema.getColumnByName("imp_date").getFiledType());
Assertions.assertEquals(FieldType.dimension,
stayTimeModelSchema.getColumnByName("page").getFiledType());
@@ -73,7 +73,7 @@ public class LLMSemanticModellerTest extends BaseTest {
ModelSchema pvModelSchema = modelSchemaMap.values().iterator().next();
Assertions.assertEquals(5, pvModelSchema.getColumnSchemas().size());
Assertions.assertEquals(FieldType.data_time,
Assertions.assertEquals(FieldType.partition_time,
pvModelSchema.getColumnByName("imp_date").getFiledType());
Assertions.assertEquals(FieldType.dimension,
pvModelSchema.getColumnByName("user_name").getFiledType());