(improvement)(Headless) Refactor the SemanticModeller to apply rule-based modelling first and the LLM second, and automatically infer field types in the rule-based method. (#1900)

Co-authored-by: lxwcodemonkey
This commit is contained in:
LXW
2024-11-11 00:10:58 +08:00
committed by GitHub
parent ea6a9ebc5f
commit 87729956e8
12 changed files with 101 additions and 23 deletions

View File

@@ -2,6 +2,7 @@ package com.tencent.supersonic.headless.core.adaptor.db;
import com.google.common.collect.Lists;
import com.tencent.supersonic.headless.api.pojo.DBColumn;
import com.tencent.supersonic.headless.api.pojo.enums.FieldType;
import com.tencent.supersonic.headless.core.pojo.ConnectInfo;
import lombok.extern.slf4j.Slf4j;
@@ -71,7 +72,8 @@ public abstract class BaseDbAdaptor implements DbAdaptor {
String columnName = columns.getString("COLUMN_NAME");
String dataType = columns.getString("TYPE_NAME");
String remarks = columns.getString("REMARKS");
dbColumns.add(new DBColumn(columnName, dataType, remarks));
FieldType fieldType = classifyColumnType(dataType);
dbColumns.add(new DBColumn(columnName, dataType, remarks, fieldType));
}
return dbColumns;
}
@@ -82,4 +84,25 @@ public abstract class BaseDbAdaptor implements DbAdaptor {
return connection.getMetaData();
}
/**
 * Infers the semantic field type of a column from its database type name.
 *
 * <p>Numeric types map to {@code FieldType.measure}, date/time types map to
 * {@code FieldType.time}, and every other type name (including a missing one)
 * maps to {@code FieldType.dimension}. Matching ignores case and surrounding
 * whitespace.
 *
 * @param typeName the column's type name, e.g. the {@code TYPE_NAME} value read from
 *        the column metadata result set; may be {@code null}
 * @return the inferred field type, never {@code null}
 */
protected static FieldType classifyColumnType(String typeName) {
    if (typeName == null) {
        // A missing type name should not abort column discovery with an NPE;
        // treat the column like any other unrecognised type.
        return FieldType.dimension;
    }
    // Locale.ROOT keeps the comparison locale-independent: with a Turkish default
    // locale "int".toUpperCase() yields "İNT" and would never match "INT".
    switch (typeName.trim().toUpperCase(java.util.Locale.ROOT)) {
        case "INT":
        case "INTEGER":
        case "BIGINT":
        case "SMALLINT":
        case "TINYINT":
        case "FLOAT":
        case "REAL":
        case "DOUBLE":
        case "DOUBLE PRECISION":
        case "DECIMAL":
        case "NUMERIC":
            return FieldType.measure;
        case "DATE":
        case "TIME":
        case "DATETIME":
        case "TIMESTAMP":
            return FieldType.time;
        default:
            return FieldType.dimension;
    }
}
}

View File

@@ -4,6 +4,7 @@ import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.Constants;
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
import com.tencent.supersonic.headless.api.pojo.DBColumn;
import com.tencent.supersonic.headless.api.pojo.enums.FieldType;
import com.tencent.supersonic.headless.core.pojo.ConnectInfo;
import lombok.extern.slf4j.Slf4j;
@@ -54,7 +55,8 @@ public class H2Adaptor extends BaseDbAdaptor {
String columnName = columns.getString("COLUMN_NAME");
String dataType = columns.getString("TYPE_NAME");
String remarks = columns.getString("REMARKS");
dbColumns.add(new DBColumn(columnName, dataType, remarks));
FieldType fieldType = classifyColumnType(dataType);
dbColumns.add(new DBColumn(columnName, dataType, remarks, fieldType));
}
return dbColumns;
}

View File

@@ -5,6 +5,7 @@ import com.tencent.supersonic.common.jsqlparser.SqlReplaceHelper;
import com.tencent.supersonic.common.pojo.Constants;
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
import com.tencent.supersonic.headless.api.pojo.DBColumn;
import com.tencent.supersonic.headless.api.pojo.enums.FieldType;
import com.tencent.supersonic.headless.core.pojo.ConnectInfo;
import lombok.extern.slf4j.Slf4j;
import net.sf.jsqlparser.expression.StringValue;
@@ -105,8 +106,41 @@ public class PostgresqlAdaptor extends BaseDbAdaptor {
String columnName = columns.getString("COLUMN_NAME");
String dataType = columns.getString("TYPE_NAME");
String remarks = columns.getString("REMARKS");
dbColumns.add(new DBColumn(columnName, dataType, remarks));
FieldType fieldType = classifyColumnType(dataType);
dbColumns.add(new DBColumn(columnName, dataType, remarks, fieldType));
}
return dbColumns;
}
/**
 * Infers the semantic field type of a PostgreSQL column from its type name.
 *
 * <p>Numeric types map to {@code FieldType.measure}, date/time types map to
 * {@code FieldType.time}, and every other type name (including a missing one)
 * maps to {@code FieldType.dimension}. Both the SQL-standard spellings
 * ({@code INTEGER}, {@code DOUBLE PRECISION}, ...) and PostgreSQL's internal
 * names for the same types ({@code int4}, {@code float8}, ...) are recognised.
 * Matching ignores case and surrounding whitespace.
 *
 * @param typeName the column's type name, e.g. the {@code TYPE_NAME} value read from
 *        the column metadata result set; may be {@code null}
 * @return the inferred field type, never {@code null}
 */
protected static FieldType classifyColumnType(String typeName) {
    if (typeName == null) {
        // A missing type name should not abort column discovery with an NPE;
        // treat the column like any other unrecognised type.
        return FieldType.dimension;
    }
    // Locale.ROOT keeps the comparison locale-independent: with a Turkish default
    // locale "int".toUpperCase() yields "İNT" and would never match "INT".
    switch (typeName.trim().toUpperCase(java.util.Locale.ROOT)) {
        case "INT":
        case "INTEGER":
        case "BIGINT":
        case "SMALLINT":
        // Internal PostgreSQL names for smallint / integer / bigint.
        case "INT2":
        case "INT4":
        case "INT8":
        case "SERIAL":
        case "BIGSERIAL":
        case "SMALLSERIAL":
        case "REAL":
        case "DOUBLE PRECISION":
        // Internal PostgreSQL names for real / double precision.
        case "FLOAT4":
        case "FLOAT8":
        case "NUMERIC":
        case "DECIMAL":
            return FieldType.measure;
        case "DATE":
        case "TIME":
        case "TIMETZ":
        case "TIMESTAMP":
        case "TIMESTAMPTZ":
        case "INTERVAL":
            return FieldType.time;
        // Textual types are listed for readability only; they share the default outcome.
        case "VARCHAR":
        case "CHAR":
        case "TEXT":
        case "CHARACTER VARYING":
        case "CHARACTER":
        case "UUID":
        default:
            return FieldType.dimension;
    }
}
}