feat:add create index scripts for opensearch (#2055)

This commit is contained in:
zyclove
2025-02-14 21:22:01 +08:00
committed by GitHub
parent 6cc145935d
commit e9d9c4591d
6 changed files with 520 additions and 0 deletions

View File

@@ -0,0 +1,95 @@
{
"settings": {
"index": {
"number_of_shards": 6,
"number_of_replicas": 2,
"knn": true
},
"analysis": {
"analyzer": {
"ik_max_word_lowercase_html_strip": {
"filter": [
"lowercase",
"asciifolding"
],
"char_filter": [
"html_strip"
],
"tokenizer": "ik_max_word"
}
}
}
},
"mappings": {
"properties": {
"metadata": {
"type": "object",
"properties": {
"dbSchema": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"queryId": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"question": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"sideInfo": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"sql": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
}
}
},
"text": {
"analyzer": "ik_max_word_lowercase_html_strip",
"search_analyzer": "ik_smart",
"type": "text"
},
"vector": {
"dimension": 512,
"method": {
"engine": "lucene",
"space_type": "cosinesimil",
"name": "hnsw",
"parameters": {
"ef_construction": 100,
"m": 16
}
},
"type": "knn_vector"
}
}
}
}

View File

@@ -0,0 +1,98 @@
{
"index_patterns": ["ai_sql_memory_*"],
"template": {
"settings": {
"index": {
"number_of_shards": 6,
"number_of_replicas": 2,
"knn": true
},
"analysis": {
"analyzer": {
"ik_max_word_lowercase_html_strip": {
"filter": [
"lowercase",
"asciifolding"
],
"char_filter": [
"html_strip"
],
"tokenizer": "ik_max_word"
}
}
}
},
"mappings": {
"properties": {
"metadata": {
"type": "object",
"properties": {
"dbSchema": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"queryId": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"question": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"sideInfo": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"sql": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
}
}
},
"text": {
"analyzer": "ik_max_word_lowercase_html_strip",
"search_analyzer": "ik_smart",
"type": "text"
},
"vector": {
"dimension": 512,
"method": {
"engine": "lucene",
"space_type": "cosinesimil",
"name": "hnsw",
"parameters": {
"ef_construction": 100,
"m": 16
}
},
"type": "knn_vector"
}
}
}
}
}

View File

@@ -0,0 +1,131 @@
{
"settings": {
"index": {
"number_of_shards": 6,
"number_of_replicas": 2,
"knn": true
},
"analysis": {
"analyzer": {
"ik_max_word_lowercase_html_strip": {
"filter": [
"lowercase",
"asciifolding"
],
"char_filter": [
"html_strip"
],
"tokenizer": "ik_max_word"
}
}
}
},
"mappings": {
"properties": {
"metadata": {
"type": "object",
"properties": {
"bizName": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"defaultAgg": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"domainId": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"id": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"modelId": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"name": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"newName": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"queryId": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"type": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
}
}
},
"text": {
"analyzer": "ik_max_word_lowercase_html_strip",
"search_analyzer": "ik_smart",
"type": "text"
},
"vector": {
"dimension": 512,
"method": {
"engine": "lucene",
"space_type": "cosinesimil",
"name": "hnsw",
"parameters": {
"ef_construction": 100,
"m": 16
}
},
"type": "knn_vector"
}
}
}
}

View File

@@ -0,0 +1,59 @@
{
"settings": {
"index": {
"number_of_shards": 6,
"number_of_replicas": 2,
"knn": true
},
"analysis": {
"analyzer": {
"ik_max_word_lowercase_html_strip": {
"filter": [
"lowercase",
"asciifolding"
],
"char_filter": [
"html_strip"
],
"tokenizer": "ik_max_word"
}
}
}
},
"mappings": {
"properties": {
"metadata": {
"type": "object",
"properties": {
"queryId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"text": {
"type": "text",
"analyzer": "ik_max_word_lowercase_html_strip",
"search_analyzer": "ik_smart"
},
"vector": {
"type": "knn_vector",
"dimension": 512,
"method": {
"name": "hnsw",
"engine": "lucene",
"space_type": "cosinesimil",
"parameters": {
"ef_construction": 100,
"m": 16
}
}
}
}
}
}

View File

@@ -0,0 +1,95 @@
{
"settings": {
"index": {
"number_of_shards": 6,
"number_of_replicas": 2,
"knn": true
},
"analysis": {
"analyzer": {
"ik_max_word_lowercase_html_strip": {
"filter": [
"lowercase",
"asciifolding"
],
"char_filter": [
"html_strip"
],
"tokenizer": "ik_max_word"
}
}
}
},
"mappings": {
"properties": {
"metadata": {
"type": "object",
"properties": {
"dbSchema": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"queryId": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"question": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"sideInfo": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
},
"sql": {
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"type": "text"
}
}
},
"text": {
"analyzer": "ik_max_word_lowercase_html_strip",
"search_analyzer": "ik_smart",
"type": "text"
},
"vector": {
"dimension": 512,
"method": {
"engine": "lucene",
"space_type": "cosinesimil",
"name": "hnsw",
"parameters": {
"ef_construction": 100,
"m": 16
}
},
"type": "knn_vector"
}
}
}
}

View File

@@ -0,0 +1,42 @@
#!/bin/bash
# Create the OpenSearch indices used by AI SQL.
#
# Usage: ./opensearch_create_index.sh
#
# Note: make sure OpenSearch is running and that the indices do not exist yet.
# The vector dimension in the index configurations defaults to 512. If you
# need a different dimension, adjust the corresponding index configuration
# (JSON file) before running this script.
#
# Every setting below can be overridden with an environment variable of the
# same name; the defaults are the values that used to be hard-coded here.
#   es_host:         base URL of OpenSearch
#   es_user:         the OpenSearch user
#   es_password:     the password of that user
#   es_index_prefix: prefix prepended to every index name
#   es_config_dir:   directory containing the ai_sql_*.json index
#                    configurations (default: the current directory)
#
# Exit status: 0 if every index was created, 1 if at least one request failed.
set -euo pipefail

es_host="${es_host:-https://opensearch-node:7799}"
es_user="${es_user:-admin}"
es_password="${es_password:-admin}"
es_index_prefix="${es_index_prefix:-ai_sql}"
es_config_dir="${es_config_dir:-.}"

#######################################
# Create one index by PUTting a JSON configuration file to OpenSearch.
# Globals:   es_host, es_user, es_password (read)
# Arguments: $1 - name of the index to create
#            $2 - path of the JSON file sent as the request body
# Outputs:   progress line and the response body on stdout, errors on stderr
# Returns:   0 on a 2xx response, 1 otherwise
#######################################
create_index() {
  local index_name=$1
  local body_file=$2
  local response http_code

  echo "creating index ${index_name}"
  if [[ ! -r "${body_file}" ]]; then
    echo "error: index configuration ${body_file} not found or not readable" >&2
    return 1
  fi

  # -w appends the HTTP status on its own last line so that it can be split
  # from the response body; curl alone exits 0 on HTTP 4xx/5xx responses.
  if ! response=$(curl -sS -X PUT "${es_host}/${index_name}" \
    -u "${es_user}:${es_password}" \
    -H "Content-Type: application/json" \
    --data-binary "@${body_file}" \
    -w '\n%{http_code}'); then
    echo "error: request to create index ${index_name} failed" >&2
    return 1
  fi

  http_code=${response##*$'\n'}
  printf '%s\n' "${response%$'\n'*}"
  if [[ "${http_code}" != 2* ]]; then
    echo "error: creating index ${index_name} returned HTTP ${http_code}" >&2
    return 1
  fi
}

#######################################
# Create every AI SQL index, continuing past individual failures so that
# one existing or rejected index does not prevent the others from being made.
# Globals:   es_index_prefix, es_config_dir (read)
# Returns:   0 if all requests succeeded, 1 otherwise
#######################################
main() {
  local failures=0
  local collection i

  echo "Creating index pattern for AI SQL"

  for collection in meta_collection text2dsl_agent_collection preset_query_collection; do
    create_index "${es_index_prefix}_${collection}" \
      "${es_config_dir}/ai_sql_${collection}.json" ||
      failures=$((failures + 1))
  done

  # The ten memory indices all share the same configuration file.
  for i in {1..10}; do
    create_index "${es_index_prefix}_memory_${i}" \
      "${es_config_dir}/ai_sql_memory.json" ||
      failures=$((failures + 1))
  done

  if ((failures > 0)); then
    echo "error: ${failures} index creation request(s) failed" >&2
    return 1
  fi
}

main "$@"