From e9d9c4591d612bf437630cdac1eadbbb503767e5 Mon Sep 17 00:00:00 2001 From: zyclove Date: Fri, 14 Feb 2025 21:22:01 +0800 Subject: [PATCH] feat:add create index scripts for opensearch (#2055) --- .../db/opensearch/ai_sql_memory.json | 95 +++++++++++++ .../db/opensearch/ai_sql_memory_template.json | 98 +++++++++++++ .../db/opensearch/ai_sql_meta_collection.json | 131 ++++++++++++++++++ .../ai_sql_preset_query_collection.json | 59 ++++++++ .../ai_sql_text2dsl_agent_collection.json | 95 +++++++++++++ .../db/opensearch/opensearch_create_index.sh | 42 ++++++ 6 files changed, 520 insertions(+) create mode 100644 launchers/standalone/src/main/resources/db/opensearch/ai_sql_memory.json create mode 100644 launchers/standalone/src/main/resources/db/opensearch/ai_sql_memory_template.json create mode 100644 launchers/standalone/src/main/resources/db/opensearch/ai_sql_meta_collection.json create mode 100644 launchers/standalone/src/main/resources/db/opensearch/ai_sql_preset_query_collection.json create mode 100644 launchers/standalone/src/main/resources/db/opensearch/ai_sql_text2dsl_agent_collection.json create mode 100755 launchers/standalone/src/main/resources/db/opensearch/opensearch_create_index.sh diff --git a/launchers/standalone/src/main/resources/db/opensearch/ai_sql_memory.json b/launchers/standalone/src/main/resources/db/opensearch/ai_sql_memory.json new file mode 100644 index 000000000..abeae9e18 --- /dev/null +++ b/launchers/standalone/src/main/resources/db/opensearch/ai_sql_memory.json @@ -0,0 +1,95 @@ +{ + "settings": { + "index": { + "number_of_shards": 6, + "number_of_replicas": 2, + "knn": true + }, + "analysis": { + "analyzer": { + "ik_max_word_lowercase_html_strip": { + "filter": [ + "lowercase", + "asciifolding" + ], + "char_filter": [ + "html_strip" + ], + "tokenizer": "ik_max_word" + } + } + } + }, + "mappings": { + "properties": { + "metadata": { + "type": "object", + "properties": { + "dbSchema": { + "fields": { + "keyword": { + "type": "keyword", + 
"ignore_above": 256 + } + }, + "type": "text" + }, + "queryId": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + }, + "question": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + }, + "sideInfo": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + }, + "sql": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + } + } + }, + "text": { + "analyzer": "ik_max_word_lowercase_html_strip", + "search_analyzer": "ik_smart", + "type": "text" + }, + "vector": { + "dimension": 512, + "method": { + "engine": "lucene", + "space_type": "cosinesimil", + "name": "hnsw", + "parameters": { + "ef_construction": 100, + "m": 16 + } + }, + "type": "knn_vector" + } + } + } +} \ No newline at end of file diff --git a/launchers/standalone/src/main/resources/db/opensearch/ai_sql_memory_template.json b/launchers/standalone/src/main/resources/db/opensearch/ai_sql_memory_template.json new file mode 100644 index 000000000..3000f30cf --- /dev/null +++ b/launchers/standalone/src/main/resources/db/opensearch/ai_sql_memory_template.json @@ -0,0 +1,98 @@ +{ + "index_patterns": ["ai_sql_memory_*"], + "template": { + "settings": { + "index": { + "number_of_shards": 6, + "number_of_replicas": 2, + "knn": true + }, + "analysis": { + "analyzer": { + "ik_max_word_lowercase_html_strip": { + "filter": [ + "lowercase", + "asciifolding" + ], + "char_filter": [ + "html_strip" + ], + "tokenizer": "ik_max_word" + } + } + } + }, + "mappings": { + "properties": { + "metadata": { + "type": "object", + "properties": { + "dbSchema": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + }, + "queryId": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + }, + "question": { + "fields": { + "keyword": { + "type": "keyword", 
+ "ignore_above": 256 + } + }, + "type": "text" + }, + "sideInfo": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + }, + "sql": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + } + } + }, + "text": { + "analyzer": "ik_max_word_lowercase_html_strip", + "search_analyzer": "ik_smart", + "type": "text" + }, + "vector": { + "dimension": 512, + "method": { + "engine": "lucene", + "space_type": "cosinesimil", + "name": "hnsw", + "parameters": { + "ef_construction": 100, + "m": 16 + } + }, + "type": "knn_vector" + } + } + } + } +} \ No newline at end of file diff --git a/launchers/standalone/src/main/resources/db/opensearch/ai_sql_meta_collection.json b/launchers/standalone/src/main/resources/db/opensearch/ai_sql_meta_collection.json new file mode 100644 index 000000000..102ac4f05 --- /dev/null +++ b/launchers/standalone/src/main/resources/db/opensearch/ai_sql_meta_collection.json @@ -0,0 +1,131 @@ +{ + "settings": { + "index": { + "number_of_shards": 6, + "number_of_replicas": 2, + "knn": true + }, + "analysis": { + "analyzer": { + "ik_max_word_lowercase_html_strip": { + "filter": [ + "lowercase", + "asciifolding" + ], + "char_filter": [ + "html_strip" + ], + "tokenizer": "ik_max_word" + } + } + } + }, + "mappings": { + "properties": { + "metadata": { + "type": "object", + "properties": { + "bizName": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + }, + "defaultAgg": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + }, + "domainId": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + }, + "id": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + }, + "modelId": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + }, + 
"name": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + }, + "newName": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + }, + "queryId": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + }, + "type": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + } + } + }, + "text": { + "analyzer": "ik_max_word_lowercase_html_strip", + "search_analyzer": "ik_smart", + "type": "text" + }, + "vector": { + "dimension": 512, + "method": { + "engine": "lucene", + "space_type": "cosinesimil", + "name": "hnsw", + "parameters": { + "ef_construction": 100, + "m": 16 + } + }, + "type": "knn_vector" + } + } + } +} \ No newline at end of file diff --git a/launchers/standalone/src/main/resources/db/opensearch/ai_sql_preset_query_collection.json b/launchers/standalone/src/main/resources/db/opensearch/ai_sql_preset_query_collection.json new file mode 100644 index 000000000..53c87fc9b --- /dev/null +++ b/launchers/standalone/src/main/resources/db/opensearch/ai_sql_preset_query_collection.json @@ -0,0 +1,59 @@ +{ + "settings": { + "index": { + "number_of_shards": 6, + "number_of_replicas": 2, + "knn": true + }, + "analysis": { + "analyzer": { + "ik_max_word_lowercase_html_strip": { + "filter": [ + "lowercase", + "asciifolding" + ], + "char_filter": [ + "html_strip" + ], + "tokenizer": "ik_max_word" + } + } + } + }, + "mappings": { + "properties": { + "metadata": { + "type": "object", + "properties": { + "queryId": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }, + "text": { + "type": "text", + "analyzer": "ik_max_word_lowercase_html_strip", + "search_analyzer": "ik_smart" + }, + "vector": { + "type": "knn_vector", + "dimension": 512, + "method": { + "name": "hnsw", + "engine": "lucene", + "space_type": "cosinesimil", + 
"parameters": { + "ef_construction": 100, + "m": 16 + } + } + } + } + } +} \ No newline at end of file diff --git a/launchers/standalone/src/main/resources/db/opensearch/ai_sql_text2dsl_agent_collection.json b/launchers/standalone/src/main/resources/db/opensearch/ai_sql_text2dsl_agent_collection.json new file mode 100644 index 000000000..abeae9e18 --- /dev/null +++ b/launchers/standalone/src/main/resources/db/opensearch/ai_sql_text2dsl_agent_collection.json @@ -0,0 +1,95 @@ +{ + "settings": { + "index": { + "number_of_shards": 6, + "number_of_replicas": 2, + "knn": true + }, + "analysis": { + "analyzer": { + "ik_max_word_lowercase_html_strip": { + "filter": [ + "lowercase", + "asciifolding" + ], + "char_filter": [ + "html_strip" + ], + "tokenizer": "ik_max_word" + } + } + } + }, + "mappings": { + "properties": { + "metadata": { + "type": "object", + "properties": { + "dbSchema": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + }, + "queryId": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + }, + "question": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + }, + "sideInfo": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + }, + "sql": { + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "type": "text" + } + } + }, + "text": { + "analyzer": "ik_max_word_lowercase_html_strip", + "search_analyzer": "ik_smart", + "type": "text" + }, + "vector": { + "dimension": 512, + "method": { + "engine": "lucene", + "space_type": "cosinesimil", + "name": "hnsw", + "parameters": { + "ef_construction": 100, + "m": 16 + } + }, + "type": "knn_vector" + } + } + } +} \ No newline at end of file diff --git a/launchers/standalone/src/main/resources/db/opensearch/opensearch_create_index.sh 
b/launchers/standalone/src/main/resources/db/opensearch/opensearch_create_index.sh
new file mode 100755
index 000000000..0ba6b4244
--- /dev/null
+++ b/launchers/standalone/src/main/resources/db/opensearch/opensearch_create_index.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+# Create the OpenSearch indices used by AI SQL.
+# Usage: ./opensearch_create_index.sh
+# Note: OpenSearch must be running and the indices must not already exist.
+# The index definitions use a vector dimension of 512. If you need a different
+# dimension, adjust the JSON index definitions before running this script.
+# Connection settings; each default can be overridden via the environment:
+#   ES_HOST: the host of opensearch
+#   ES_USER: the user of opensearch
+#   ES_PASSWORD: the password of opensearch
+#   ES_INDEX_PREFIX: the index prefix of opensearch
+es_host="${ES_HOST:-https://opensearch-node:7799}"
+es_user="${ES_USER:-admin}"
+es_password="${ES_PASSWORD:-admin}"
+es_index_prefix="${ES_INDEX_PREFIX:-ai_sql}"
+# Read the definitions from this script's directory, not the caller's cwd.
+script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+failed=0
+
+# Create index "<prefix>_$1" from the JSON definition file $2 in script_dir.
+# Prints the server reply; returns non-zero if the file is unreadable,
+# curl fails, or the HTTP status is not 2xx.
+create_index() {
+  local index="${es_index_prefix}_$1" definition="${script_dir}/$2" reply
+  echo "creating index ${index}"
+  # curl merely warns about an unreadable @file and would send an empty body.
+  [[ -r "${definition}" ]] || { echo "cannot read ${definition}" >&2; return 1; }
+  reply="$(curl -sS -w '\n%{http_code}' -X PUT "${es_host}/${index}" \
+    -u "${es_user}:${es_password}" -H "Content-Type: application/json" \
+    --data-binary "@${definition}")" || return 1
+  printf '%s\n' "${reply%????}" # body; -w appended a newline + 3-digit status
+  [[ "${reply: -3}" == 2* ]] || { echo "failed to create ${index}" >&2; return 1; }
+}
+
+echo "Creating index pattern for AI SQL"
+create_index meta_collection ai_sql_meta_collection.json || failed=1
+create_index text2dsl_agent_collection ai_sql_text2dsl_agent_collection.json || failed=1
+create_index preset_query_collection ai_sql_preset_query_collection.json || failed=1
+for i in {1..10}; do
+  create_index "memory_${i}" ai_sql_memory.json || failed=1
+done
+exit "${failed}"