(improvement)(launcher)Introduce supersonic-env.sh to incorporate LLM-related configs.

2025-12-11 03:58:14 +00:00 · 2024-05-20 11:16:47 +08:00
parent cbafff0935
commit 542121210e
7 changed files with 63 additions and 169 deletions
--- a/assembly/bin/supersonic-build.sh
+++ b/assembly/bin/supersonic-build.sh
@@ -15,7 +15,7 @@ fi
 function buildJavaService {
  model_name=$1
  echo "starting building supersonic-${model_name} service"
-  mvn -f $projectDir/launchers/${model_name} clean package -DskipTests
+  mvn -f $projectDir clean package -DskipTests
  if [ $? -ne 0 ]; then
      echo "Failed to build backend Java modules."
      exit 1
@@ -72,7 +72,7 @@ elif [ "$service" == "webapp" ]; then
  buildWebapp
  target_path=$projectDir/launchers/$STANDALONE_SERVICE/target/classes
  tar xvf $projectDir/webapp/supersonic-webapp.tar.gz -C $target_path
-  mv $target_path/supersonic_webapp $target_path/webapp
+  mv $target_path/supersonic-webapp $target_path/webapp
 else
  buildJavaService $service
  buildWebapp
--- a/assembly/bin/supersonic-daemon.sh
+++ b/assembly/bin/supersonic-daemon.sh
@@ -1,9 +1,12 @@
 #!/usr/bin/env bash

 sbinDir=$(cd "$(dirname "$0")"; pwd)
-chmod +x $sbinDir/supersonic-common.sh
 source $sbinDir/supersonic-common.sh

+set -a  # auto-export every variable assigned while the env file is sourced
+source "$sbinDir/../conf/supersonic-env.sh"
+set +a  # stop auto-exporting for the rest of the script
+
 command=$1
 service=$2
 if [ -z "$service"  ]; then
--- a/headless/python/config/config_parse.py
+++ b/headless/python/config/config_parse.py
@@ -1,5 +1,4 @@
 # -*- coding:utf-8 -*-
-import os
 import configparser

 import os
@@ -8,6 +7,12 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))

+class EnvInterpolation(configparser.BasicInterpolation):
+    """BasicInterpolation variant that also substitutes $VAR / ${VAR} from the process environment."""
+
+    def before_get(self, parser, section, option, value, defaults):
+        # Run the inherited interpolation first, then expand environment references in its result.
+        return os.path.expandvars(super().before_get(parser, section, option, value, defaults))

 def type_convert(input_str: str):
    try:
@@ -16,13 +21,13 @@ def type_convert(input_str: str):
        return input_str


-PROJECT_DIR_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 
+PROJECT_DIR_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 config_dir = "config"
-CONFIG_DIR_PATH = os.path.join(PROJECT_DIR_PATH, config_dir) 
+CONFIG_DIR_PATH = os.path.join(PROJECT_DIR_PATH, config_dir)
 config_file = "run_config.ini"
 config_path = os.path.join(CONFIG_DIR_PATH, config_file)

-config = configparser.ConfigParser()
+config = configparser.ConfigParser(interpolation=EnvInterpolation())
 config.read(config_path)

 log_dir = "log"
@@ -77,5 +82,3 @@ if __name__ == "__main__":
    print(f"ACT_MIN_WINDOWN_SIZE: {ACT_MIN_WINDOWN_SIZE}")
    print(f"ACT_MAX_WINDOWN_SIZE: {ACT_MAX_WINDOWN_SIZE}")
    print(f"LOG_FILE_PATH: {LOG_FILE_PATH}")
-
-    
--- a/headless/python/config/run_config.ini
+++ b/headless/python/config/run_config.ini
@@ -17,13 +17,11 @@ ACT_MAX_WINDOWN_SIZE = 10
 [Text2Vec]
 HF_TEXT2VEC_MODEL_NAME = GanymedeNil/text2vec-large-chinese

-
 [LLMProvider]
 LLM_PROVIDER_NAME = openai

-
 [LLMModel]
-MODEL_NAME = gpt-3.5-turbo
-OPENAI_API_KEY = YOUR_API_KEY
-OPENAI_API_BASE = http://YOUR_API_BASE
-TEMPERATURE = 0.0
+OPENAI_API_KEY = ${OPENAI_API_KEY}
+OPENAI_API_BASE = ${OPENAI_API_BASE}
+MODEL_NAME = ${OPENAI_MODEL_NAME}
+TEMPERATURE = ${OPENAI_TEMPERATURE}
--- a/launchers/standalone/src/main/resources/application-local.yaml
+++ b/launchers/standalone/src/main/resources/application-local.yaml
@@ -1,3 +1,10 @@
+server:
+  port: 9080
+  compression:
+    enabled: true
+    min-response-size: 1024
+    mime-types: application/javascript,application/json,application/xml,text/html,text/xml,text/plain,text/css,image/*
+
 spring:
  h2:
    console:
@@ -12,15 +19,8 @@ spring:
    username: root
    password: semantic

-demo:
-  enabled: true
-
-server:
-  port: 9080
-  compression:
-    enabled: true
-    min-response-size: 1024
-    mime-types: application/javascript,application/json,application/xml,text/html,text/xml,text/plain,text/css,image/*
+mybatis:  # mapper XML locations, comma-separated resource patterns
+  mapper-locations: classpath:mappers/custom/*.xml,classpath*:/mappers/*.xml

 authentication:
  enable: true
@@ -31,6 +31,16 @@ authentication:
      header:
        key: Authorization

+demo:
+  enabled: true
+
+query:
+  optimizer:
+    enable: true
+
+multi:
+  turn: false
+
 time:
  threshold: 100

@@ -39,20 +49,19 @@ dimension:
 metric:
  topn: 20

-mybatis:
-  mapper-locations=classpath:mappers/custom/*.xml,classpath*:/mappers/*.xml
-
 corrector:
  additional:
    information: true
+
 pyllm:
  url: http://127.0.0.1:9092
-
 llm:
  parser:
    url: ${pyllm.url}
+
 embedding:
  url: ${pyllm.url}
+
 functionCall:
  url: ${pyllm.url}

@@ -60,7 +69,6 @@ text2sql:
  example:
    num: 1

-#langchain4j config
 s2:
  langchain4j:
    #1.chat-model
@@ -68,20 +76,21 @@ s2:
      provider: open_ai
      openai:
        # Replace with your LLM configs
-        # Note: Below API key `demo` is provided by langchain4j community which limits 1000 tokens per request.
-        base-url: https://api.openai.com/v1
-        api-key: demo
-        model-name: gpt-3.5-turbo
-        temperature: 0.0
-        timeout: PT60S
+        # Note: The default API key `demo` is provided by the langchain4j community
+        #       and is limited to 1000 tokens per request.
+        base-url: ${OPENAI_API_BASE:https://api.openai.com/v1}
+        api-key: ${OPENAI_API_KEY:demo}
+        model-name: ${OPENAI_MODEL_NAME:gpt-3.5-turbo}
+        temperature: ${OPENAI_TEMPERATURE:0.0}
+        timeout: ${OPENAI_TIMEOUT:PT60S}
    #2.embedding-model
    #2.1 in_memory(default)
    embedding-model:
      provider: in_process
-#      inProcess:
-#        modelPath: /data/model.onnx
-#        vocabularyPath: /data/onnx_vocab.txt
-#        shibing624/text2vec-base-chinese
+    #      inProcess:
+    #        modelPath: /data/model.onnx
+    #        vocabularyPath: /data/onnx_vocab.txt
+    #        shibing624/text2vec-base-chinese
    #2.2 open_ai
    #  embedding-model:
    #    provider: open_ai
@@ -105,11 +114,4 @@ logging:

 inMemoryEmbeddingStore:
  persistent:
-    path: /tmp
-
-query:
-  optimizer:
-    enable: true
-multi:
-  turn: false
-  num: 5
+    path: /tmp
--- a/launchers/standalone/src/main/resources/rewrite_examplar.json
+++ b/launchers/standalone/src/main/resources/rewrite_examplar.json
@@ -1,122 +0,0 @@
-[
-    {
-        "contextualQuestions": "[“近7天纯音乐的歌曲播放量 (补充信息:’ '纯音乐'‘是一个’语种‘。)”]",
-        "currentQuestion": "对比翻唱版呢 (补充信息:’ '翻唱版'‘是一个’歌曲版本‘。)",
-        "rewritingCurrentQuestion": "对比近7天翻唱版和纯音乐的歌曲播放量"
-    },
-    {
-        "contextualQuestions": "[]",
-        "currentQuestion": "robinlee在内容库的访问次数 (补充信息:’ 'robinlee'‘是一个’用户名‘。)",
-        "rewritingCurrentQuestion": "robinlee在内容库的访问次数"
-    },
-    {
-        "contextualQuestions": "[\"robinlee在内容库的访问次数 (补充信息:’ 'robinlee'‘是一个’用户名‘。)\"]",
-        "currentQuestion": "对比jackjchen呢？ (补充信息:’ 'jackjchen'‘是一个’用户名‘。)",
-        "rewritingCurrentQuestion": "robinlee对比jackjchen在内容库的访问次数"
-    },
-    {
-        "contextualQuestions": "[\"robinlee在内容库的访问次数 (补充信息:’ 'robinlee'‘是一个’用户名‘。)\",\"对比jackjchen呢？ (补充信息:’ 'jackjchen'‘是一个’用户名‘。)\"]。",
-        "currentQuestion": "内容库近12个月访问人数按部门",
-        "rewritingCurrentQuestion": "内容库近12个月访问人数按部门"
-    },
-    {
-        "contextualQuestions": "[\"robinlee在内容库的访问次数 (补充信息:’ 'robinlee'‘是一个’用户名‘。)\",\"对比jackjchen呢？ (补充信息:’ 'jackjchen'‘是一个’用户名‘。)\",\"内容库近12个月访问人数按部门\"]",
-        "currentQuestion": "访问次数呢？",
-        "rewritingCurrentQuestion": "内容库近12个月访问次数按部门"
-    },
-    {
-        "contextualQuestions": "[]",
-        "currentQuestion": "近3天海田飞系MPPM结算播放份额 (补充信息:’'海田飞系'‘是一个’严选版权归属系‘)",
-        "rewritingCurrentQuestion": "近3天海田飞系MPPM结算播放份额"
-    },
-    {
-        "contextualQuestions": "[\"近3天海田飞系MPPM结算播放份额(补充信息:’'海田飞系'‘是一个’严选版权归属系‘) \"]",
-        "currentQuestion": "近60天呢",
-        "rewritingCurrentQuestion": "近60天海田飞系MPPM结算播放份额"
-    },
-    {
-        "contextualQuestions": "[\"近3天海田飞系MPPM结算播放份额(补充信息:’'海田飞系'‘是一个’严选版权归属系‘) \",\"近60天呢\"]",
-        "currentQuestion": "飞天系呢(补充信息:’'飞天系'‘是一个’严选版权归属系‘)",
-        "rewritingCurrentQuestion": "近60天飞天系MPPM结算播放份额"
-    },
-    {
-        "contextualQuestions": "[“近90天袁亚伟播放量是多少 (补充信息:'袁亚伟'是一个歌手名)”]",
-        "currentQuestion": "平均值是多少",
-        "rewritingCurrentQuestion": "近90天袁亚伟播放量的平均值是多少"
-    },
-    {
-        "contextualQuestions": "[“近90天袁亚伟播放量是多少 (补充信息:'袁亚伟'是一个歌手名)”,\"平均值是多少\",\"总和是多少\"]",
-        "currentQuestion": "总和是多少",
-        "rewritingCurrentQuestion": "近90天袁亚伟播放量的总和是多少"
-    },
-    {
-        "contextualQuestions": "[\"播放量大于1万的歌曲有多少\"]",
-        "currentQuestion": "下载量大于10万的呢",
-        "rewritingCurrentQuestion": "下载量大于10万的歌曲有多少"
-    },
-    {
-        "contextualQuestions": "[\"周杰伦2023年6月之后发布的歌曲有哪些(补充信息:'周杰伦'是一个歌手名)\"]",
-        "currentQuestion": "这些歌曲有哪些播放量大于500W的？",
-        "rewritingCurrentQuestion": "周杰伦2023年6月之后发布的歌曲，有哪些播放量大于500W的？"
-    },
-    {
-        "contextualQuestions": "[“陈奕迅唱的所有的播放量大于20万的歌曲有哪些(补充信息:'陈奕迅'是一个歌手名)”]",
-        "currentQuestion": "大于100万的呢",
-        "rewritingCurrentQuestion": "陈奕迅唱的所有的播放量大于100万的歌曲有哪些"
-    },
-    {
-        "contextualQuestions": "[“陈奕迅唱的所有的播放量大于20万的歌曲有哪些(补充信息:'陈奕迅'是一个歌手名)”,\"大于100万的呢\"]",
-        "currentQuestion": "周杰伦去年发布的歌曲有哪些(补充信息:'周杰伦'是一个歌手名)",
-        "rewritingCurrentQuestion": "周杰伦去年发布的歌曲有哪些"
-    },
-    {
-        "contextualQuestions": "[“陈奕迅唱的所有的播放量大于20万的歌曲有哪些(补充信息:'陈奕迅'是一个歌手名)”,\"大于100万的呢\",\"周杰伦去年发布的歌曲有哪些(补充信息:'周杰伦'是一个歌手名)\"]",
-        "currentQuestion": "他今年发布的呢",
-        "rewritingCurrentQuestion": "周杰伦今年发布的歌曲有哪些"
-    },
-    {
-        "contextualQuestions": "[“陈奕迅唱的所有的播放量大于20万的歌曲有哪些(补充信息:'陈奕迅'是一个歌手名)”,\"大于100万的呢\",\"周杰伦去年发布的歌曲有哪些(补充信息:'周杰伦'是一个歌手名)\",\"他今年发布的呢\"]",
-        "currentQuestion": "我想要近半年签约的播放量前十的歌手有哪些",
-        "rewritingCurrentQuestion": "我想要近半年签约的播放量前十的歌手有哪些"
-    },
-    {
-        "contextualQuestions": "[]",
-        "currentQuestion": "最近一年发行的歌曲中，有哪些在近7天播放超过一千万的",
-        "rewritingCurrentQuestion": "最近一年发行的歌曲中，有哪些在近7天播放超过一千万的"
-    },
-    {
-        "contextualQuestions": "[“最近一年发行的歌曲中，有哪些在近7天播放超过一千万的”]",
-        "currentQuestion": "今年以来呢？",
-        "rewritingCurrentQuestion": "今年以来发行的歌曲中，有哪些在近7天播放超过一千万的"
-    },
-    {
-        "contextualQuestions": "[“最近一年发行的歌曲中，有哪些在近7天播放超过一千万的”,\"今年以来呢？\"]",
-        "currentQuestion": "2023年以来呢？",
-        "rewritingCurrentQuestion": "2023年以来发行的歌曲中，有哪些在近7天播放超过一千万的"
-    },
-    {
-        "contextualQuestions": "[\"内容库近20天访问次数\"]",
-        "currentQuestion": "按部门看一下",
-        "rewritingCurrentQuestion": "内容库近20天按部门的访问次数"
-    },
-    {
-        "contextualQuestions": "[\"内容库近20天访问次数\",\"按部门看一下\"]",
-        "currentQuestion": "按模块看一下",
-        "rewritingCurrentQuestion": "内容库近20天按模块的访问次数"
-    },
-    {
-        "contextualQuestions": "[\"内容库近20天访问次数\",\"按部门看一下\",\"按模块看一下\"]",
-        "currentQuestion": "看一下技术部的 (补充信息:’'技术部'‘是一个’部门‘)",
-        "rewritingCurrentQuestion": "技术部在内容库近20天的访问次数"
-    },
-    {
-        "contextualQuestions": "[\"内容库近20天访问次数\",\"按部门看一下\",\"按模块看一下\",\"看一下技术部的 (补充信息:’'技术部'‘是一个’部门‘)\"]",
-        "currentQuestion": "看一下产品部的 (补充信息:’'产品部'‘是一个’部门‘)",
-        "rewritingCurrentQuestion": "产品部在内容库近20天的访问次数"
-    },
-    {
-        "contextualQuestions": "[\"内容库近20天访问次数\",\"按部门看一下\",\"按模块看一下\",\"看一下技术部的 (补充信息:’'技术部'‘是一个’部门‘)\",\"看一下产品部的 (补充信息:’'产品部'‘是一个’部门‘)\"]",
-        "currentQuestion": "对比一下技术部、产品部(补充信息:'技术部'、‘产品部’分别是一个’部门‘)",
-        "rewritingCurrentQuestion": "对比一下技术部、产品部在内容库近20天的访问次数"
-    }
-]
--- a/launchers/standalone/src/main/resources/supersonic-env.sh
+++ b/launchers/standalone/src/main/resources/supersonic-env.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+# LLM settings, meant to be sourced (supersonic-daemon.sh sources conf/supersonic-env.sh under `set -a`).
+# Values already set in the environment win; otherwise the defaults below apply.
+# Note: The default API key `demo` is provided by the langchain4j community
+#       and is limited to 1000 tokens per request.
+OPENAI_API_BASE="${OPENAI_API_BASE:-https://api.openai.com/v1}"
+OPENAI_API_KEY="${OPENAI_API_KEY:-demo}"
+OPENAI_MODEL_NAME="${OPENAI_MODEL_NAME:-gpt-3.5-turbo}"
+OPENAI_TEMPERATURE="${OPENAI_TEMPERATURE:-0.0}"
+OPENAI_TIMEOUT="${OPENAI_TIMEOUT:-PT60S}"