From 529c8dc79b1bf8b29704d9b38afa22d413ef6a81 Mon Sep 17 00:00:00 2001 From: jerryjzhang Date: Thu, 6 Jun 2024 13:00:00 +0800 Subject: [PATCH] (improvement)(headless)Replace terms in exemplar files --- .../python/few_shot_example/s2sql_exemplar.py | 98 +++++++------- .../s2sql_exemplar3_transformed.json | 126 +++++++++--------- .../src/main/resources/s2ql_exemplar.json | 104 +++++++-------- .../src/test/resources/s2ql_exemplar.json | 104 +++++++-------- 4 files changed, 216 insertions(+), 216 deletions(-) diff --git a/headless/python/few_shot_example/s2sql_exemplar.py b/headless/python/few_shot_example/s2sql_exemplar.py index f89089c8e..e27f6b22a 100644 --- a/headless/python/few_shot_example/s2sql_exemplar.py +++ b/headless/python/few_shot_example/s2sql_exemplar.py @@ -1,37 +1,37 @@ exemplars= [ { "currentDate":"2020-12-01", - "tableName":"内容库产品", + "tableName":"超音数产品", "fieldsList":"""["部门", "模块", "用户名", "访问次数", "访问人数", "访问时长", "数据日期"]""", - "question":"比较jackjchen和robinlee在内容库的访问次数", + "question":"比较jackjchen和robinlee在超音数的访问次数", "priorSchemaLinks":"""['jackjchen'->用户名, 'robinlee'->用户名]""", - "analysis": """让我们一步一步地思考。在问题“比较jackjchen和robinlee在内容库的访问次数“中,我们被问: + "analysis": """让我们一步一步地思考。在问题“比较jackjchen和robinlee在超音数的访问次数“中,我们被问: “比较jackjchen和robinlee”,所以我们需要column=[用户名],cell values = ['jackjchen', 'robinlee'],所以有[用户名:('jackjchen', 'robinlee')] -”内容库的访问次数“,所以我们需要column=[访问次数]""", +”超音数的访问次数“,所以我们需要column=[访问次数]""", "schemaLinks":"""["用户名":("'jackjchen'", "'robinlee'"), "访问次数"]""", - "sql":"""SELECT 用户名, 访问次数 FROM 内容库产品 WHERE 用户名 IN ('jackjchen', 'robinlee')""" + "sql":"""SELECT 用户名, 访问次数 FROM 超音数产品 WHERE 用户名 IN ('jackjchen', 'robinlee')""" }, { "currentDate":"2022-11-06", - "tableName":"内容库产品", + "tableName":"超音数产品", "fieldsList":"""["部门", "模块", "用户名", "访问次数", "访问人数", "访问时长", "数据日期"]""", - "question":"内容库近12个月访问人数 按部门", + "question":"超音数近12个月访问人数 按部门", "priorSchemaLinks":"""[]""", - "analysis": """让我们一步一步地思考。在问题“内容库近12个月访问人数 按部门“中,我们被问: -”内容库近12个月“,所以我们需要column=[数据日期],cell values = [12],所以有[数据日期:(12)] + "analysis": """让我们一步一步地思考。在问题“超音数近12个月访问人数 按部门“中,我们被问: +”超音数近12个月“,所以我们需要column=[数据日期],cell values = [12],所以有[数据日期:(12)] “访问人数”,所以我们需要column=[访问人数] ”按部门“,所以我们需要column=[部门]""", "schemaLinks":"""["数据日期":(12), "访问人数", "部门"]""", - "sql":"""SELECT 部门, 数据日期, 访问人数 FROM 内容库产品 WHERE datediff('month', 数据日期, '2022-11-06') <= 12 """ + "sql":"""SELECT 部门, 数据日期, 访问人数 FROM 超音数产品 WHERE datediff('month', 数据日期, '2022-11-06') <= 12 """ }, { "currentDate":"2023-04-21", - "tableName":"内容库产品", + "tableName":"超音数产品", "fieldsList":"""["部门", "模块", "用户名", "访问次数", "访问人数", "访问时长", "数据日期"]""", - "question":"内容库美术部、技术研发部的访问时长", + "question":"超音数美术部、技术研发部的访问时长", "priorSchemaLinks":"""['美术部'->部门, '技术研发部'->部门]""", - "analysis": """让我们一步一步地思考。在问题“内容库美术部、技术研发部的访问时长“中,我们被问: + "analysis": """让我们一步一步地思考。在问题“超音数美术部、技术研发部的访问时长“中,我们被问: “访问时长”,所以我们需要column=[访问时长] -”内容库美术部、技术研发部“,所以我们需要column=[部门], cell values = ['美术部', '技术研发部'],所以有[部门:('美术部', '技术研发部')]""", +”超音数美术部、技术研发部“,所以我们需要column=[部门], cell values = ['美术部', '技术研发部'],所以有[部门:('美术部', '技术研发部')]""", "schemaLinks":"""["访问时长", "部门":("'美术部'", "'技术研发部'")]""", - "sql":"""SELECT 部门, 访问时长 FROM 内容库产品 WHERE 部门 IN ('美术部', '技术研发部')""" + "sql":"""SELECT 部门, 访问时长 FROM 超音数产品 WHERE 部门 IN ('美术部', '技术研发部')""" }, { "currentDate":"2023-08-21", "tableName":"严选", @@ -81,27 +81,27 @@ exemplars= [ "sql":"""SELECT 歌曲名 FROM 歌曲库 WHERE 结算播放量 > 10000""" }, { "currentDate":"2023-07-31", - "tableName":"内容库产品", + "tableName":"超音数产品", "fieldsList":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""", - "question":"内容库访问时长小于1小时,且来自美术部的用户是哪些", + "question":"超音数访问时长小于1小时,且来自美术部的用户是哪些", "priorSchemaLinks":"""['美术部'->部门]""", - "analysis": """让我们一步一步地思考。在问题“内容库访问时长小于1小时,且来自美术部的用户是哪些“中,我们被问: + "analysis": """让我们一步一步地思考。在问题“超音数访问时长小于1小时,且来自美术部的用户是哪些“中,我们被问: “用户是哪些”,所以我们需要column=[用户名] ”美术部的“,所以我们需要column=[部门], cell values = ['美术部'],所以有[部门:('美术部')] ”访问时长小于1小时“,所以我们需要column=[访问时长], cell values = [1],所以有[访问时长:(1)]""", "schemaLinks":"""["用户名", "部门":("'美术部'"), "访问时长":(1)]""", - "sql":"""SELECT 用户名 FROM 内容库产品 WHERE 部门 = '美术部' AND 访问时长 < 1""" + "sql":"""SELECT 用户名 FROM 超音数产品 WHERE 部门 = '美术部' AND 访问时长 < 1""" }, { "currentDate":"2023-08-31", - "tableName":"内容库产品", + "tableName":"超音数产品", "fieldsList":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""", - "question":"内容库pv最高的用户有哪些", + "question":"超音数pv最高的用户有哪些", "priorSchemaLinks":"""[]""", - "analysis": """让我们一步一步地思考。在问题“内容库pv最高的用户有哪些“中,我们被问: + "analysis": """让我们一步一步地思考。在问题“超音数pv最高的用户有哪些“中,我们被问: “用户有哪些”,所以我们需要column=[用户名] ”pv最高的“,所以我们需要column=[访问次数], cell values = [1],所以有[访问次数:(1)]""", "schemaLinks":"""["用户名", "访问次数":(1)]""", - "sql":"""SELECT 用户名 FROM 内容库产品 ORDER BY 访问次数 DESC LIMIT 1""" + "sql":"""SELECT 用户名 FROM 超音数产品 ORDER BY 访问次数 DESC LIMIT 1""" }, { "currentDate":"2023-08-31", "tableName":"艺人库", @@ -128,15 +128,15 @@ exemplars= [ "sql":"""SELECT SUM(结算播放量) FROM 艺人库 WHERE 歌手名 = '周倩倩' AND datediff('day', 数据日期, '2023-08-31') <= 7 """ }, { "currentDate":"2023-09-14", - "tableName":"内容库产品", + "tableName":"超音数产品", "fieldsList":"""["部门", "模块", "用户名", "访问次数", "访问人数", "访问时长", "数据日期"]""", - "question":"内容库访问次数大于1k的部门是哪些", + "question":"超音数访问次数大于1k的部门是哪些", "priorSchemaLinks":"""[]""", - "analysis": """让我们一步一步地思考。在问题“内容库访问次数大于1k的部门是哪些“中,我们被问: + "analysis": """让我们一步一步地思考。在问题“超音数访问次数大于1k的部门是哪些“中,我们被问: “部门是哪些”,所以我们需要column=[部门] ”访问次数大于1k的“,所以我们需要column=[访问次数], cell values = [1000],所以有[访问次数:(1000)]""", "schemaLinks":"""["部门", "访问次数":(1000)]""", - "sql":"""SELECT 部门 FROM 内容库产品 WHERE 访问次数 > 1000""" + "sql":"""SELECT 部门 FROM 超音数产品 WHERE 访问次数 > 1000""" }, { "currentDate":"2023-09-18", "tableName":"歌曲库", @@ -273,55 +273,55 @@ exemplars= [ }, { "currentDate":"2023-09-04", - "tableName":"内容库产品", + "tableName":"超音数产品", "fieldsList":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""", - "question":"内容库近30天访问次数的平均数", + "question":"超音数近30天访问次数的平均数", "priorSchemaLinks":"""[]""", - "analysis": """让我们一步一步地思考。在问题“内容库近30天访问次数的平均数“中,我们被问: + "analysis": """让我们一步一步地思考。在问题“超音数近30天访问次数的平均数“中,我们被问: “访问次数的平均数”,所以我们需要column=[访问次数] -”内容库近30天“,所以我们需要column=[数据日期], cell values = [30],所以有[数据日期:(30)]""", +”超音数近30天“,所以我们需要column=[数据日期], cell values = [30],所以有[数据日期:(30)]""", "schemaLinks":"""["访问次数", "数据日期":(30)]""", - "sql":"""SELECT AVG(访问次数) FROM 内容库产品 WHERE datediff('day', 数据日期, '2023-09-04') <= 30 """ + "sql":"""SELECT AVG(访问次数) FROM 超音数产品 WHERE datediff('day', 数据日期, '2023-09-04') <= 30 """ }, { "currentDate":"2023-09-04", - "tableName":"内容库产品", + "tableName":"超音数产品", "fieldsList":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""", - "question":"内容库近半年哪个月的访问次数汇总最高", + "question":"超音数近半年哪个月的访问次数汇总最高", "priorSchemaLinks":"""[]""", - "analysis": """让我们一步一步地思考。在问题“内容库近半年哪个月的访问次数汇总最高“中,我们被问: + "analysis": """让我们一步一步地思考。在问题“超音数近半年哪个月的访问次数汇总最高“中,我们被问: “访问次数汇总最高”,所以我们需要column=[访问次数], cell values = [1],所以有[访问次数:(1)] -”内容库近半年“,所以我们需要column=[数据日期], cell values = [0.5],所以有[数据日期:(0.5)]""", +”超音数近半年“,所以我们需要column=[数据日期], cell values = [0.5],所以有[数据日期:(0.5)]""", "schemaLinks":"""["访问次数":(1), "数据日期":(0.5)]""", - "sql":"""SELECT MONTH(数据日期), SUM(访问次数) FROM 内容库产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期) ORDER BY SUM(访问次数) DESC LIMIT 1""" + "sql":"""SELECT MONTH(数据日期), SUM(访问次数) FROM 超音数产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期) ORDER BY SUM(访问次数) DESC LIMIT 1""" }, { "currentDate":"2023-09-04", - "tableName":"内容库产品", + "tableName":"超音数产品", "fieldsList":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""", - "question":"内容库近半年每个月的平均访问次数", + "question":"超音数近半年每个月的平均访问次数", "priorSchemaLinks":"""[]""", - "analysis": """让我们一步一步地思考。在问题“内容库近半年每个月的平均访问次数“中,我们被问: + "analysis": """让我们一步一步地思考。在问题“超音数近半年每个月的平均访问次数“中,我们被问: “每个月的平均访问次数”,所以我们需要column=[访问次数] -”内容库近半年“,所以我们需要column=[数据日期], cell values = [0.5],所以有[数据日期:(0.5)]""", +”超音数近半年“,所以我们需要column=[数据日期], cell values = [0.5],所以有[数据日期:(0.5)]""", "schemaLinks":"""["访问次数", "数据日期":(0.5)]""", - "sql":"""SELECT MONTH(数据日期), AVG(访问次数) FROM 内容库产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期)""" + "sql":"""SELECT MONTH(数据日期), AVG(访问次数) FROM 超音数产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期)""" }, { "currentDate":"2023-09-10", - "tableName":"内容库产品", + "tableName":"超音数产品", "fieldsList":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""", - "question":"内容库 按部门统计访问次数 top10 的部门", + "question":"超音数 按部门统计访问次数 top10 的部门", "priorSchemaLinks":"""[]""", - "analysis": """让我们一步一步地思考。在问题“内容库 按部门统计访问次数 top10 的部门“中,我们被问: + "analysis": """让我们一步一步地思考。在问题“超音数 按部门统计访问次数 top10 的部门“中,我们被问: “访问次数 top10 的部门”,所以我们需要column=[访问次数], cell values = [10],所以有[访问次数:(10)] -”内容库 按部门统计“,所以我们需要column=[部门]""", +”超音数 按部门统计“,所以我们需要column=[部门]""", "schemaLinks":"""["访问次数":(10), "部门"]""", - "sql":"""SELECT 部门, SUM(访问次数) FROM 内容库产品 GROUP BY 部门 ORDER BY SUM(访问次数) DESC LIMIT 10""" + "sql":"""SELECT 部门, SUM(访问次数) FROM 超音数产品 GROUP BY 部门 ORDER BY SUM(访问次数) DESC LIMIT 10""" }, { "currentDate":"2023-09-10", - "tableName":"内容库产品", + "tableName":"超音数产品", "fieldsList":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""", "question":"超音速 近7个月,月度总访问量超过 2万的月份", "priorSchemaLinks":"""[]""", @@ -329,7 +329,7 @@ exemplars= [ “月度总访问量超过 2万的月份”,所以我们需要column=[访问次数], cell values = [20000],所以有[访问次数:(20000)] ”超音速 近7个月“,所以我们需要column=[数据日期], cell values = [7],所以有[数据日期:(7)]""", "schemaLinks":"""["访问次数":(20000), "数据日期":(7)]""", - "sql":"""SELECT MONTH(数据日期) FROM 内容库产品 WHERE datediff('month', 数据日期, '2023-09-10') <= 7 GROUP BY MONTH(数据日期) HAVING SUM(访问次数) > 20000""" + "sql":"""SELECT MONTH(数据日期) FROM 超音数产品 WHERE datediff('month', 数据日期, '2023-09-10') <= 7 GROUP BY MONTH(数据日期) HAVING SUM(访问次数) > 20000""" }, { "currentDate":"2023-09-10", diff --git a/headless/python/few_shot_example/s2sql_exemplar3_transformed.json b/headless/python/few_shot_example/s2sql_exemplar3_transformed.json index 708ac0b50..38f836367 100644 --- a/headless/python/few_shot_example/s2sql_exemplar3_transformed.json +++ b/headless/python/few_shot_example/s2sql_exemplar3_transformed.json @@ -1,29 +1,29 @@ [ { - "question": "比较jackjchen和robinlee在内容库的访问次数", - "questionAugmented": "比较jackjchen和robinlee在内容库的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) (备注: )", - "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", - "sql": "SELECT 用户名, 访问次数 FROM 内容库产品 WHERE 用户名 IN ('jackjchen', 'robinlee')", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"比较jackjchen和robinlee在内容库的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) (备注: )\", we are asked:\n\"的访问次数 \" so we need column = [访问次数]\n\"’用户名‘,\" so we need column = [用户名]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [jackjchen,robinlee]. So the Schema_links are:\nSchema_links: [访问次数,用户名,jackjchen,robinlee]", + "question": "比较jackjchen和robinlee在超音数的访问次数", + "questionAugmented": "比较jackjchen和robinlee在超音数的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) (备注: )", + "modelName": "超音数产品", + "dbSchema": "Table: 超音数产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", + "sql": "SELECT 用户名, 访问次数 FROM 超音数产品 WHERE 用户名 IN ('jackjchen', 'robinlee')", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"比较jackjchen和robinlee在超音数的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) (备注: )\", we are asked:\n\"的访问次数 \" so we need column = [访问次数]\n\"’用户名‘,\" so we need column = [用户名]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [jackjchen,robinlee]. So the Schema_links are:\nSchema_links: [访问次数,用户名,jackjchen,robinlee]", "generatedSchemaLinkings": "[访问次数,用户名,jackjchen,robinlee]" }, { - "question": "内容库近12个月访问人数 按部门", - "questionAugmented": "内容库近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) (备注: )", - "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", - "sql": "SELECT 部门, 数据日期, 访问人数 FROM 内容库产品 WHERE datediff('month', 数据日期, '2022-11-06') <= 12 ", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\" 按部门 (\" so we need column = [部门]\n\"访问人数 按\" so we need column = [访问人数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [12,month]. So the Schema_links are:\nSchema_links: [数据日期,部门,访问人数,12,month]", + "question": "超音数近12个月访问人数 按部门", + "questionAugmented": "超音数近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) (备注: )", + "modelName": "超音数产品", + "dbSchema": "Table: 超音数产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", + "sql": "SELECT 部门, 数据日期, 访问人数 FROM 超音数产品 WHERE datediff('month', 数据日期, '2022-11-06') <= 12 ", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\" 按部门 (\" so we need column = [部门]\n\"访问人数 按\" so we need column = [访问人数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [12,month]. So the Schema_links are:\nSchema_links: [数据日期,部门,访问人数,12,month]", "generatedSchemaLinkings": "[数据日期,部门,访问人数,12,month]" }, { - "question": "内容库美术部、技术研发部的访问时长", - "questionAugmented": "内容库美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) (备注: )", - "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", - "sql": "SELECT 部门, 访问时长 FROM 内容库产品 WHERE 部门 IN ('美术部', '技术研发部')", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) (备注: )\", we are asked:\n\"部门‘,’ \" so we need column = [部门]\n\"的访问时长 \" so we need column = [访问时长]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,技术研发部]. So the Schema_links are:\nSchema_links: [部门,访问时长,美术部,技术研发部]", + "question": "超音数美术部、技术研发部的访问时长", + "questionAugmented": "超音数美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) (备注: )", + "modelName": "超音数产品", + "dbSchema": "Table: 超音数产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", + "sql": "SELECT 部门, 访问时长 FROM 超音数产品 WHERE 部门 IN ('美术部', '技术研发部')", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) (备注: )\", we are asked:\n\"部门‘,’ \" so we need column = [部门]\n\"的访问时长 \" so we need column = [访问时长]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,技术研发部]. So the Schema_links are:\nSchema_links: [部门,访问时长,美术部,技术研发部]", "generatedSchemaLinkings": "[部门,访问时长,美术部,技术研发部]" }, { @@ -63,21 +63,21 @@ "generatedSchemaLinkings": "[歌曲名,结算播放量,10000]" }, { - "question": "内容库访问时长小于1小时,且来自美术部的用户是哪些", - "questionAugmented": "内容库访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) (备注: )", - "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT 用户名 FROM 内容库产品 WHERE 部门 = '美术部' AND 访问时长 < 1", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) (备注: )\", we are asked:\n\"一个’部门‘\" so we need column = [部门]\n\"访问时长小于\" so we need column = [访问时长]\n\"术部的用户是\" so we need column = [用户名]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,1]. So the Schema_links are:\nSchema_links: [部门,访问时长,用户名,美术部,1]", + "question": "超音数访问时长小于1小时,且来自美术部的用户是哪些", + "questionAugmented": "超音数访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) (备注: )", + "modelName": "超音数产品", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT 用户名 FROM 超音数产品 WHERE 部门 = '美术部' AND 访问时长 < 1", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) (备注: )\", we are asked:\n\"一个’部门‘\" so we need column = [部门]\n\"访问时长小于\" so we need column = [访问时长]\n\"术部的用户是\" so we need column = [用户名]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,1]. So the Schema_links are:\nSchema_links: [部门,访问时长,用户名,美术部,1]", "generatedSchemaLinkings": "[部门,访问时长,用户名,美术部,1]" }, { - "question": "内容库pv最高的用户有哪些", - "questionAugmented": "内容库pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) (备注: )", - "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT 用户名 FROM 内容库产品 ORDER BY 访问次数 DESC LIMIT 1", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) (备注: )\", we are asked:\n\"最高的用户有\" so we need column = [访问次数]\n\"用户有哪些 (\" so we need column = [用户名]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1]. So the Schema_links are:\nSchema_links: [访问次数,用户名,1]", + "question": "超音数pv最高的用户有哪些", + "questionAugmented": "超音数pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) (备注: )", + "modelName": "超音数产品", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT 用户名 FROM 超音数产品 ORDER BY 访问次数 DESC LIMIT 1", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) (备注: )\", we are asked:\n\"最高的用户有\" so we need column = [访问次数]\n\"用户有哪些 (\" so we need column = [用户名]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1]. So the Schema_links are:\nSchema_links: [访问次数,用户名,1]", "generatedSchemaLinkings": "[访问次数,用户名,1]" }, { @@ -99,12 +99,12 @@ "generatedSchemaLinkings": "[歌手名,数据日期,结算播放量,周倩倩,7,day]" }, { - "question": "内容库访问次数大于1k的部门是哪些", - "questionAugmented": "内容库访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) (备注: )", - "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", - "sql": "SELECT 部门 FROM 内容库产品 WHERE 访问次数 > 1000", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) (备注: )\", we are asked:\n\"部门是哪些 \" so we need column = [部门]\n\"访问次数大于\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1000]. So the Schema_links are:\nSchema_links: [部门,访问次数,1000]", + "question": "超音数访问次数大于1k的部门是哪些", + "questionAugmented": "超音数访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) (备注: )", + "modelName": "超音数产品", + "dbSchema": "Table: 超音数产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", + "sql": "SELECT 部门 FROM 超音数产品 WHERE 访问次数 > 1000", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) (备注: )\", we are asked:\n\"部门是哪些 \" so we need column = [部门]\n\"访问次数大于\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1000]. So the Schema_links are:\nSchema_links: [部门,访问次数,1000]", "generatedSchemaLinkings": "[部门,访问次数,1000]" }, { @@ -207,47 +207,47 @@ "generatedSchemaLinkings": "[歌曲名,结算播放量,歌手名,发布时间,200000,刘锝桦,1992-04-02,2020-05-02]" }, { - "question": "内容库近30天访问次数的平均数", - "questionAugmented": "内容库近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) (备注: )", - "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT AVG(访问次数) FROM 内容库产品 WHERE datediff('day', 数据日期, '2023-09-04') <= 30 ", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\"访问次数的平均数\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [30,day]. So the Schema_links are:\nSchema_links: [数据日期,访问次数,30,day]", + "question": "超音数近30天访问次数的平均数", + "questionAugmented": "超音数近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) (备注: )", + "modelName": "超音数产品", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT AVG(访问次数) FROM 超音数产品 WHERE datediff('day', 数据日期, '2023-09-04') <= 30 ", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\"访问次数的平均数\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [30,day]. So the Schema_links are:\nSchema_links: [数据日期,访问次数,30,day]", "generatedSchemaLinkings": "[数据日期,访问次数,30,day]" }, { - "question": "内容库近半年哪个月的访问次数汇总最高", - "questionAugmented": "内容库近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) (备注: )", - "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT MONTH(数据日期), SUM(访问次数) FROM 内容库产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期) ORDER BY SUM(访问次数) DESC LIMIT 1", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\"的访问次数汇总\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,0.5,year]. So the Schema_links are:\nSchema_links: [数据日期,访问次数,1,0.5,year]", + "question": "超音数近半年哪个月的访问次数汇总最高", + "questionAugmented": "超音数近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) (备注: )", + "modelName": "超音数产品", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT MONTH(数据日期), SUM(访问次数) FROM 超音数产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期) ORDER BY SUM(访问次数) DESC LIMIT 1", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\"的访问次数汇总\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,0.5,year]. So the Schema_links are:\nSchema_links: [数据日期,访问次数,1,0.5,year]", "generatedSchemaLinkings": "[数据日期,访问次数,1,0.5,year]" }, { - "question": "内容库近半年每个月的平均访问次数", - "questionAugmented": "内容库近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) (备注: )", - "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT MONTH(数据日期), AVG(访问次数) FROM 内容库产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期)", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\"访问次数 (\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [0.5,year]. So the Schema_links are:\nSchema_links: [数据日期,访问次数,0.5,year]", + "question": "超音数近半年每个月的平均访问次数", + "questionAugmented": "超音数近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) (备注: )", + "modelName": "超音数产品", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT MONTH(数据日期), AVG(访问次数) FROM 超音数产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期)", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\"访问次数 (\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [0.5,year]. So the Schema_links are:\nSchema_links: [数据日期,访问次数,0.5,year]", "generatedSchemaLinkings": "[数据日期,访问次数,0.5,year]" }, { - "question": "内容库 按部门统计访问次数 top10 的部门", - "questionAugmented": "内容库 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) (备注: )", - "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT 部门, SUM(访问次数) FROM 内容库产品 GROUP BY 部门 ORDER BY SUM(访问次数) DESC LIMIT 10", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\" 的部门 (\" so we need column = [部门]\n\"计访问次数 \" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10]. So the Schema_links are:\nSchema_links: [部门,访问次数,10]", + "question": "超音数 按部门统计访问次数 top10 的部门", + "questionAugmented": "超音数 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) (备注: )", + "modelName": "超音数产品", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT 部门, SUM(访问次数) FROM 超音数产品 GROUP BY 部门 ORDER BY SUM(访问次数) DESC LIMIT 10", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\" 的部门 (\" so we need column = [部门]\n\"计访问次数 \" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10]. So the Schema_links are:\nSchema_links: [部门,访问次数,10]", "generatedSchemaLinkings": "[部门,访问次数,10]" }, { "question": "超音速 近7个月,月度总访问量超过 2万的月份", "questionAugmented": "超音速 近7个月,月度总访问量超过 2万的月份 (补充信息:。当前的日期是2023-09-10) (备注: )", - "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT MONTH(数据日期) FROM 内容库产品 WHERE datediff('month', 数据日期, '2023-09-10') <= 7 GROUP BY MONTH(数据日期) HAVING SUM(访问次数) > 20000", + "modelName": "超音数产品", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT MONTH(数据日期) FROM 超音数产品 WHERE datediff('month', 数据日期, '2023-09-10') <= 7 GROUP BY MONTH(数据日期) HAVING SUM(访问次数) > 20000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音速 近7个月,月度总访问量超过 2万的月份 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\"访问量超过 \" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [7,20000,month]. So the Schema_links are:\nSchema_links: [数据日期,访问次数,7,20000,month]", "generatedSchemaLinkings": "[数据日期,访问次数,7,20000,month]" }, diff --git a/launchers/standalone/src/main/resources/s2ql_exemplar.json b/launchers/standalone/src/main/resources/s2ql_exemplar.json index 24880bdf6..2dfca5a1f 100644 --- a/launchers/standalone/src/main/resources/s2ql_exemplar.json +++ b/launchers/standalone/src/main/resources/s2ql_exemplar.json @@ -1,26 +1,26 @@ [ { - "question": "比较jackjchen和robinlee在内容库的访问次数", - "questionAugmented": "比较jackjchen和robinlee在内容库的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) ", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", - "sql": "SELECT 用户名, 访问次数 FROM 内容库产品 WHERE 用户名 IN ('jackjchen', 'robinlee')", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"比较jackjchen和robinlee在内容库的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) \", we are asked:\n\"’用户名‘,\" so we need column = [用户名]\n\"的访问次数 \" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [jackjchen,robinlee]. So the Schema_links are:\nSchema_links: [用户名,访问次数,jackjchen,robinlee]", + "question": "比较jackjchen和robinlee在超音数的访问次数", + "questionAugmented": "比较jackjchen和robinlee在超音数的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) ", + "dbSchema": "Table: 超音数产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", + "sql": "SELECT 用户名, 访问次数 FROM 超音数产品 WHERE 用户名 IN ('jackjchen', 'robinlee')", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"比较jackjchen和robinlee在超音数的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) \", we are asked:\n\"’用户名‘,\" so we need column = [用户名]\n\"的访问次数 \" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [jackjchen,robinlee]. So the Schema_links are:\nSchema_links: [用户名,访问次数,jackjchen,robinlee]", "generatedSchemaLinkings": "[用户名,访问次数,jackjchen,robinlee]" }, { - "question": "内容库近12个月访问人数 按部门", - "questionAugmented": "内容库近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) ", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", - "sql": "SELECT 部门, 数据日期, 访问人数 FROM 内容库产品 WHERE datediff('month', 数据日期, '2022-11-06') <= 12 ", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) \", we are asked:\n\" 按部门 (\" so we need column = [部门]\n\"访问人数 按\" so we need column = [访问人数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [12,month]. So the Schema_links are:\nSchema_links: [部门,访问人数,数据日期,12,month]", + "question": "超音数近12个月访问人数 按部门", + "questionAugmented": "超音数近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) ", + "dbSchema": "Table: 超音数产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", + "sql": "SELECT 部门, 数据日期, 访问人数 FROM 超音数产品 WHERE datediff('month', 数据日期, '2022-11-06') <= 12 ", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) \", we are asked:\n\" 按部门 (\" so we need column = [部门]\n\"访问人数 按\" so we need column = [访问人数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [12,month]. So the Schema_links are:\nSchema_links: [部门,访问人数,数据日期,12,month]", "generatedSchemaLinkings": "[部门,访问人数,数据日期,12,month]" }, { - "question": "内容库美术部、技术研发部的访问时长", - "questionAugmented": "内容库美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) ", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", - "sql": "SELECT 部门, 访问时长 FROM 内容库产品 WHERE 部门 IN ('美术部', '技术研发部')", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) \", we are asked:\n\"部门‘,’ \" so we need column = [部门]\n\"的访问时长 \" so we need column = [访问时长]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,技术研发部]. So the Schema_links are:\nSchema_links: [部门,访问时长,美术部,技术研发部]", + "question": "超音数美术部、技术研发部的访问时长", + "questionAugmented": "超音数美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) ", + "dbSchema": "Table: 超音数产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", + "sql": "SELECT 部门, 访问时长 FROM 超音数产品 WHERE 部门 IN ('美术部', '技术研发部')", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) \", we are asked:\n\"部门‘,’ \" so we need column = [部门]\n\"的访问时长 \" so we need column = [访问时长]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,技术研发部]. So the Schema_links are:\nSchema_links: [部门,访问时长,美术部,技术研发部]", "generatedSchemaLinkings": "[部门,访问时长,美术部,技术研发部]" }, { @@ -56,19 +56,19 @@ "generatedSchemaLinkings": "[歌曲名,结算播放量,10000]" }, { - "question": "内容库访问时长小于1小时,且来自美术部的用户是哪些", - "questionAugmented": "内容库访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) ", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT 用户名 FROM 内容库产品 WHERE 部门 = '美术部' AND 访问时长 < 1", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) \", we are asked:\n\"术部的用户是\" so we need column = [用户名]\n\"一个’部门‘\" so we need column = [部门]\n\"访问时长小于\" so we need column = [访问时长]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,1]. So the Schema_links are:\nSchema_links: [用户名,部门,访问时长,美术部,1]", + "question": "超音数访问时长小于1小时,且来自美术部的用户是哪些", + "questionAugmented": "超音数访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) ", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT 用户名 FROM 超音数产品 WHERE 部门 = '美术部' AND 访问时长 < 1", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) \", we are asked:\n\"术部的用户是\" so we need column = [用户名]\n\"一个’部门‘\" so we need column = [部门]\n\"访问时长小于\" so we need column = [访问时长]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,1]. So the Schema_links are:\nSchema_links: [用户名,部门,访问时长,美术部,1]", "generatedSchemaLinkings": "[用户名,部门,访问时长,美术部,1]" }, { - "question": "内容库pv最高的用户有哪些", - "questionAugmented": "内容库pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) ", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT 用户名 FROM 内容库产品 ORDER BY 访问次数 DESC LIMIT 1", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) \", we are asked:\n\"用户有哪些 (\" so we need column = [用户名]\n\"最高的用户有\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1]. So the Schema_links are:\nSchema_links: [用户名,访问次数,1]", + "question": "超音数pv最高的用户有哪些", + "questionAugmented": "超音数pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) ", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT 用户名 FROM 超音数产品 ORDER BY 访问次数 DESC LIMIT 1", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) \", we are asked:\n\"用户有哪些 (\" so we need column = [用户名]\n\"最高的用户有\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1]. So the Schema_links are:\nSchema_links: [用户名,访问次数,1]", "generatedSchemaLinkings": "[用户名,访问次数,1]" }, { @@ -88,11 +88,11 @@ "generatedSchemaLinkings": "[结算播放量,歌手名,数据日期,周倩倩,7,day]" }, { - "question": "内容库访问次数大于1k的部门是哪些", - "questionAugmented": "内容库访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) ", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", - "sql": "SELECT 部门 FROM 内容库产品 WHERE 访问次数 > 1000", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) \", we are asked:\n\"访问次数大于\" so we need column = [访问次数]\n\"部门是哪些 \" so we need column = [部门]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1000]. So the Schema_links are:\nSchema_links: [访问次数,部门,1000]", + "question": "超音数访问次数大于1k的部门是哪些", + "questionAugmented": "超音数访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) ", + "dbSchema": "Table: 超音数产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", + "sql": "SELECT 部门 FROM 超音数产品 WHERE 访问次数 > 1000", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) \", we are asked:\n\"访问次数大于\" so we need column = [访问次数]\n\"部门是哪些 \" so we need column = [部门]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1000]. So the Schema_links are:\nSchema_links: [访问次数,部门,1000]", "generatedSchemaLinkings": "[访问次数,部门,1000]" }, { @@ -184,42 +184,42 @@ "generatedSchemaLinkings": "[歌曲名,结算播放量,歌手名,发布时间,200000,刘锝桦,1992-04-02,2020-05-02]" }, { - "question": "内容库近30天访问次数的平均数", - "questionAugmented": "内容库近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) ", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT AVG(访问次数) FROM 内容库产品 WHERE datediff('day', 数据日期, '2023-09-04') <= 30 ", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) \", we are asked:\n\"访问次数的平均数\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [30,day]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,30,day]", + "question": "超音数近30天访问次数的平均数", + "questionAugmented": "超音数近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) ", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT AVG(访问次数) FROM 超音数产品 WHERE datediff('day', 数据日期, '2023-09-04') <= 30 ", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) \", we are asked:\n\"访问次数的平均数\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [30,day]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,30,day]", "generatedSchemaLinkings": "[访问次数,数据日期,30,day]" }, { - "question": "内容库近半年哪个月的访问次数汇总最高", - "questionAugmented": "内容库近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) ", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT MONTH(数据日期), SUM(访问次数) FROM 内容库产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期) ORDER BY SUM(访问次数) DESC LIMIT 1", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) \", we are asked:\n\"的访问次数汇总\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,0.5,year]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,1,0.5,year]", + "question": "超音数近半年哪个月的访问次数汇总最高", + "questionAugmented": "超音数近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) ", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT MONTH(数据日期), SUM(访问次数) FROM 超音数产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期) ORDER BY SUM(访问次数) DESC LIMIT 1", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) \", we are asked:\n\"的访问次数汇总\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,0.5,year]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,1,0.5,year]", "generatedSchemaLinkings": "[访问次数,数据日期,1,0.5,year]" }, { - "question": "内容库近半年每个月的平均访问次数", - "questionAugmented": "内容库近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) ", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT MONTH(数据日期), AVG(访问次数) FROM 内容库产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期)", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) \", we are asked:\n\"访问次数 (\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [0.5,year]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,0.5,year]", + "question": "超音数近半年每个月的平均访问次数", + "questionAugmented": "超音数近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) ", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT MONTH(数据日期), AVG(访问次数) FROM 超音数产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期)", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) \", we are asked:\n\"访问次数 (\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [0.5,year]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,0.5,year]", "generatedSchemaLinkings": "[访问次数,数据日期,0.5,year]" }, { - "question": "内容库 按部门统计访问次数 top10 的部门", - "questionAugmented": "内容库 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) ", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT 部门, SUM(访问次数) FROM 内容库产品 GROUP BY 部门 ORDER BY SUM(访问次数) DESC LIMIT 10", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) \", we are asked:\n\"计访问次数 \" so we need column = [访问次数]\n\" 的部门 (\" so we need column = [部门]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10]. So the Schema_links are:\nSchema_links: [访问次数,部门,10]", + "question": "超音数 按部门统计访问次数 top10 的部门", + "questionAugmented": "超音数 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) ", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT 部门, SUM(访问次数) FROM 超音数产品 GROUP BY 部门 ORDER BY SUM(访问次数) DESC LIMIT 10", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) \", we are asked:\n\"计访问次数 \" so we need column = [访问次数]\n\" 的部门 (\" so we need column = [部门]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10]. So the Schema_links are:\nSchema_links: [访问次数,部门,10]", "generatedSchemaLinkings": "[访问次数,部门,10]" }, { "question": "超音速 近7个月,月度总访问量超过 2万的月份", "questionAugmented": "超音速 近7个月,月度总访问量超过 2万的月份 (补充信息:。当前的日期是2023-09-10) ", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT MONTH(数据日期) FROM 内容库产品 WHERE datediff('month', 数据日期, '2023-09-10') <= 7 GROUP BY MONTH(数据日期) HAVING SUM(访问次数) > 20000", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT MONTH(数据日期) FROM 超音数产品 WHERE datediff('month', 数据日期, '2023-09-10') <= 7 GROUP BY MONTH(数据日期) HAVING SUM(访问次数) > 20000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音速 近7个月,月度总访问量超过 2万的月份 (补充信息:。当前的日期是2023-09-10) \", we are asked:\n\"访问量超过 \" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [7,20000,month]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,7,20000,month]", "generatedSchemaLinkings": "[访问次数,数据日期,7,20000,month]" }, diff --git a/launchers/standalone/src/test/resources/s2ql_exemplar.json b/launchers/standalone/src/test/resources/s2ql_exemplar.json index 24880bdf6..2dfca5a1f 100644 --- a/launchers/standalone/src/test/resources/s2ql_exemplar.json +++ b/launchers/standalone/src/test/resources/s2ql_exemplar.json @@ -1,26 +1,26 @@ [ { - "question": "比较jackjchen和robinlee在内容库的访问次数", - "questionAugmented": "比较jackjchen和robinlee在内容库的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) ", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", - "sql": "SELECT 用户名, 访问次数 FROM 内容库产品 WHERE 用户名 IN ('jackjchen', 'robinlee')", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"比较jackjchen和robinlee在内容库的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) \", we are asked:\n\"’用户名‘,\" so we need column = [用户名]\n\"的访问次数 \" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [jackjchen,robinlee]. So the Schema_links are:\nSchema_links: [用户名,访问次数,jackjchen,robinlee]", + "question": "比较jackjchen和robinlee在超音数的访问次数", + "questionAugmented": "比较jackjchen和robinlee在超音数的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) ", + "dbSchema": "Table: 超音数产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", + "sql": "SELECT 用户名, 访问次数 FROM 超音数产品 WHERE 用户名 IN ('jackjchen', 'robinlee')", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"比较jackjchen和robinlee在超音数的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) \", we are asked:\n\"’用户名‘,\" so we need column = [用户名]\n\"的访问次数 \" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [jackjchen,robinlee]. So the Schema_links are:\nSchema_links: [用户名,访问次数,jackjchen,robinlee]", "generatedSchemaLinkings": "[用户名,访问次数,jackjchen,robinlee]" }, { - "question": "内容库近12个月访问人数 按部门", - "questionAugmented": "内容库近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) ", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", - "sql": "SELECT 部门, 数据日期, 访问人数 FROM 内容库产品 WHERE datediff('month', 数据日期, '2022-11-06') <= 12 ", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) \", we are asked:\n\" 按部门 (\" so we need column = [部门]\n\"访问人数 按\" so we need column = [访问人数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [12,month]. So the Schema_links are:\nSchema_links: [部门,访问人数,数据日期,12,month]", + "question": "超音数近12个月访问人数 按部门", + "questionAugmented": "超音数近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) ", + "dbSchema": "Table: 超音数产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", + "sql": "SELECT 部门, 数据日期, 访问人数 FROM 超音数产品 WHERE datediff('month', 数据日期, '2022-11-06') <= 12 ", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) \", we are asked:\n\" 按部门 (\" so we need column = [部门]\n\"访问人数 按\" so we need column = [访问人数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [12,month]. So the Schema_links are:\nSchema_links: [部门,访问人数,数据日期,12,month]", "generatedSchemaLinkings": "[部门,访问人数,数据日期,12,month]" }, { - "question": "内容库美术部、技术研发部的访问时长", - "questionAugmented": "内容库美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) ", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", - "sql": "SELECT 部门, 访问时长 FROM 内容库产品 WHERE 部门 IN ('美术部', '技术研发部')", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) \", we are asked:\n\"部门‘,’ \" so we need column = [部门]\n\"的访问时长 \" so we need column = [访问时长]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,技术研发部]. So the Schema_links are:\nSchema_links: [部门,访问时长,美术部,技术研发部]", + "question": "超音数美术部、技术研发部的访问时长", + "questionAugmented": "超音数美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) ", + "dbSchema": "Table: 超音数产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", + "sql": "SELECT 部门, 访问时长 FROM 超音数产品 WHERE 部门 IN ('美术部', '技术研发部')", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) \", we are asked:\n\"部门‘,’ \" so we need column = [部门]\n\"的访问时长 \" so we need column = [访问时长]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,技术研发部]. So the Schema_links are:\nSchema_links: [部门,访问时长,美术部,技术研发部]", "generatedSchemaLinkings": "[部门,访问时长,美术部,技术研发部]" }, { @@ -56,19 +56,19 @@ "generatedSchemaLinkings": "[歌曲名,结算播放量,10000]" }, { - "question": "内容库访问时长小于1小时,且来自美术部的用户是哪些", - "questionAugmented": "内容库访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) ", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT 用户名 FROM 内容库产品 WHERE 部门 = '美术部' AND 访问时长 < 1", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) \", we are asked:\n\"术部的用户是\" so we need column = [用户名]\n\"一个’部门‘\" so we need column = [部门]\n\"访问时长小于\" so we need column = [访问时长]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,1]. So the Schema_links are:\nSchema_links: [用户名,部门,访问时长,美术部,1]", + "question": "超音数访问时长小于1小时,且来自美术部的用户是哪些", + "questionAugmented": "超音数访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) ", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT 用户名 FROM 超音数产品 WHERE 部门 = '美术部' AND 访问时长 < 1", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) \", we are asked:\n\"术部的用户是\" so we need column = [用户名]\n\"一个’部门‘\" so we need column = [部门]\n\"访问时长小于\" so we need column = [访问时长]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,1]. So the Schema_links are:\nSchema_links: [用户名,部门,访问时长,美术部,1]", "generatedSchemaLinkings": "[用户名,部门,访问时长,美术部,1]" }, { - "question": "内容库pv最高的用户有哪些", - "questionAugmented": "内容库pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) ", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT 用户名 FROM 内容库产品 ORDER BY 访问次数 DESC LIMIT 1", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) \", we are asked:\n\"用户有哪些 (\" so we need column = [用户名]\n\"最高的用户有\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1]. So the Schema_links are:\nSchema_links: [用户名,访问次数,1]", + "question": "超音数pv最高的用户有哪些", + "questionAugmented": "超音数pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) ", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT 用户名 FROM 超音数产品 ORDER BY 访问次数 DESC LIMIT 1", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) \", we are asked:\n\"用户有哪些 (\" so we need column = [用户名]\n\"最高的用户有\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1]. So the Schema_links are:\nSchema_links: [用户名,访问次数,1]", "generatedSchemaLinkings": "[用户名,访问次数,1]" }, { @@ -88,11 +88,11 @@ "generatedSchemaLinkings": "[结算播放量,歌手名,数据日期,周倩倩,7,day]" }, { - "question": "内容库访问次数大于1k的部门是哪些", - "questionAugmented": "内容库访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) ", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", - "sql": "SELECT 部门 FROM 内容库产品 WHERE 访问次数 > 1000", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) \", we are asked:\n\"访问次数大于\" so we need column = [访问次数]\n\"部门是哪些 \" so we need column = [部门]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1000]. So the Schema_links are:\nSchema_links: [访问次数,部门,1000]", + "question": "超音数访问次数大于1k的部门是哪些", + "questionAugmented": "超音数访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) ", + "dbSchema": "Table: 超音数产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", + "sql": "SELECT 部门 FROM 超音数产品 WHERE 访问次数 > 1000", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) \", we are asked:\n\"访问次数大于\" so we need column = [访问次数]\n\"部门是哪些 \" so we need column = [部门]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1000]. So the Schema_links are:\nSchema_links: [访问次数,部门,1000]", "generatedSchemaLinkings": "[访问次数,部门,1000]" }, { @@ -184,42 +184,42 @@ "generatedSchemaLinkings": "[歌曲名,结算播放量,歌手名,发布时间,200000,刘锝桦,1992-04-02,2020-05-02]" }, { - "question": "内容库近30天访问次数的平均数", - "questionAugmented": "内容库近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) ", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT AVG(访问次数) FROM 内容库产品 WHERE datediff('day', 数据日期, '2023-09-04') <= 30 ", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) \", we are asked:\n\"访问次数的平均数\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [30,day]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,30,day]", + "question": "超音数近30天访问次数的平均数", + "questionAugmented": "超音数近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) ", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT AVG(访问次数) FROM 超音数产品 WHERE datediff('day', 数据日期, '2023-09-04') <= 30 ", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) \", we are asked:\n\"访问次数的平均数\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [30,day]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,30,day]", "generatedSchemaLinkings": "[访问次数,数据日期,30,day]" }, { - "question": "内容库近半年哪个月的访问次数汇总最高", - "questionAugmented": "内容库近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) ", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT MONTH(数据日期), SUM(访问次数) FROM 内容库产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期) ORDER BY SUM(访问次数) DESC LIMIT 1", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) \", we are asked:\n\"的访问次数汇总\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,0.5,year]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,1,0.5,year]", + "question": "超音数近半年哪个月的访问次数汇总最高", + "questionAugmented": "超音数近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) ", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT MONTH(数据日期), SUM(访问次数) FROM 超音数产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期) ORDER BY SUM(访问次数) DESC LIMIT 1", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) \", we are asked:\n\"的访问次数汇总\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,0.5,year]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,1,0.5,year]", "generatedSchemaLinkings": "[访问次数,数据日期,1,0.5,year]" }, { - "question": "内容库近半年每个月的平均访问次数", - "questionAugmented": "内容库近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) ", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT MONTH(数据日期), AVG(访问次数) FROM 内容库产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期)", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) \", we are asked:\n\"访问次数 (\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [0.5,year]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,0.5,year]", + "question": "超音数近半年每个月的平均访问次数", + "questionAugmented": "超音数近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) ", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT MONTH(数据日期), AVG(访问次数) FROM 超音数产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期)", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) \", we are asked:\n\"访问次数 (\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [0.5,year]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,0.5,year]", "generatedSchemaLinkings": "[访问次数,数据日期,0.5,year]" }, { - "question": "内容库 按部门统计访问次数 top10 的部门", - "questionAugmented": "内容库 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) ", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT 部门, SUM(访问次数) FROM 内容库产品 GROUP BY 部门 ORDER BY SUM(访问次数) DESC LIMIT 10", - "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) \", we are asked:\n\"计访问次数 \" so we need column = [访问次数]\n\" 的部门 (\" so we need column = [部门]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10]. So the Schema_links are:\nSchema_links: [访问次数,部门,10]", + "question": "超音数 按部门统计访问次数 top10 的部门", + "questionAugmented": "超音数 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) ", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT 部门, SUM(访问次数) FROM 超音数产品 GROUP BY 部门 ORDER BY SUM(访问次数) DESC LIMIT 10", + "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音数 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) \", we are asked:\n\"计访问次数 \" so we need column = [访问次数]\n\" 的部门 (\" so we need column = [部门]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10]. So the Schema_links are:\nSchema_links: [访问次数,部门,10]", "generatedSchemaLinkings": "[访问次数,部门,10]" }, { "question": "超音速 近7个月,月度总访问量超过 2万的月份", "questionAugmented": "超音速 近7个月,月度总访问量超过 2万的月份 (补充信息:。当前的日期是2023-09-10) ", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", - "sql": "SELECT MONTH(数据日期) FROM 内容库产品 WHERE datediff('month', 数据日期, '2023-09-10') <= 7 GROUP BY MONTH(数据日期) HAVING SUM(访问次数) > 20000", + "dbSchema": "Table: 超音数产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "sql": "SELECT MONTH(数据日期) FROM 超音数产品 WHERE datediff('month', 数据日期, '2023-09-10') <= 7 GROUP BY MONTH(数据日期) HAVING SUM(访问次数) > 20000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音速 近7个月,月度总访问量超过 2万的月份 (补充信息:。当前的日期是2023-09-10) \", we are asked:\n\"访问量超过 \" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [7,20000,month]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,7,20000,month]", "generatedSchemaLinkings": "[访问次数,数据日期,7,20000,month]" },