Merge pull request #246 from xnuinside/v1.1.0_release
V1.1.0 release
xnuinside committed Apr 21, 2024
2 parents bf95ea9 + c0d07a0 commit c13ad11
Showing 13 changed files with 53,136 additions and 658 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.txt
@@ -1,3 +1,13 @@
**v1.1.0**
### Improvements
MySQL:
1. Added support for INDEX statement inside table definition
2. Added support for MySQL INVISIBLE/VISIBLE statement - https://github.com/xnuinside/simple-ddl-parser/issues/243

Snowflake:
1. Added support for cluster by statement before columns definition - https://github.com/xnuinside/simple-ddl-parser/issues/234


**v1.0.4**
### Improvements
1. Support functions with schema prefix in `DEFAULT` and `CHECK` statements. https://github.com/xnuinside/simple-ddl-parser/issues/240
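The two dialect additions above are easiest to see end to end. Below is a minimal sketch of how they could be exercised through the parser's public API (the DDL strings and the exact shape of the returned dictionaries are illustrative assumptions; the source diffs further down define the actual behavior):

```python
from simple_ddl_parser import DDLParser

# MySQL: an INDEX declared inside the table body, with the new
# INVISIBLE modifier (issue #243).
mysql_ddl = """
CREATE TABLE users (
    id INT NOT NULL,
    email VARCHAR(255),
    INDEX ix_email (email) INVISIBLE
);
"""
mysql_result = DDLParser(mysql_ddl).run(group_by_type=True)

# Snowflake: CLUSTER BY placed before the column definitions (issue #234).
snowflake_ddl = """
CREATE TABLE orders CLUSTER BY (order_date, region) (
    order_date DATE,
    region VARCHAR
);
"""
snowflake_result = DDLParser(snowflake_ddl).run(
    group_by_type=True, output_mode="snowflake"
)
```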
10 changes: 10 additions & 0 deletions README.md
@@ -486,6 +486,16 @@ for help with debugging & testing support for BigQuery dialect DDLs:


## Changelog
**v1.1.0**
### Improvements
MySQL:
1. Added support for INDEX statement inside table definition
2. Added support for MySQL INVISIBLE/VISIBLE statement - https://github.com/xnuinside/simple-ddl-parser/issues/243

Snowflake:
1. Added support for cluster by statement before columns definition - https://github.com/xnuinside/simple-ddl-parser/issues/234


**v1.0.4**
### Improvements
1. Support functions with schema prefix in `DEFAULT` and `CHECK` statements. https://github.com/xnuinside/simple-ddl-parser/issues/240
18 changes: 17 additions & 1 deletion docs/README.rst
@@ -537,7 +537,7 @@ Big thanks for the involving & contribution with test cases with DDL samples & o


* https://github.com/kukigai ,
* https://github.com/Awalkman90 ,
* https://github.com/kliushnichenko ,
* https://github.com/geob3d

for help with debugging & testing support for BigQuery dialect DDLs:
@@ -549,6 +549,22 @@ for help with debugging & testing support for BigQuery dialect DDLs:
Changelog
---------

**v1.1.0**

Improvements
^^^^^^^^^^^^

MySQL:


#. Added support for INDEX statement inside table definition
#. Added support for MySQL INVISIBLE/VISIBLE statement - https://github.com/xnuinside/simple-ddl-parser/issues/243

Snowflake:


#. Added support for cluster by statement before columns definition - https://github.com/xnuinside/simple-ddl-parser/issues/234

**v1.0.4**

Improvements
6 changes: 5 additions & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "simple-ddl-parser"
version = "1.0.4"
version = "1.1.0"
description = "Simple DDL Parser to parse SQL & dialects like HQL, TSQL (MSSQL), Oracle, AWS Redshift, Snowflake, MySQL, PostgreSQL, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl."
authors = ["Iuliia Volkova <[email protected]>"]
license = "MIT"
@@ -22,6 +22,10 @@ classifiers = [
"Topic :: Software Development :: Libraries :: Python Modules"
]

[[tool.poetry.source]]
name = "pypi-public"
url = "https://pypi.org/simple/"

[tool.poetry.dependencies]
python = "^3.6"
dataclasses = { version = "0.8", python = ">=3.6,<3.7" }
17 changes: 9 additions & 8 deletions simple_ddl_parser/ddl_parser.py
@@ -62,9 +62,9 @@ def after_columns_tokens(self, t: LexToken) -> LexToken:
return t

def process_body_tokens(self, t: LexToken) -> LexToken:
if (
self.lexer.last_par == "RP" and not self.lexer.lp_open
) or self.lexer.after_columns:
if (self.lexer.last_par == "RP" and not self.lexer.lp_open) or (
self.lexer.after_columns and not self.lexer.columns_def
):
t = self.after_columns_tokens(t)
elif self.lexer.columns_def:
t.type = tok.columns_definition.get(t.value.upper(), t.type)
Expand All @@ -83,7 +83,6 @@ def tokens_not_columns_names(self, t: LexToken) -> LexToken:
t_tag = self.parse_tags_symbols(t)
if t_tag:
return t_tag

if "ARRAY" in t.value:
t.type = "ARRAY"
return t
Expand All @@ -98,7 +97,8 @@ def tokens_not_columns_names(self, t: LexToken) -> LexToken:
elif self.lexer.last_token != "COMMA":
t.type = tok.common_statements.get(t.value.upper(), t.type)
else:
t.type = tok.first_liners.get(t.value.upper(), t.type)
if not (self.lexer.columns_def and self.lexer.after_columns):
t.type = tok.first_liners.get(t.value.upper(), t.type)

# get tokens from other token dicts
t = self.process_body_tokens(t)
@@ -198,7 +198,6 @@ def t_ID(self, t: LexToken):
self.commat_type(t)

self.set_lexx_tags(t)

return self.set_last_token(t)

def commat_type(self, t: LexToken):
Expand All @@ -209,14 +208,16 @@ def capitalize_tokens(self, t: LexToken):
if t.type != "ID" and t.type not in ["LT", "RT"]:
t.value = t.value.upper()

def set_parathesis_tokens(self, t: LexToken):
def set_parenthesis_tokens(self, t: LexToken):
if t.type in ["RP", "LP"]:
if t.type == "RP" and self.lexer.lp_open:
self.lexer.lp_open -= 1
if not self.lexer.lp_open:
self.lexer.after_columns = True
self.lexer.last_par = t.type

def set_lexx_tags(self, t: LexToken):
self.set_parathesis_tokens(t)
self.set_parenthesis_tokens(t)

if t.type == "ALTER":
self.lexer.is_alter = True
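The lexer changes above all hang off one piece of state: `lp_open` counts unmatched opening parentheses, and only when a closing parenthesis brings the count back to zero does `after_columns` flip on, so keywords like INDEX seen while `columns_def` is still set get classified as column-block tokens rather than top-level statements. A stripped-down model of that bookkeeping (the class is hypothetical; in the real lexer the increment on `LP` happens elsewhere):

```python
class ParenState:
    """Toy model of the state driven by set_parenthesis_tokens."""

    def __init__(self) -> None:
        self.lp_open = 0          # unmatched '(' tokens so far
        self.after_columns = False
        self.last_par = None      # last paren token seen: 'LP' or 'RP'

    def feed(self, token_type: str) -> None:
        if token_type == "LP":
            self.lp_open += 1
        elif token_type == "RP" and self.lp_open:
            self.lp_open -= 1
            if not self.lp_open:
                # the outermost '(' just closed: the column list is finished
                self.after_columns = True
        if token_type in ("LP", "RP"):
            self.last_par = token_type


state = ParenState()
for t in ["LP", "LP", "RP", "RP"]:  # e.g. ( ... ( ... ) ... )
    state.feed(t)
assert state.lp_open == 0 and state.after_columns
```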
15 changes: 10 additions & 5 deletions simple_ddl_parser/dialects/snowflake.py
@@ -1,7 +1,7 @@
import re
from typing import List

from simple_ddl_parser.utils import remove_par
import re


class Snowflake:
@@ -11,12 +11,17 @@ def p_clone(self, p: List) -> None:
p[0] = {"clone": {"from": p_list[-1]}}

def p_expression_cluster_by(self, p: List) -> None:
"""expr : expr CLUSTER BY LP pid RP
| expr CLUSTER BY pid
"""
"""expr : expr cluster_by"""
p_list = list(p)
p[0] = p[1]
p[0].update(p_list[-1])

def p_cluster_by(self, p: List) -> None:
"""cluster_by : CLUSTER BY LP pid RP
| CLUSTER BY pid
"""
p_list = remove_par(list(p))
p[0]["cluster_by"] = p_list[-1]
p[0] = {"cluster_by": p_list[-1]}

def p_multi_id_or_string(self, p: List) -> None:
"""multi_id_or_string : id_or_string
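Factoring the clause into a standalone `cluster_by` rule is what makes the new placement possible: `p_expression_cluster_by` still handles `expr cluster_by` (the clause after the column list), while `p_expression_table` in sql.py gains a `table_name cluster_by LP defcolumn` alternative for the clause before it. A hedged round-trip example (the `cluster_by` key is taken from the diff; the surrounding output structure is an assumption):

```python
from simple_ddl_parser import DDLParser

ddl = """
CREATE TABLE sales CLUSTER BY (region, sold_on) (
    region VARCHAR,
    sold_on DATE
);
"""
tables = DDLParser(ddl).run(group_by_type=True)["tables"]
# p_cluster_by returns {"cluster_by": [...]}, which p_expression_table
# merges into the table dict, so the clustering keys should land here:
print(tables[0]["cluster_by"])  # expected: ['region', 'sold_on']
```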
55 changes: 40 additions & 15 deletions simple_ddl_parser/dialects/sql.py
@@ -981,6 +981,26 @@ def p_index_table_name(self, p: List) -> None:
table_name = p_list[-1]
p[0].update({"schema": schema, "table_name": table_name})

def p_c_index(self, p: List) -> None:
"""c_index : INDEX LP index_pid RP
| INDEX id LP index_pid RP
| c_index INVISIBLE
| c_index VISIBLE"""
p_list = remove_par(p_list=list(p))
if isinstance(p_list[1], dict):
p[0] = p_list[1]
p[0]["details"] = {p_list[-1].lower(): True}
else:
if len(p_list) == 3:
name = None
else:
name = p_list[2]
p[0] = {
"index_stmt": True,
"name": name,
"columns": p_list[-1]["detailed_columns"],
}

def p_create_index(self, p: List) -> None:
"""create_index : CREATE INDEX id
| CREATE UNIQUE INDEX id
@@ -1020,7 +1040,9 @@ def p_expression_table(self, p: List) -> None: # noqa R701
| table_name LP defcolumn
| table_name
| table_name LP RP
| table_name cluster_by LP defcolumn
| expr COMMA defcolumn
| expr COMMA c_index
| expr COMMA
| expr COMMA constraint
| expr COMMA check_ex
Expand All @@ -1041,30 +1063,33 @@ def p_expression_table(self, p: List) -> None: # noqa R701
"""
p[0] = p[1] or defaultdict(list)
p_list = remove_par(list(p))
if len(p_list) > 2 and "cluster_by" in p_list[2]:
p[0].update(p_list[2])
if p_list[-1] != "," and p_list[-1] is not None:
if "type" in p_list[-1] and "name" in p_list[-1]:
if not p[0].get("columns"):
p[0]["columns"] = []
p[0]["columns"].append(p_list[-1])
elif "index_stmt" in p_list[-1]:
del p_list[-1]["index_stmt"]
if not p[0].get("index"):
p[0]["index"] = []
index_data = p_list[-1]
p[0]["index"].append(
{
"clustered": False,
"columns": [index_data["columns"]],
"detailed_columns": [
{
"name": index_data["columns"],
"nulls": "LAST",
"order": "ASC",
}
],
"index_name": index_data["name"],
"unique": False,
}
)
_index = {
"clustered": False,
"columns": [index_data["columns"]],
"detailed_columns": [
{
"name": index_data["columns"],
"nulls": "LAST",
"order": "ASC",
}
],
"index_name": index_data["name"],
"unique": False,
}
_index.update(index_data.get("details", {}))
p[0]["index"].append(_index)
elif "check" in p_list[-1]:
p[0] = self.extract_check_data(p, p_list)
elif "enforced" in p_list[-1]:
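Taken together, `c_index` and the new `index_stmt` branch normalize an in-table `INDEX` entry into the same `index` list that `CREATE INDEX` statements populate, with any `VISIBLE`/`INVISIBLE` modifier merged on top via `_index.update(index_data.get("details", {}))`. A sketch of the output this should produce (the field values are read off the literals in the diff, so treat the exact shape as an assumption):

```python
from simple_ddl_parser import DDLParser

ddl = """
CREATE TABLE users (
    id INT,
    email VARCHAR(255),
    INDEX ix_email (email) INVISIBLE
);
"""
table = DDLParser(ddl).run(group_by_type=True)["tables"][0]
print(table["index"])
# expected (assumption), per the defaults hard-coded in p_expression_table:
# [{'clustered': False,
#   'columns': [...],
#   'detailed_columns': [{'name': ..., 'nulls': 'LAST', 'order': 'ASC'}],
#   'index_name': 'ix_email',
#   'unique': False,
#   'invisible': True}]
```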
53,056 changes: 52,588 additions & 468 deletions simple_ddl_parser/parsetab.py

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions simple_ddl_parser/tokens.py
@@ -60,6 +60,8 @@
"WITH",
"ORDER",
"NOORDER",
"VISIBLE",
"INVISIBLE",
}
columns_definition = {value: value for value in columns_definition}
columns_definition[","] = "COMMA"
@@ -70,9 +72,12 @@
"CONSTRAINT",
"FOREIGN",
"PRIMARY",
"INDEX",
"UNIQUE",
"CHECK",
"WITH",
"CLUSTER",
"BY",
}
first_liners = {value: value for value in first_liners}

Expand Down
4 changes: 2 additions & 2 deletions tests/dialects/test_mssql_specific.py
@@ -1876,7 +1876,7 @@ def test_constraint_primary_key():

ddl = """CREATE TABLE [dbo].[users_WorkSchedule](
[id] [int] IDENTITY(1,1) NOT NULL,
[user_id] [int] NULL),
[user_id] [int] NULL,
CONSTRAINT [PK_users_WorkSchedule_id] PRIMARY KEY CLUSTERED
(
[id] ASC
Expand All @@ -1885,7 +1885,7 @@ def test_constraint_primary_key():
CONSTRAINT [PK_users_WorkSchedule_id] PRIMARY KEY
(
[id] ASC
)
))
"""
result = DDLParser(ddl).run(group_by_type=True, output_mode="mssql")
assert result == expected