Skip to content

Commit

Permalink
Merge pull request #251 from xnuinside/v1.3.0_snowflake_postgres
Browse files Browse the repository at this point in the history
PostgreSQL & Snowflake Improvements
  • Loading branch information
xnuinside committed May 11, 2024
2 parents 6b8d0c4 + f6fdcd0 commit d1c0a63
Show file tree
Hide file tree
Showing 11 changed files with 31,799 additions and 30,638 deletions.
17 changes: 17 additions & 0 deletions CHANGELOG.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
**v1.3.0**

### Fixes
PostgreSQL:
1. Timezone was moved out of the type definition into the keyword 'with_time_zone'; it can be True (if with time zone) or False (if without)
BigQuery:
1. Previously, Range in RANGE_BUCKETS was parsed as columns; now this behaviour is changed and
the range is placed in its own keyword - 'range' (can be an array or a str).
Also, for all ``*_TRUNC`` PARTITIONS like DATETIME_TRUNC, TIMESTAMP_TRUNC, etc., the second argument moved to the arg 'trunc_by'

### Improvements
PostgreSQL:
1. Added support for PostgreSQL with / without time zone - https://github.com/xnuinside/simple-ddl-parser/issues/250

BigQuery:
1. Added support for GENERATE_ARRAY in RANGE_BUCKETS https://github.com/xnuinside/simple-ddl-parser/issues/183

**v1.2.1**
### Fixes
MySQL:
Expand Down
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,23 @@ for help with debugging & testing support for BigQuery dialect DDLs:


## Changelog
**v1.3.0**

### Fixes
PostgreSQL:
1. Timezone was moved out of the type definition into the keyword 'with_time_zone'; it can be True (if with time zone) or False (if without)
BigQuery:
1. Previously, Range in RANGE_BUCKETS was parsed as columns; now this behaviour is changed and
the range is placed in its own keyword - 'range' (can be an array or a str).
Also, for all ``*_TRUNC`` PARTITIONS like DATETIME_TRUNC, TIMESTAMP_TRUNC, etc., the second argument moved to the arg 'trunc_by'

### Improvements
PostgreSQL:
1. Added support for PostgreSQL with / without time zone - https://github.com/xnuinside/simple-ddl-parser/issues/250

BigQuery:
1. Added support for GENERATE_ARRAY in RANGE_BUCKETS https://github.com/xnuinside/simple-ddl-parser/issues/183

**v1.2.1**
### Fixes
MySQL:
Expand Down
27 changes: 27 additions & 0 deletions docs/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,33 @@ for help with debugging & testing support for BigQuery dialect DDLs:
Changelog
---------

**v1.3.0**

Fixes
^^^^^

PostgreSQL:


#. Timezone was moved out of the type definition into the keyword 'with_time_zone'; it can be True (if with time zone) or False (if without)
BigQuery:
#. Previously, Range in RANGE_BUCKETS was parsed as columns; now this behaviour is changed and
the range is placed in its own keyword - 'range' (can be an array or a str).
Also, for all ``*_TRUNC`` PARTITIONS like DATETIME_TRUNC, TIMESTAMP_TRUNC, etc., the second argument moved to the arg 'trunc_by'

Improvements
^^^^^^^^^^^^

PostgreSQL:


#. Added support for PostgreSQL with / without time zone - https://github.com/xnuinside/simple-ddl-parser/issues/250

BigQuery:


#. Added support for GENERATE_ARRAY in RANGE_BUCKETS https://github.com/xnuinside/simple-ddl-parser/issues/183

**v1.2.1**

Fixes
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "simple-ddl-parser"
version = "1.2.1"
version = "1.3.0"
description = "Simple DDL Parser to parse SQL & dialects like HQL, TSQL (MSSQL), Oracle, AWS Redshift, Snowflake, MySQL, PostgreSQL, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl."
authors = ["Iuliia Volkova <[email protected]>"]
license = "MIT"
Expand Down
10 changes: 10 additions & 0 deletions simple_ddl_parser/dialects/psql.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,13 @@ def p_expr_inherits(self, p: List) -> None:
"table_name": p_list[-1]["table_name"],
}
p[1].update({"inherits": table_identifier})

def p_timezone(self, p: List) -> None:
    """timezone : WITH id id
    | WITHOUT id id"""
    # The two `id` tokens cover the words TIME and ZONE; only the leading
    # WITH / WITHOUT keyword decides the flag value.
    # NOTE(review): the membership test assumes the matched keyword appears
    # upper-cased in the symbol list - confirm for lower-case DDL input.
    tokens = remove_par(list(p))
    p[0] = {"with_time_zone": "WITH" in tokens}
52 changes: 44 additions & 8 deletions simple_ddl_parser/dialects/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,56 @@


class AfterColumns:
@staticmethod
def _parse_range_bucket(data: List[str]) -> Tuple[List[str], List[str]]:
range = None

if len(data) == 3:
columns = data[0]
range = data[2]
else:
columns = []
for column in data[0]:
if "[" in column:
range = [column.replace("[", "")]
elif range:
range.append(column.replace("]", ""))
else:
columns.append(column)
return columns, range

def p_expression_partition_by(self, p: List) -> None:
    """expr : expr PARTITION BY LP pid RP
    | expr PARTITION BY id LP pid RP
    | expr PARTITION BY pid
    | expr PARTITION BY id pid
    | expr PARTITION BY id LP pid COMMA f_call RP
    """
    # NOTE(review): the scraped diff interleaved the old and new bodies of
    # this method; this is the reconstructed post-merge version, matching
    # the expected outputs asserted in tests/dialects/test_bigquery.py.
    p[0] = p[1]
    p_list = remove_par(list(p))
    # `bucket_range` instead of `range` to avoid shadowing the builtin.
    _type, bucket_range, trunc_by = None, None, None

    if isinstance(p_list[4], list):
        # Plain PARTITION BY (col, ...) - no partition function involved.
        columns = p_list[4]
    elif "_TRUNC" in p_list[4]:
        # BigQuery DATE_TRUNC / DATETIME_TRUNC / TIMESTAMP_TRUNC: the last
        # argument is the truncation granularity, not a column.
        _type = p_list[4]
        trunc_by = p_list[5][-1]
        p_list[5].pop(-1)
        columns = p_list[5]
    elif p_list[4].upper() == "RANGE_BUCKET":
        # BigQuery RANGE_BUCKET with GENERATE_ARRAY or an inline array.
        _type = p_list[4]
        columns, bucket_range = self._parse_range_bucket(p_list[5:])
    else:
        columns = p_list[-1]
    if not _type and isinstance(p_list[4], str):
        # e.g. "PARTITION BY HASH (col)": the id before the column list.
        _type = p_list[4]
    p[0]["partition_by"] = {"columns": columns, "type": _type}
    if bucket_range:
        p[0]["partition_by"]["range"] = bucket_range
    if trunc_by:
        p[0]["partition_by"]["trunc_by"] = trunc_by


class Database:
Expand Down Expand Up @@ -419,6 +454,7 @@ def p_defcolumn(self, p: List) -> None:
| defcolumn as_virtual
| defcolumn constraint
| defcolumn generated_by
| defcolumn timezone
"""
p[0] = p[1]
p_list = list(p)
Expand Down
62,137 changes: 31,514 additions & 30,623 deletions simple_ddl_parser/parsetab.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions simple_ddl_parser/tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
"POLICY",
"MASKING",
"WITH",
"WITHOUT",
"ORDER",
"NOORDER",
"VISIBLE",
Expand Down
129 changes: 127 additions & 2 deletions tests/dialects/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,8 @@ def test_table_name_with_project_id():
}
],
"partition_by": {
"columns": ["fiscal_half_year_reporting_week_no", "DAY"],
"columns": ["fiscal_half_year_reporting_week_no"],
"trunc_by": "DAY",
"type": "DATETIME_TRUNC",
},
"partitioned_by": [],
Expand Down Expand Up @@ -646,7 +647,8 @@ def test_multiple_options():
{"option_four": '"Four"'},
],
"partition_by": {
"columns": ["fiscal_half_year_reporting_week_no", "DAY"],
"columns": ["fiscal_half_year_reporting_week_no"],
"trunc_by": "DAY",
"type": "DATETIME_TRUNC",
},
"partitioned_by": [],
Expand Down Expand Up @@ -869,3 +871,126 @@ def test_bigquery_options_string():
"types": [],
}
assert result == expected


def test_bigquery_partition_range():
    """RANGE_BUCKET with GENERATE_ARRAY: the generator call must land in
    partition_by['range'] instead of being parsed as extra columns."""
    ddl = """
    CREATE TABLE data.test(
    field_a INT OPTIONS(description='some description')
    )
    PARTITION BY RANGE_BUCKET(field_a, GENERATE_ARRAY(10, 1000, 1));"""

    parsed = DDLParser(ddl).run(output_mode="bigquery")

    column_field_a = {
        "check": None,
        "default": None,
        "name": "field_a",
        "nullable": True,
        "options": [{"description": "'some description'"}],
        "references": None,
        "size": None,
        "type": "INT",
        "unique": False,
    }
    assert parsed == [
        {
            "alter": {},
            "checks": [],
            "columns": [column_field_a],
            "index": [],
            "partition_by": {
                "columns": ["field_a"],
                "range": "GENERATE_ARRAY(10,1000,1)",
                "type": "RANGE_BUCKET",
            },
            "partitioned_by": [],
            "primary_key": [],
            "dataset": "data",
            "table_name": "test",
            "tablespace": None,
        }
    ]


def test_array_range():
    # BigQuery: PARTITION BY RANGE_BUCKET with an inline array literal.
    # The array items must end up in partition_by["range"] as a list of
    # strings, leaving only "field_a" in "columns".
    # NOTE(review): the DDL below has unbalanced brackets ("[1,2,3]])") -
    # looks like a typo the parser happens to tolerate; confirm intent.
    ddl = """CREATE TABLE data.test(
    field_a INT OPTIONS(description='some description')
    )
    PARTITION BY RANGE_BUCKET(field_a, [1,2,3]]) ;"""

    result = DDLParser(ddl).run(output_mode="bigquery")
    expected = [
        {
            "alter": {},
            "checks": [],
            "columns": [
                {
                    "check": None,
                    "default": None,
                    "name": "field_a",
                    "nullable": True,
                    "options": [{"description": "'some description'"}],
                    "references": None,
                    "size": None,
                    "type": "INT",
                    "unique": False,
                }
            ],
            "dataset": "data",
            "index": [],
            "partition_by": {
                "columns": ["field_a"],
                "range": ["1", "2", "3"],
                "type": "RANGE_BUCKET",
            },
            "partitioned_by": [],
            "primary_key": [],
            "table_name": "test",
            "tablespace": None,
        }
    ]
    assert expected == result


def test_date_trunc():
    """DATE_TRUNC's second argument must be reported as 'trunc_by',
    not appended to the partition columns."""
    ddl = """CREATE TABLE data.test(
    field_a INT OPTIONS(description='some description')
    )
    PARTITION BY DATE_TRUNC(field, MONTH);"""

    parsed = DDLParser(ddl).run(output_mode="bigquery")

    assert parsed == [
        {
            "alter": {},
            "checks": [],
            "columns": [
                {
                    "check": None,
                    "default": None,
                    "name": "field_a",
                    "nullable": True,
                    "options": [{"description": "'some description'"}],
                    "references": None,
                    "size": None,
                    "type": "INT",
                    "unique": False,
                }
            ],
            "dataset": "data",
            "index": [],
            "partition_by": {
                "columns": ["field"],
                "trunc_by": "MONTH",
                "type": "DATE_TRUNC",
            },
            "partitioned_by": [],
            "primary_key": [],
            "table_name": "test",
            "tablespace": None,
        }
    ]
33 changes: 33 additions & 0 deletions tests/dialects/test_psql.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,36 @@ def test_cast_generated():
}
]
assert expected == result


def test_with_time_zone():
    """PostgreSQL 'timestamp with time zone' should keep type 'timestamp'
    and set the boolean 'with_time_zone' flag on the column."""
    ddl = """
    CREATE TABLE public.test (date_updated timestamp with time zone);"""

    result = DDLParser(ddl).run(output_mode="postgres")

    assert result == [
        {
            "alter": {},
            "checks": [],
            "columns": [
                {
                    "check": None,
                    "default": None,
                    "name": "date_updated",
                    "nullable": True,
                    "references": None,
                    "size": None,
                    "type": "timestamp",
                    "unique": False,
                    "with_time_zone": True,
                }
            ],
            "index": [],
            "partitioned_by": [],
            "primary_key": [],
            "schema": "public",
            "table_name": "test",
            "tablespace": None,
        }
    ]
Loading

0 comments on commit d1c0a63

Please sign in to comment.