Skip to content

Commit

Permalink
Merge pull request #251 from xnuinside/v1.3.0_snowflake_postgres
Browse files Browse the repository at this point in the history
PostgreSQL & Snowflake Improvements
  • Loading branch information
xnuinside committed May 11, 2024
2 parents 6b8d0c4 + f6fdcd0 commit d1c0a63
Show file tree
Hide file tree
Showing 11 changed files with 31,799 additions and 30,638 deletions.
17 changes: 17 additions & 0 deletions CHANGELOG.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
**v1.3.0**

### Fixes
PostgreSQL:
1. Timezone was moved out of the type definition into the keyword 'with_time_zone'; it can be True (if with time zone) or False (if without)
BigQuery:
1. Previously, Range in RANGE_BUCKETS was parsed as columns; now this behaviour is changed and
the range is placed in its own keyword - 'range' (can be an array or a str).
Also, for all ``*_TRUNC`` PARTITIONS like DATETIME_TRUNC, TIMESTAMP_TRUNC, etc., the second argument moved to the arg 'trunc_by'

### Improvements
PostgreSQL:
1. Added support for PostgreSQL with / without time zone - https://github.com/xnuinside/simple-ddl-parser/issues/250

BigQuery:
1. Added support for GENERATE_ARRAY in RANGE_BUCKETS https://github.com/xnuinside/simple-ddl-parser/issues/183

**v1.2.1**
### Fixes
MySQL:
Expand Down
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,23 @@ for help with debugging & testing support for BigQuery dialect DDLs:


## Changelog
**v1.3.0**

### Fixes
PostgreSQL:
1. Timezone was moved out of the type definition into the keyword 'with_time_zone'; it can be True (if with time zone) or False (if without)
BigQuery:
1. Previously, Range in RANGE_BUCKETS was parsed as columns; now this behaviour is changed and
the range is placed in its own keyword - 'range' (can be an array or a str).
Also, for all ``*_TRUNC`` PARTITIONS like DATETIME_TRUNC, TIMESTAMP_TRUNC, etc., the second argument moved to the arg 'trunc_by'

### Improvements
PostgreSQL:
1. Added support for PostgreSQL with / without time zone - https://github.com/xnuinside/simple-ddl-parser/issues/250

BigQuery:
1. Added support for GENERATE_ARRAY in RANGE_BUCKETS https://github.com/xnuinside/simple-ddl-parser/issues/183

**v1.2.1**
### Fixes
MySQL:
Expand Down
27 changes: 27 additions & 0 deletions docs/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,33 @@ for help with debugging & testing support for BigQuery dialect DDLs:
Changelog
---------

**v1.3.0**

Fixes
^^^^^

PostgreSQL:


#. Timezone was moved out of the type definition into the keyword 'with_time_zone'; it can be True (if with time zone) or False (if without)
BigQuery:
#. Previously, Range in RANGE_BUCKETS was parsed as columns; now this behaviour is changed and
the range is placed in its own keyword - 'range' (can be an array or a str).
Also, for all ``*_TRUNC`` PARTITIONS like DATETIME_TRUNC, TIMESTAMP_TRUNC, etc., the second argument moved to the arg 'trunc_by'

Improvements
^^^^^^^^^^^^

PostgreSQL:


#. Added support for PostgreSQL with / without time zone - https://github.com/xnuinside/simple-ddl-parser/issues/250

BigQuery:


#. Added support for GENERATE_ARRAY in RANGE_BUCKETS https://github.com/xnuinside/simple-ddl-parser/issues/183

**v1.2.1**

Fixes
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "simple-ddl-parser"
version = "1.2.1"
version = "1.3.0"
description = "Simple DDL Parser to parse SQL & dialects like HQL, TSQL (MSSQL), Oracle, AWS Redshift, Snowflake, MySQL, PostgreSQL, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl."
authors = ["Iuliia Volkova <[email protected]>"]
license = "MIT"
Expand Down
10 changes: 10 additions & 0 deletions simple_ddl_parser/dialects/psql.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,13 @@ def p_expr_inherits(self, p: List) -> None:
"table_name": p_list[-1]["table_name"],
}
p[1].update({"inherits": table_identifier})

def p_timezone(self, p: List) -> None:
    """timezone : WITH id id
    | WITHOUT id id"""
    # The two `id` tokens cover the words TIME and ZONE; only the leading
    # WITH / WITHOUT keyword decides the flag value.
    # NOTE(review): the membership test assumes the matched keyword appears
    # upper-cased in the symbol list - confirm for lower-case DDL input.
    tokens = remove_par(list(p))
    p[0] = {"with_time_zone": "WITH" in tokens}
52 changes: 44 additions & 8 deletions simple_ddl_parser/dialects/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,56 @@


class AfterColumns:
@staticmethod
def _parse_range_bucket(data: List[str]) -> Tuple[List[str], List[str]]:
range = None

if len(data) == 3:
columns = data[0]
range = data[2]
else:
columns = []
for column in data[0]:
if "[" in column:
range = [column.replace("[", "")]
elif range:
range.append(column.replace("]", ""))
else:
columns.append(column)
return columns, range

def p_expression_partition_by(self, p: List) -> None:
    """expr : expr PARTITION BY LP pid RP
    | expr PARTITION BY id LP pid RP
    | expr PARTITION BY pid
    | expr PARTITION BY id pid
    | expr PARTITION BY id LP pid COMMA f_call RP
    """
    # NOTE(review): the scraped diff interleaved the old and new bodies of
    # this method; this is the reconstructed post-merge version, matching
    # the expected outputs asserted in tests/dialects/test_bigquery.py.
    p[0] = p[1]
    p_list = remove_par(list(p))
    # `bucket_range` instead of `range` to avoid shadowing the builtin.
    _type, bucket_range, trunc_by = None, None, None

    if isinstance(p_list[4], list):
        # Plain PARTITION BY (col, ...) - no partition function involved.
        columns = p_list[4]
    elif "_TRUNC" in p_list[4]:
        # BigQuery DATE_TRUNC / DATETIME_TRUNC / TIMESTAMP_TRUNC: the last
        # argument is the truncation granularity, not a column.
        _type = p_list[4]
        trunc_by = p_list[5][-1]
        p_list[5].pop(-1)
        columns = p_list[5]
    elif p_list[4].upper() == "RANGE_BUCKET":
        # BigQuery RANGE_BUCKET with GENERATE_ARRAY or an inline array.
        _type = p_list[4]
        columns, bucket_range = self._parse_range_bucket(p_list[5:])
    else:
        columns = p_list[-1]
    if not _type and isinstance(p_list[4], str):
        # e.g. "PARTITION BY HASH (col)": the id before the column list.
        _type = p_list[4]
    p[0]["partition_by"] = {"columns": columns, "type": _type}
    if bucket_range:
        p[0]["partition_by"]["range"] = bucket_range
    if trunc_by:
        p[0]["partition_by"]["trunc_by"] = trunc_by


class Database:
Expand Down Expand Up @@ -419,6 +454,7 @@ def p_defcolumn(self, p: List) -> None:
| defcolumn as_virtual
| defcolumn constraint
| defcolumn generated_by
| defcolumn timezone
"""
p[0] = p[1]
p_list = list(p)
Expand Down
62,137 changes: 31,514 additions & 30,623 deletions simple_ddl_parser/parsetab.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions simple_ddl_parser/tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
"POLICY",
"MASKING",
"WITH",
"WITHOUT",
"ORDER",
"NOORDER",
"VISIBLE",
Expand Down
129 changes: 127 additions & 2 deletions tests/dialects/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,8 @@ def test_table_name_with_project_id():
}
],
"partition_by": {
"columns": ["fiscal_half_year_reporting_week_no", "DAY"],
"columns": ["fiscal_half_year_reporting_week_no"],
"trunc_by": "DAY",
"type": "DATETIME_TRUNC",
},
"partitioned_by": [],
Expand Down Expand Up @@ -646,7 +647,8 @@ def test_multiple_options():
{"option_four": '"Four"'},
],
"partition_by": {
"columns": ["fiscal_half_year_reporting_week_no", "DAY"],
"columns": ["fiscal_half_year_reporting_week_no"],
"trunc_by": "DAY",
"type": "DATETIME_TRUNC",
},
"partitioned_by": [],
Expand Down Expand Up @@ -869,3 +871,126 @@ def test_bigquery_options_string():
"types": [],
}
assert result == expected


def test_bigquery_partition_range():
    """RANGE_BUCKET with GENERATE_ARRAY: the generator call must land in
    partition_by['range'] instead of being parsed as extra columns."""
    ddl = """
    CREATE TABLE data.test(
    field_a INT OPTIONS(description='some description')
    )
    PARTITION BY RANGE_BUCKET(field_a, GENERATE_ARRAY(10, 1000, 1));"""

    parsed = DDLParser(ddl).run(output_mode="bigquery")

    column_field_a = {
        "check": None,
        "default": None,
        "name": "field_a",
        "nullable": True,
        "options": [{"description": "'some description'"}],
        "references": None,
        "size": None,
        "type": "INT",
        "unique": False,
    }
    assert parsed == [
        {
            "alter": {},
            "checks": [],
            "columns": [column_field_a],
            "index": [],
            "partition_by": {
                "columns": ["field_a"],
                "range": "GENERATE_ARRAY(10,1000,1)",
                "type": "RANGE_BUCKET",
            },
            "partitioned_by": [],
            "primary_key": [],
            "dataset": "data",
            "table_name": "test",
            "tablespace": None,
        }
    ]


def test_array_range():
    # BigQuery: PARTITION BY RANGE_BUCKET with an inline array literal.
    # The array items must end up in partition_by["range"] as a list of
    # strings, leaving only "field_a" in "columns".
    # NOTE(review): the DDL below has unbalanced brackets ("[1,2,3]])") -
    # looks like a typo the parser happens to tolerate; confirm intent.
    ddl = """CREATE TABLE data.test(
    field_a INT OPTIONS(description='some description')
    )
    PARTITION BY RANGE_BUCKET(field_a, [1,2,3]]) ;"""

    result = DDLParser(ddl).run(output_mode="bigquery")
    expected = [
        {
            "alter": {},
            "checks": [],
            "columns": [
                {
                    "check": None,
                    "default": None,
                    "name": "field_a",
                    "nullable": True,
                    "options": [{"description": "'some description'"}],
                    "references": None,
                    "size": None,
                    "type": "INT",
                    "unique": False,
                }
            ],
            "dataset": "data",
            "index": [],
            "partition_by": {
                "columns": ["field_a"],
                "range": ["1", "2", "3"],
                "type": "RANGE_BUCKET",
            },
            "partitioned_by": [],
            "primary_key": [],
            "table_name": "test",
            "tablespace": None,
        }
    ]
    assert expected == result


def test_date_trunc():
    """DATE_TRUNC's second argument must be reported as 'trunc_by',
    not appended to the partition columns."""
    ddl = """CREATE TABLE data.test(
    field_a INT OPTIONS(description='some description')
    )
    PARTITION BY DATE_TRUNC(field, MONTH);"""

    parsed = DDLParser(ddl).run(output_mode="bigquery")

    assert parsed == [
        {
            "alter": {},
            "checks": [],
            "columns": [
                {
                    "check": None,
                    "default": None,
                    "name": "field_a",
                    "nullable": True,
                    "options": [{"description": "'some description'"}],
                    "references": None,
                    "size": None,
                    "type": "INT",
                    "unique": False,
                }
            ],
            "dataset": "data",
            "index": [],
            "partition_by": {
                "columns": ["field"],
                "trunc_by": "MONTH",
                "type": "DATE_TRUNC",
            },
            "partitioned_by": [],
            "primary_key": [],
            "table_name": "test",
            "tablespace": None,
        }
    ]
33 changes: 33 additions & 0 deletions tests/dialects/test_psql.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,36 @@ def test_cast_generated():
}
]
assert expected == result


def test_with_time_zone():
    """PostgreSQL 'timestamp with time zone' should keep type 'timestamp'
    and set the boolean 'with_time_zone' flag on the column."""
    ddl = """
    CREATE TABLE public.test (date_updated timestamp with time zone);"""

    result = DDLParser(ddl).run(output_mode="postgres")

    assert result == [
        {
            "alter": {},
            "checks": [],
            "columns": [
                {
                    "check": None,
                    "default": None,
                    "name": "date_updated",
                    "nullable": True,
                    "references": None,
                    "size": None,
                    "type": "timestamp",
                    "unique": False,
                    "with_time_zone": True,
                }
            ],
            "index": [],
            "partitioned_by": [],
            "primary_key": [],
            "schema": "public",
            "table_name": "test",
            "tablespace": None,
        }
    ]
Loading

0 comments on commit d1c0a63

Please sign in to comment.