Skip to content

Commit

Permalink
issue xnuinside#201 snowflake table ddl improvement
Browse files Browse the repository at this point in the history
  • Loading branch information
dmaresma committed Aug 8, 2023
1 parent 11e42ef commit f495e32
Show file tree
Hide file tree
Showing 6 changed files with 596 additions and 261 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -421,9 +421,14 @@ In output you will have names like 'dbo' and 'TO_Requests', not '[dbo]' and '[TO
### Snowflake Dialect statements

- CREATE .. CLONE statements for table, database and schema
- CREATE TABLE [or REPLACE] [ TRANSIET | TEMPORARY ] .. CLUSTER BY ..
- CREATE TABLE [or REPLACE] [ TRANSIENT | TEMPORARY ] .. CLUSTER BY ..
- CONSTRAINT .. [NOT] ENFORCED
- COMMENT = in CREATE TABLE & CREATE SCHEMA statements
- WITH MASKING POLICY
- WITH TAG
- DATA_RETENTION_TIME_IN_DAYS
- MAX_DATA_EXTENSION_TIME_IN_DAYS
- CHANGE_TRACKING

### BigQuery

Expand Down
70 changes: 68 additions & 2 deletions simple_ddl_parser/dialects/snowflake.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from simple_ddl_parser.utils import remove_par
from typing import List


class Snowflake:
Expand All @@ -15,9 +16,50 @@ def p_expression_cluster_by(self, p):
p_list = remove_par(list(p))
p[0]["cluster_by"] = p_list[-1]

def p_table_comment(self, p):
"""expr : expr option_comment
def p_table_property_equals(self, p: List) -> None:
    """table_property_equals : id id id_or_string
    | id id_or_string
    """
    # The docstring holds the grammar production, so it is kept verbatim;
    # documentation for this rule lives in comments instead.
    #
    # Whichever alternative matched, the property value is the last symbol.
    # It is converted with int() and becomes the value of this rule.
    symbols = remove_par(list(p))
    raw_value = symbols[-1]
    p[0] = int(raw_value)

def p_table_property_equals_bool(self, p: List) -> None:
    """table_property_equals_bool : id id id_or_string
    | id id_or_string
    """
    # The docstring holds the grammar production, so it is kept verbatim;
    # documentation for this rule lives in comments instead.
    #
    # The property value is the last symbol of either alternative. The rule
    # evaluates to True only when that symbol spells "true" (compared
    # case-insensitively); every other spelling yields False.
    p_list = remove_par(list(p))
    p[0] = p_list[-1].lower() == "true"

def p_expression_data_retention_time_in_days(self, p):
    """expr : expr DATA_RETENTION_TIME_IN_DAYS table_property_equals"""
    # Carry the value of the nested expr forward and store the value of the
    # trailing table_property_equals symbol under
    # "data_retention_time_in_days".
    table_data = p[1]
    retention_value = remove_par(list(p))[-1]
    table_data["data_retention_time_in_days"] = retention_value
    p[0] = table_data

def p_expression_max_data_extension_time_in_days(self, p):
    """expr : expr MAX_DATA_EXTENSION_TIME_IN_DAYS table_property_equals"""
    # Carry the value of the nested expr forward and store the value of the
    # trailing table_property_equals symbol under
    # "max_data_extension_time_in_days".
    table_data = p[1]
    extension_value = remove_par(list(p))[-1]
    table_data["max_data_extension_time_in_days"] = extension_value
    p[0] = table_data

def p_expression_change_tracking(self, p):
    """expr : expr CHANGE_TRACKING table_property_equals_bool"""
    # Carry the value of the nested expr forward and store the value of the
    # trailing table_property_equals_bool symbol under "change_tracking".
    table_data = p[1]
    tracking_flag = remove_par(list(p))[-1]
    table_data["change_tracking"] = tracking_flag
    p[0] = table_data

def p_table_comment(self, p):
    """expr : expr option_comment"""
    # The rule evaluates to the value of the nested expr. When
    # option_comment produced a non-empty value, its entries are merged
    # into that same object in place.
    table_data, comment_option = p[1], p[2]
    if comment_option:
        table_data.update(comment_option)
    p[0] = table_data

def p_table_tag(self, p):
    """expr : expr option_with_tag"""
    # The rule evaluates to the value of the nested expr. When
    # option_with_tag produced a non-empty value, its entries are merged
    # into that same object in place.
    table_data, tag_option = p[1], p[2]
    if tag_option:
        table_data.update(tag_option)
    p[0] = table_data
Expand All @@ -31,3 +73,27 @@ def p_option_comment(self, p):
p_list = remove_par(list(p))
if "comment" in p[1].lower():
p[0] = {"comment": p_list[-1]}

def p_tag_equals(self, p: List) -> None:
    """tag_equals : id id id_or_string
    | id id_or_string
    """
    # The docstring holds the grammar production, so it is kept verbatim;
    # documentation for this rule lives in comments instead.
    #
    # Only the last two symbols are used: they are concatenated without a
    # separator to form the value of this rule.
    # NOTE(review): for the three-symbol alternative the first id is not
    # part of the result - confirm this is intended.
    symbols = remove_par(list(p))
    p[0] = "{}{}".format(symbols[-2], symbols[-1])

def p_option_with_tag(self, p):
    """option_with_tag : TAG LP id RP
    | TAG LP id DOT id DOT id RP
    | TAG LP id DOT id DOT tag_equals RP
    | WITH TAG LP id RP
    | WITH TAG LP id DOT id DOT tag_equals RP
    """
    # The docstring holds the grammar production, so it is kept verbatim;
    # documentation for this rule lives in comments instead.
    #
    # The rule evaluates to {"with_tag": <tag reference>}.
    # NOTE(review): assumes remove_par drops the LP/RP tokens - confirm.
    p_list = remove_par(list(p))

    # A three-part reference contributes five trailing symbols
    # (part DOT part DOT part) on top of the result slot and the TAG keyword,
    # so it yields at least seven entries. The single-id alternatives are
    # shorter; indexing them with -5 would raise IndexError, so they are
    # reported with the bare id instead.
    if len(p_list) >= 7:
        tag_reference = f"{p_list[-5]}.{p_list[-3]}.{p_list[-1]}"
    else:
        tag_reference = f"{p_list[-1]}"
    p[0] = {"with_tag": tag_reference}

def p_option_with_masking_policy(self, p):
    """option_with_masking_policy : MASKING POLICY id DOT id DOT id
    | WITH MASKING POLICY id DOT id DOT id
    """
    # The docstring holds the grammar production, so it is kept verbatim;
    # documentation for this rule lives in comments instead.
    #
    # Both alternatives end in "id DOT id DOT id"; the three ids are picked
    # from the tail of the symbol list and re-joined with dots.
    symbols = remove_par(list(p))
    first_part = symbols[-5]
    second_part = symbols[-3]
    third_part = symbols[-1]
    p[0] = {
        "with_masking_policy": "{}.{}.{}".format(
            first_part, second_part, third_part
        )
    }
4 changes: 3 additions & 1 deletion simple_ddl_parser/dialects/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def p_create_table(self, p: List):
| CREATE OR REPLACE id TABLE
"""
# id - for EXTERNAL, TRANSIENT, TEMPORARY
# id - for EXTERNAL, TRANSIENT, TEMPORARY, GLOBAL, LOCAL, TEMP, VOLATILE
# get schema & table name
p[0] = {}
p_list = list(p)
Expand Down Expand Up @@ -398,6 +398,8 @@ def p_defcolumn(self, p: List) -> None:
| defcolumn on_update
| defcolumn options
| defcolumn autoincrement
| defcolumn option_with_tag
| defcolumn option_with_masking_policy
"""
p[0] = p[1]
p_list = list(p)
Expand Down
529 changes: 274 additions & 255 deletions simple_ddl_parser/parsetab.py

Large diffs are not rendered by default.

13 changes: 11 additions & 2 deletions simple_ddl_parser/tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,12 @@
"ENFORCED": "ENFORCED",
"ENCODE": "ENCODE",
"GENERATED": "GENERATED",
"COMMENT": "COMMENT"
"COMMENT": "COMMENT",
"TAG": "TAG",
"POLICY":"POLICY",
"MASKING": "MASKING",
"MASKED": "MASKED",
"WITH" : "WITH",
}
first_liners = {
"LIKE": "LIKE",
Expand Down Expand Up @@ -88,7 +93,11 @@
# mssql
"TEXTIMAGE_ON": "TEXTIMAGE_ON",
# psql
"INHERITS": "INHERITS"
"INHERITS": "INHERITS",
# snowflake
"DATA_RETENTION_TIME_IN_DAYS" : "DATA_RETENTION_TIME_IN_DAYS",
"MAX_DATA_EXTENSION_TIME_IN_DAYS" : "MAX_DATA_EXTENSION_TIME_IN_DAYS",
"CHANGE_TRACKING" : "CHANGE_TRACKING"
}
sequence_reserved = {
"INCREMENT": "INCREMENT",
Expand Down
234 changes: 234 additions & 0 deletions tests/dialects/test_snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,237 @@ def test_comment_on_create_schema():
result = DDLParser(ddl, normalize_names=True).run(output_mode="snowflake")
expected = [{"comment": "'this is comment1'", "schema_name": "my_schema"}]
assert result == expected

def test_table_with_tag():
    """A column-level WITH TAG clause is reported as "with_tag" on the column."""
    ddl = """
create TABLE ASIN.EXCLUSION (
USER_COMMENT VARCHAR(100) COMMENT 'User input' WITH TAG (DBName.MASKING_POLICY_LIBRARY.PROJECT_POLICY_MASK='mask_object'),
PROCESS_SQN NUMBER(10,0) NOT NULL,
constraint PK_EXCLUSION primary key (ASIN)
)
;
"""
    result_tagged = DDLParser(ddl, normalize_names=True, debug=True).run(
        output_mode="snowflake"
    )
    expected_tagged = [
        {
            "alter": {},
            "checks": [],
            "clone": None,
            "columns": [
                {
                    "name": "USER_COMMENT",
                    "type": "VARCHAR",
                    "size": 100,
                    "comment": "'User input'",
                    "references": None,
                    "unique": False,
                    "nullable": True,
                    "default": None,
                    "check": None,
                    "with_tag": "DBName.MASKING_POLICY_LIBRARY.PROJECT_POLICY_MASK='mask_object'",
                },
                {
                    "check": None,
                    "default": None,
                    "name": "PROCESS_SQN",
                    "nullable": False,
                    "references": None,
                    "size": (10, 0),
                    "type": "NUMBER",
                    "unique": False,
                },
            ],
            "constraints": {
                "primary_keys": [
                    {"columns": ["ASIN"], "constraint_name": "PK_EXCLUSION"}
                ]
            },
            "index": [],
            "partitioned_by": [],
            "primary_key": ["ASIN"],
            "primary_key_enforced": None,
            "schema": "ASIN",
            "table_name": "EXCLUSION",
            "tablespace": None,
        }
    ]

    # The test only compares values: no debugging artefact is written to
    # disk, so repeated runs leave the working directory untouched.
    assert result_tagged == expected_tagged

def test_table_with_mask():
    """A column-level WITH MASKING POLICY clause is reported on the column."""
    ddl = """
create TABLE ASIN.EXCLUSION (
USER_COMMENT VARCHAR(100) COMMENT 'User input' WITH MASKING POLICY DBName.MASKING_POLICY_LIBRARY.MASK_STRING,
PROCESS_SQN NUMBER(10,0) NOT NULL,
constraint PK_EXCLUSION primary key (ASIN)
)
;
"""
    # Expected description of each column, built separately for readability.
    user_comment_column = {
        "name": "USER_COMMENT",
        "type": "VARCHAR",
        "size": 100,
        "comment": "'User input'",
        "references": None,
        "unique": False,
        "nullable": True,
        "default": None,
        "check": None,
        "with_masking_policy": "DBName.MASKING_POLICY_LIBRARY.MASK_STRING",
    }
    process_sqn_column = {
        "name": "PROCESS_SQN",
        "type": "NUMBER",
        "size": (10, 0),
        "references": None,
        "unique": False,
        "nullable": False,
        "default": None,
        "check": None,
    }
    expected_table = {
        "schema": "ASIN",
        "table_name": "EXCLUSION",
        "columns": [user_comment_column, process_sqn_column],
        "primary_key": ["ASIN"],
        "primary_key_enforced": None,
        "constraints": {
            "primary_keys": [
                {"columns": ["ASIN"], "constraint_name": "PK_EXCLUSION"}
            ]
        },
        "alter": {},
        "checks": [],
        "clone": None,
        "index": [],
        "partitioned_by": [],
        "tablespace": None,
    }

    parser = DDLParser(ddl, normalize_names=True, debug=True)
    parsed = parser.run(output_mode="snowflake")

    assert parsed == [expected_table]

def test_table_with_retention():
    """DATA_RETENTION_TIME_IN_DAYS after the column list is reported as an int."""
    ddl = """
create TABLE ASIN.EXCLUSION (
USER_COMMENT VARCHAR(100) COMMENT 'User input',
PROCESS_SQN NUMBER(10,0) NOT NULL,
constraint PK_EXCLUSION primary key (ASIN)
) DATA_RETENTION_TIME_IN_DAYS = 15
;
"""
    # Expected description of each column, built separately for readability.
    user_comment_column = {
        "name": "USER_COMMENT",
        "type": "VARCHAR",
        "size": 100,
        "comment": "'User input'",
        "references": None,
        "unique": False,
        "nullable": True,
        "default": None,
        "check": None,
    }
    process_sqn_column = {
        "name": "PROCESS_SQN",
        "type": "NUMBER",
        "size": (10, 0),
        "references": None,
        "unique": False,
        "nullable": False,
        "default": None,
        "check": None,
    }
    expected_table = {
        "schema": "ASIN",
        "table_name": "EXCLUSION",
        "columns": [user_comment_column, process_sqn_column],
        "primary_key": ["ASIN"],
        "primary_key_enforced": None,
        "constraints": {
            "primary_keys": [
                {"columns": ["ASIN"], "constraint_name": "PK_EXCLUSION"}
            ]
        },
        "alter": {},
        "checks": [],
        "clone": None,
        "index": [],
        "partitioned_by": [],
        "tablespace": None,
        "data_retention_time_in_days": 15,
    }

    parser = DDLParser(ddl, normalize_names=True, debug=True)
    parsed = parser.run(output_mode="snowflake")

    assert parsed == [expected_table]

def test_table_with_change_tracking():
    """change_tracking after the column list is reported as a boolean."""
    ddl = """
create TABLE ASIN.EXCLUSION (
USER_COMMENT VARCHAR(100) COMMENT 'User input',
PROCESS_SQN NUMBER(10,0) NOT NULL,
constraint PK_EXCLUSION primary key (ASIN)
) change_tracking = False
;
"""
    # Expected description of each column, built separately for readability.
    user_comment_column = {
        "name": "USER_COMMENT",
        "type": "VARCHAR",
        "size": 100,
        "comment": "'User input'",
        "references": None,
        "unique": False,
        "nullable": True,
        "default": None,
        "check": None,
    }
    process_sqn_column = {
        "name": "PROCESS_SQN",
        "type": "NUMBER",
        "size": (10, 0),
        "references": None,
        "unique": False,
        "nullable": False,
        "default": None,
        "check": None,
    }
    expected_table = {
        "schema": "ASIN",
        "table_name": "EXCLUSION",
        "columns": [user_comment_column, process_sqn_column],
        "primary_key": ["ASIN"],
        "primary_key_enforced": None,
        "constraints": {
            "primary_keys": [
                {"columns": ["ASIN"], "constraint_name": "PK_EXCLUSION"}
            ]
        },
        "alter": {},
        "checks": [],
        "clone": None,
        "index": [],
        "partitioned_by": [],
        "tablespace": None,
        "change_tracking": False,
    }

    parser = DDLParser(ddl, normalize_names=True, debug=True)
    parsed = parser.run(output_mode="snowflake")

    assert parsed == [expected_table]

0 comments on commit f495e32

Please sign in to comment.