-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from bdosn/Notebooks-till-third-workshop
Notebooks till third workshop
- Loading branch information
Showing
4 changed files
with
1,396 additions
and
0 deletions.
There are no files selected for viewing
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
db_creds: | ||
ssh_address: 'yourAddress/IP' | ||
ssh_username: 'yourUsername' | ||
ssh_password: 'yourPassword' | ||
user: 'dbUsername' | ||
password: 'dbPassword' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,208 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"ename": "SyntaxError", | ||
"evalue": "invalid syntax (<ipython-input-1-ba5ec63f4dda>, line 1)", | ||
"output_type": "error", | ||
"traceback": [ | ||
"\u001b[0;36m File \u001b[0;32m\"<ipython-input-1-ba5ec63f4dda>\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m pip install psycopg2-binary\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"pip install psycopg2-binary\n", | ||
"pip install sshtunnel\n", | ||
"pip install sqlalchemy" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import psycopg2\n", | ||
"import sshtunnel as ssh\n", | ||
"import pandas\n", | ||
"import yaml" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"## Save the DB credentials and your username and password in a config file.\n", | ||
"## You DO NOT want to write down your password directly on a script.\n", | ||
"## Hence you should use a config file. I have chosen to work with .yml file.\n", | ||
"## But there are also other options. Make sure you either specify the path \n", | ||
"## of the config file or that the file is at the same location as this .ipynb script\n", | ||
"\n", | ||
"#ymlfile:\n", | ||
"#db_creds:\n", | ||
"# dsn: '10.2.221.111..116:8563'\n", | ||
"# user: 'tanzeem.haque'\n", | ||
"# password: 'get_your_own_pw_lol'\n", | ||
"\n", | ||
"#config_file = 'config_mentor.yml'\n", | ||
"config_file = 'config_student.yml'\n", | ||
"\n", | ||
"with open(config_file, 'r') as ymlfile:\n", | ||
" cfg = yaml.safe_load(ymlfile)\n", | ||
"ssh_address = cfg['db_creds']['ssh_address']\n", | ||
"ssh_username = cfg['db_creds']['ssh_username']\n", | ||
"ssh_password = cfg['db_creds']['ssh_password']\n", | ||
"user = cfg['db_creds']['user']\n", | ||
"password = cfg['db_creds']['password']\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"server connected\n", | ||
"database connected\n", | ||
"The number of parts: 10\n", | ||
"('2624022', '93', '13', None, None, None, '93', '13', '4.1', '4.1')\n", | ||
"('2624022', None, '15', None, None, None, None, None, '4.1', '4.1')\n", | ||
"('2624022', None, '17', None, None, None, None, None, '4.1', '4.1')\n", | ||
"('2624022', None, '19', None, None, None, None, None, '4.1', '4.1')\n", | ||
"('2624022', None, '21', None, None, None, None, None, '4.1', '4.1')\n", | ||
"('2624022', None, '23', None, None, None, None, None, '4.1', '4.1')\n", | ||
"('2624022', None, '25', None, None, None, None, None, '4.1', '4.1')\n", | ||
"('2624022', None, '27', None, None, None, None, None, '4.1', '4.1')\n", | ||
"('2624022', None, '29', None, None, None, None, None, '4.1', '4.1')\n", | ||
"('2624022', None, '31', None, None, None, None, None, '4.1', '4.1')\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"try:\n", | ||
"\n", | ||
" with ssh.SSHTunnelForwarder(\n", | ||
" (ssh_address, 22),\n", | ||
" #ssh_private_key=\"</path/to/private/ssh/key>\",\n", | ||
" ### in my case, I used a password instead of a private key\n", | ||
" ssh_username=ssh_username,\n", | ||
" ssh_password=ssh_password, \n", | ||
" remote_bind_address=('localhost', 5432)) as server:\n", | ||
"\n", | ||
" server.start()\n", | ||
" print (\"server connected\")\n", | ||
"\n", | ||
" lp = str(server.local_bind_port)\n", | ||
" params = {\n", | ||
" 'database': 'alcdatathon2021',\n", | ||
" 'user': user,\n", | ||
" 'password': password,\n", | ||
" 'host': 'localhost',\n", | ||
" 'port': lp\n", | ||
" }\n", | ||
"\n", | ||
" conn = psycopg2.connect(**params)\n", | ||
" curs = conn.cursor()\n", | ||
"\n", | ||
" print (\"database connected\")\n", | ||
" curs.execute(\"SELECT * from aacomplete_kvalues limit 10;\")\n", | ||
" print(\"The number of parts: \", curs.rowcount)\n", | ||
" row = curs.fetchone()\n", | ||
"\n", | ||
" while row is not None:\n", | ||
" print(row)\n", | ||
" row = curs.fetchone()\n", | ||
"\n", | ||
"except:\n", | ||
" print (\"Connection Failed\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 31, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"## DBconnect with exasol\n", | ||
"C = pyexasol.connect(dsn = dsn, user = user, password = passwd)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 112, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"## SQL Query. I suggest, you do not make complicated queries here, \n", | ||
"## but you have all liberty and freedom to do that. But I personally\n", | ||
"## would suggest do as much as possible in the DB and save the result \n", | ||
"## in a physical table so that all you need to do is to simply \"read\" it here \n", | ||
"all_sets_sql = 'select * from TEST.TZ_STG_UN_DWH_LEADS_FULL_SETS'" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 113, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"## Export the SQL query result into pandas frame\n", | ||
"all_sets_df = C.export_to_pandas(all_sets_sql)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 114, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
" set name PROD_REF_COUNT\n", | ||
"0 UN all 456542\n", | ||
"1 UN union DWH 627973\n", | ||
"2 Total distinct 456567\n", | ||
"3 DWH all 171431\n", | ||
"4 DWH except UN 25\n", | ||
"5 UN and DWH common 171406\n", | ||
"6 UN except DWH 285136\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"print(all_sets_df)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.5" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |
Oops, something went wrong.