Skip to content

Commit

Permalink
Merge pull request #1 from bdosn/Notebooks-till-third-workshop
Browse files Browse the repository at this point in the history
Notebooks till third workshop
  • Loading branch information
ihumaunkabir committed Jan 16, 2021
2 parents ddc9822 + c940fb6 commit 3cbd79a
Show file tree
Hide file tree
Showing 4 changed files with 1,396 additions and 0 deletions.
File renamed without changes.
6 changes: 6 additions & 0 deletions Part 2 Feature Engineering/config_student.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Connection settings read by dbconnect.ipynb; replace every placeholder with your own values.
# All keys must be nested under db_creds, because the notebook looks them up as cfg['db_creds'][...].
db_creds:
  # SSH login for the host the tunnel is opened to
  ssh_address: 'yourAddress/IP'
  ssh_username: 'yourUsername'
  ssh_password: 'yourPassword'
  # Database login used through the tunnel
  user: 'dbUsername'
  password: 'dbPassword'
208 changes: 208 additions & 0 deletions Part 2 Feature Engineering/dbconnect.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid syntax (<ipython-input-1-ba5ec63f4dda>, line 1)",
"output_type": "error",
"traceback": [
"\u001b[0;36m File \u001b[0;32m\"<ipython-input-1-ba5ec63f4dda>\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m pip install psycopg2-binary\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
]
}
],
"source": [
"## Use the %pip magic: a bare `pip install ...` line is not valid Python in a code cell,\n",
"## and %pip installs into the environment of the running kernel.\n",
"%pip install psycopg2-binary sshtunnel sqlalchemy"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import psycopg2\n",
"import sshtunnel as ssh\n",
"import pandas\n",
"import yaml"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"## Credentials (DB user/password and the SSH login) are read from a config file,\n",
"## so that no password ever has to be typed into this notebook.\n",
"## A .yml file is used here; other formats would work just as well.\n",
"## Keep the file next to this .ipynb, or put its full path into config_file.\n",
"##\n",
"## Layout of the mentor config, for reference:\n",
"##   db_creds:\n",
"##     dsn: '<host>:<port>'\n",
"##     user: '<username>'\n",
"##     password: '<password>'\n",
"\n",
"config_file = 'config_student.yml'  ## mentors: 'config_mentor.yml'\n",
"\n",
"with open(config_file, 'r') as ymlfile:\n",
"    cfg = yaml.safe_load(ymlfile)\n",
"\n",
"## Unpack the five connection settings from the db_creds section.\n",
"ssh_address, ssh_username, ssh_password, user, password = [\n",
"    cfg['db_creds'][key]\n",
"    for key in ('ssh_address', 'ssh_username', 'ssh_password', 'user', 'password')\n",
"]\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"server connected\n",
"database connected\n",
"The number of parts: 10\n",
"('2624022', '93', '13', None, None, None, '93', '13', '4.1', '4.1')\n",
"('2624022', None, '15', None, None, None, None, None, '4.1', '4.1')\n",
"('2624022', None, '17', None, None, None, None, None, '4.1', '4.1')\n",
"('2624022', None, '19', None, None, None, None, None, '4.1', '4.1')\n",
"('2624022', None, '21', None, None, None, None, None, '4.1', '4.1')\n",
"('2624022', None, '23', None, None, None, None, None, '4.1', '4.1')\n",
"('2624022', None, '25', None, None, None, None, None, '4.1', '4.1')\n",
"('2624022', None, '27', None, None, None, None, None, '4.1', '4.1')\n",
"('2624022', None, '29', None, None, None, None, None, '4.1', '4.1')\n",
"('2624022', None, '31', None, None, None, None, None, '4.1', '4.1')\n"
]
}
],
"source": [
"## Tunnel to the DB host over SSH, run a sample query against PostgreSQL, print the rows.\n",
"try:\n",
"    ## Entering the `with` block starts the tunnel and leaving it stops the tunnel,\n",
"    ## so no explicit server.start() / server.stop() call is needed.\n",
"    with ssh.SSHTunnelForwarder(\n",
"            (ssh_address, 22),\n",
"            #ssh_private_key=\"</path/to/private/ssh/key>\",\n",
"            ### in my case, I used a password instead of a private key\n",
"            ssh_username=ssh_username,\n",
"            ssh_password=ssh_password,\n",
"            remote_bind_address=('localhost', 5432)) as server:\n",
"\n",
"        print(\"server connected\")\n",
"\n",
"        ## psycopg2 connects to the local end of the tunnel, not to the remote host.\n",
"        lp = str(server.local_bind_port)\n",
"        params = {\n",
"            'database': 'alcdatathon2021',\n",
"            'user': user,\n",
"            'password': password,\n",
"            'host': 'localhost',\n",
"            'port': lp\n",
"        }\n",
"\n",
"        conn = psycopg2.connect(**params)\n",
"        try:\n",
"            print(\"database connected\")\n",
"\n",
"            ## The cursor context manager closes the cursor when the block ends.\n",
"            with conn.cursor() as curs:\n",
"                curs.execute(\"SELECT * from aacomplete_kvalues limit 10;\")\n",
"                print(\"The number of parts: \", curs.rowcount)\n",
"\n",
"                ## A psycopg2 cursor is iterable and yields one row tuple at a time.\n",
"                for row in curs:\n",
"                    print(row)\n",
"        finally:\n",
"            ## Always release the connection, even if the query fails.\n",
"            conn.close()\n",
"\n",
"except Exception as err:\n",
"    ## Best-effort demo cell: report the failure instead of raising, but say what went wrong.\n",
"    print(\"Connection Failed\")\n",
"    print(f\"{type(err).__name__}: {err}\")"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"## DBconnect with exasol\n",
"## This path needs the mentor config: it reads a 'dsn' entry from db_creds, which\n",
"## config_student.yml does not define (the lookup below then raises KeyError).\n",
"## pyexasol is imported here rather than in the imports cell so the PostgreSQL\n",
"## cells above keep working when pyexasol is not installed.\n",
"import pyexasol\n",
"\n",
"dsn = cfg['db_creds']['dsn']\n",
"C = pyexasol.connect(dsn = dsn, user = user, password = password)"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [],
"source": [
"## SQL to run against the DB. Complicated queries are possible here, but the\n",
"## recommendation is to do as much as possible inside the database and save the\n",
"## result as a physical table, so that this notebook only has to read it.\n",
"all_sets_sql = (\n",
"    'select * from TEST.TZ_STG_UN_DWH_LEADS_FULL_SETS'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [],
"source": [
"## Run the query through the open connection C and load its result into a pandas frame.\n",
"all_sets_df = C.export_to_pandas(all_sets_sql)"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" set name PROD_REF_COUNT\n",
"0 UN all 456542\n",
"1 UN union DWH 627973\n",
"2 Total distinct 456567\n",
"3 DWH all 171431\n",
"4 DWH except UN 25\n",
"5 UN and DWH common 171406\n",
"6 UN except DWH 285136\n"
]
}
],
"source": [
"## A bare last expression lets Jupyter render the frame as a rich table instead of plain text.\n",
"all_sets_df"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Loading

0 comments on commit 3cbd79a

Please sign in to comment.