Skip to content

Commit

Permalink
refactoring db setup to load data into remote db (#22)
Browse files Browse the repository at this point in the history
  • Loading branch information
mrcnc authored Apr 13, 2017
1 parent efbc793 commit 83d6dd8
Show file tree
Hide file tree
Showing 8 changed files with 67 additions and 77 deletions.
30 changes: 4 additions & 26 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,39 +29,17 @@ Other nice to have features:

First you need to install PostgreSQL and PostGIS.

Once those are available, if you use `bash`, you can just run the `setup.sh`
script.
Once those are available, you can run the `setup.sh` script:

```
./setup.sh
```

If not, you can run the commands below to get the 311 data into your database.
If you are loading the data into a remote database, set the `DB_USER`,
`DB_NAME`, and `DB_HOST` environment variables to tell the script where to load it:

```
# identify where to save the two data files
call_data_file="./nola311_raw.csv"
neighborhood_areas_file="./neighborhood_areas.geo.json"
# create the db
createuser nola311
createdb nola311 -O nola311
# download the source data
/usr/local/bin/wget --show-progress -O "$call_data_file" "https://data.nola.gov/api/views/3iz8-nghx/rows.csv?accessType=DOWNLOAD"
/usr/local/bin/wget --show-progress -O "$neighborhood_areas_file" "https://portal.nolagis.opendata.arcgis.com/datasets/e7daa4c977d14e1b9e2fa4d7aff81e59_0.geojson"
# create the table and import the data from the csv
psql --set=call_data_file="$call_data_file" --set=neighborhood_areas_file="$neighborhood_areas_file" -U postgres -d nola311 -f setup/schema_and_csv_import.sql
# sanitize the table
psql -U nola311 -d nola311 -f setup/sanitize.sql
# create views
psql -U nola311 -d nola311 -f views/open_tickets_stats.sql
psql -U nola311 -d nola311 -f views/closed_tickets_stats.sql
psql -U nola311 -d nola311 -f views/call_records_for_review.sql
psql -U nola311 -d nola311 -f views/call_records_with_call_for_details.sql
DB_USER=nola311 DB_NAME=nola311 DB_HOST=c2rp0kujqp.us-east-1.rds.amazonaws.com ./setup.sh
```

## some sample queries
Expand Down
17 changes: 14 additions & 3 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,23 @@
#!/usr/bin/bash
#!/usr/local/bin/bash

# create the db
echo ""
echo "Creating nola311 user and database"
echo ""
createuser nola311
createdb nola311 -O nola311

# download the source data, setup tables, and import data
echo ""
echo "Downloading the source data"
echo ""
./setup/download_source_data.sh

# install remaining database objects
echo ""
echo "Setup tables and load data into database"
echo ""
./setup/load_data.sh

echo ""
echo "install remaining database objects"
echo ""
./setup/install.sh
7 changes: 0 additions & 7 deletions setup/download_source_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,3 @@ fi

echo ""
echo "Data downloaded to $data_dir."
echo ""
echo "Setting up schemas and importing data"

# Call script to create schemas and import data, passing file locations
psql --set=call_data_file="$call_data_file" \
--set=neighborhood_areas_file="$neighborhood_areas_file" \
-U postgres -d nola311 -f setup/schema_and_csv_import.sql
16 changes: 10 additions & 6 deletions setup/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@
#!/usr/bin/bash
#!/usr/local/bin/bash

DB_USER=${DB_USER:-postgres}
DB_HOST=${DB_HOST:-localhost}
DB_NAME=${DB_NAME:-nola311}

# sanitize the tables
psql -U nola311 -d nola311 -f setup/sanitize_call_data.sql
psql -U nola311 -d nola311 -f setup/sanitize_neighborhood_data.sql
psql -U $DB_USER -d $DB_NAME -h $DB_HOST -f setup/sanitize_call_data.sql
psql -U $DB_USER -d $DB_NAME -h $DB_HOST -f setup/sanitize_neighborhood_data.sql

# create views
psql -U nola311 -d nola311 -f views/open_tickets_stats.sql -q
psql -U nola311 -d nola311 -f views/closed_tickets_stats.sql -q
psql -U nola311 -d nola311 -f views/call_records_for_review.sql -q
psql -U nola311 -d nola311 -f views/call_records_with_call_for_details.sql -q
psql -U $DB_USER -d $DB_NAME -h $DB_HOST -f views/open_tickets_stats.sql -q
psql -U $DB_USER -d $DB_NAME -h $DB_HOST -f views/closed_tickets_stats.sql -q
psql -U $DB_USER -d $DB_NAME -h $DB_HOST -f views/call_records_for_review.sql -q
psql -U $DB_USER -d $DB_NAME -h $DB_HOST -f views/call_records_with_call_for_details.sql -q
30 changes: 30 additions & 0 deletions setup/load_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
#!/usr/bin/bash
#!/usr/local/bin/bash
#
# Create the nola311 staging tables and bulk-load the downloaded source files
# into them.
#
# Connection settings are read from the environment, with local defaults:
#   DB_USER  role to connect as        (default: postgres)
#   DB_HOST  database server hostname  (default: localhost)
#   DB_NAME  target database           (default: nola311)
#
# Paths are resolved against the current working directory: data files are
# read from ./data and the schema script from ./setup.
set -e

DB_USER=${DB_USER:-postgres}
DB_HOST=${DB_HOST:-localhost}
DB_NAME=${DB_NAME:-nola311}
data_dir="$(pwd)/data"
neighborhood_areas_file="$data_dir/neighborhood_areas.geo.json"
call_data_file="$data_dir/nola311_raw.csv"

# Check the inputs before touching the database: the schema script drops and
# recreates the staging tables, so a missing file should abort before that.
for data_file in "$call_data_file" "$neighborhood_areas_file"; do
  if [ ! -r "$data_file" ]; then
    echo "Cannot read data file: $data_file" >&2
    exit 1
  fi
done

echo ""
echo "Setting up schema to load data into $DB_NAME on $DB_HOST"
echo ""
# ON_ERROR_STOP makes psql exit non-zero when a statement in the file fails;
# without it psql reports success and `set -e` never sees the error.
psql -v ON_ERROR_STOP=1 -U "$DB_USER" -d "$DB_NAME" -h "$DB_HOST" -f setup/setup_schema.sql

echo ""
echo "Loading data from $call_data_file"
echo "This may take a minute..."
echo ""
# \copy reads the file on the client side, so this works against a remote
# server that has no access to the local filesystem.
psql -U "$DB_USER" -d "$DB_NAME" -h "$DB_HOST" \
-c "\copy nola311.calls_tmp(ticket_id,issue_type,ticket_created_date_time,ticket_closed_date_time,ticket_status,issue_description,street_address,neighborhood_district,council_district,city,state,zip_code,location,geom,latitude,longitude) from '$call_data_file' with csv header NULL as '';"

echo ""
echo "Loading data from $neighborhood_areas_file"
echo ""
# The \x01 quote and \x02 delimiter are presumably chosen because they never
# occur in the JSON, so each input line lands whole in json_data.
psql -U "$DB_USER" -d "$DB_NAME" -h "$DB_HOST" \
-c "\copy nola311.neighborhood_areas_tmp(json_data) from '$neighborhood_areas_file' csv quote e'\x01' delimiter e'\x02';"
2 changes: 2 additions & 0 deletions setup/sanitize_call_data.sql
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,6 @@ create table nola311.calls as (

comment on table nola311.calls is 'This dataset represents calls to the City of New Orleans'' 311 Call Center';

-- Give the nola311 role full access to the schema and to the tables and
-- sequences that exist in it at this point. "all tables in schema" only
-- covers objects that already exist, so the grants run after the table above
-- has been created.
grant all on schema nola311 to nola311;
grant all on all tables in schema nola311 to nola311;
grant all on all sequences in schema nola311 to nola311;
2 changes: 2 additions & 0 deletions setup/sanitize_neighborhood_data.sql
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,6 @@ from json_geo_records t

comment on table nola311.neighborhoods is 'This dataset contains geojson features and related metadata for New Orleans'' neighborhoods per GNO CDC.';

-- Give the nola311 role full access to the schema and to the tables and
-- sequences that exist in it at this point. "all tables in schema" only
-- covers objects that already exist, so the grants run after the table above
-- has been created.
grant all on schema nola311 to nola311;
grant all on all tables in schema nola311 to nola311;
grant all on all sequences in schema nola311 to nola311;
40 changes: 5 additions & 35 deletions setup/schema_and_csv_import.sql → setup/setup_schema.sql
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
-- Note that this script must be called with the `--set=` option
-- passed to the `psql` command to set the following variables to
-- indicate the location of the relevant data files:
-- :'call_data_file'
-- :'neighborhood_areas_file'

create schema if not exists nola311;

create table if not exists nola311.calls_tmp (
drop table if exists nola311.calls_tmp cascade;
create table nola311.calls_tmp (
id serial primary key,
ticket_id numeric,
issue_type text,
Expand All @@ -26,41 +21,16 @@ create table if not exists nola311.calls_tmp (
longitude numeric
);

copy nola311.calls_tmp (
ticket_id,
issue_type,
ticket_created_date_time,
ticket_closed_date_time,
ticket_status,
issue_description,
street_address,
neighborhood_district,
council_district,
city,
state,
zip_code,
location,
geom,
latitude,
longitude
)
from :'call_data_file'
with csv header NULL as '';

create table if not exists nola311.neighborhood_areas_tmp (
drop table if exists nola311.neighborhood_areas_tmp cascade;
create table nola311.neighborhood_areas_tmp (
id serial primary key,
json_data jsonb,
created_at timestamp with time zone default current_timestamp
);

copy nola311.neighborhood_areas_tmp (
json_data
)
from :'neighborhood_areas_file'
csv quote e'\x01' delimiter e'\x02';

grant all on schema nola311 to nola311;
grant all on all tables in schema nola311 to nola311;
grant all on all sequences in schema nola311 to nola311;

alter role nola311 set search_path to nola311, public;
create extension if not exists postgis;

0 comments on commit 83d6dd8

Please sign in to comment.