Skip to content

Commit

Permalink
etl pipeline, machine learning, visualization
Browse files Browse the repository at this point in the history
  • Loading branch information
LN5user committed Jun 11, 2021
0 parents commit 0db1b50
Show file tree
Hide file tree
Showing 9 changed files with 608 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.DS_Store
*.csv
*.pkl
__pycache__
41 changes: 41 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Disaster Response Pipeline Project
### Table of Contents


1. [Project Motivation](#overview)
2. [Installation](#installation)
3. [Instructions](#instructions)
4. [File Descriptions](#files)
5. [Discussion](#discussion)
6. [Licensing, Authors, and Acknowledgements](#licensing)

### Project Overview<a name="overview"></a>
## Installation <a name="installation"></a>
The code was tested using Python version 3.9.
For other necessary libraries please use requirements.txt
```bash
pip install -r requirements.txt
```

### Instructions<a name="instructions"></a>:
1. Run the following commands in the project's root directory to set up your database and model.

- To run ETL pipeline that cleans data and stores in database
`python data/process_data.py data/disaster_messages.csv data/disaster_categories.csv data/DisasterResponse.db`
- To run the ML pipeline that trains the classifier and saves it
1. In case you wish to tune the parameters (GridSearchCV)
`python models/train_classifier.py data/DisasterResponse.db models/classifier.pkl True`
2. Otherwise, the model is trained with the already optimized parameters
`python models/train_classifier.py data/DisasterResponse.db models/classifier.pkl False`

2. Run the following command in the app's directory to run your web app.
`python run.py`

3. Go to http://0.0.0.0:3001/

## File Descriptions <a name="files"></a>
## Discussion <a name="discussion"></a>
## Licensing, Authors, Acknowledgements!!!!<a name="licensing"></a>


Must give credit to Airbnb. You can find the Licensing for the data and more useful information at Airbnb [here](https://insideairbnb.com/get-the-data.html) or at the Kaggle [here](https://www.kaggle.com/airbnb/seattle).
72 changes: 72 additions & 0 deletions app/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import json
from string import punctuation
import plotly
from flask import Flask
from flask import render_template, request, jsonify
from plotly.graph_objs import Bar
import joblib
import pandas as pd
from sqlalchemy import create_engine
from wrangling_script.wrangle_data import return_figures
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
app = Flask(__name__)

def tokenize(text):
    """
    Split a message into normalised word tokens
    INPUT
    text: str, raw message
    OUTPUT
    list of str: one lemmatised, lower-cased, whitespace-stripped token
                 per word found by nltk's word_tokenize
    """
    # Drop every punctuation character except the apostrophe, so that
    # contractions reach the tokenizer with their apostrophe intact.
    strip_table = str.maketrans('', '', punctuation.replace("'", ""))
    words = word_tokenize(text.translate(strip_table))
    lemmatizer = WordNetLemmatizer()

    # Lemmatise first, then lower-case and strip -- same order as before.
    return [lemmatizer.lemmatize(word).lower().strip() for word in words]


# load model
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code -- only point this at a model file you produced or otherwise trust.
model = joblib.load("../models/classifier.pkl")
# load data
# SQLAlchemy's SQLite URL is "sqlite:///<path>"; the path below is relative
# to the working directory, so the app must be started from the app/ folder.
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('DisasterResponse', engine)

# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():
    """
    Render the landing page with the overview charts of the loaded data
    OUTPUT
    the rendered 'master.html' template, given the list of graph element
    ids and the figures serialised as a JSON string
    """
    figures = return_figures(df)

    # one element id per figure: "graph-0", "graph-1", ...
    div_ids = [f"graph-{position}" for position in range(len(figures))]

    # plotly objects are not plain-JSON serialisable; use plotly's encoder
    figures_json = json.dumps(figures, cls=plotly.utils.PlotlyJSONEncoder)

    return render_template('master.html', ids=div_ids, graphJSON=figures_json)


# web page that handles user query and displays model results
@app.route('/go')
def go():
    """
    Classify the message passed in the 'query' URL parameter
    OUTPUT
    the rendered 'go.html' template, given the query text and a dict that
    maps each category column name to the model's predicted label
    """
    # a missing 'query' parameter is treated as an empty message
    user_message = request.args.get('query', '')

    # predict() receives a one-element batch; keep the single result row
    predicted_labels = model.predict([user_message])[0]

    # columns from position 4 onwards are used as the category names
    # (presumably the first four hold id/message/original/genre -- confirm
    # against the ETL script)
    category_names = df.columns[4:]
    label_by_category = {
        name: label for name, label in zip(category_names, predicted_labels)
    }

    # This will render the go.html Please see that file.
    return render_template(
        'go.html',
        query=user_message,
        classification_result=label_by_category,
    )


def main():
    """
    Start the Flask development server on all interfaces, port 3001

    Debug mode is off by default: with host='0.0.0.0' the server is
    reachable from other machines, and Flask's interactive debugger lets
    anyone who can reach it run arbitrary Python code. Set the environment
    variable FLASK_DEBUG to 1/true/yes/on to enable it for local development.
    """
    import os

    debug_flag = os.environ.get('FLASK_DEBUG', '').strip().lower()
    debug = debug_flag in ('1', 'true', 'yes', 'on')
    app.run(host='0.0.0.0', port=3001, debug=debug)


if __name__ == '__main__':
    main()
24 changes: 24 additions & 0 deletions app/templates/go.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{% extends "master.html" %}
{% block title %}Results{% endblock %}

{% block message %}
<hr />
<h4 class="text-center">MESSAGE</h4>
<p class="text-center"><i>{{query}}</i></p>
{% endblock %}

{% block content %}
<h1 class="text-center">Result</h1>
<ul class="list-group">
{% for category, classification in classification_result.items() %}
{% if classification == 1 %}
<li class="list-group-item list-group-item-success text-center">{{category.replace('_', ' ').title()}}</li>
{% else %}
<li class="list-group-item list-group-item-dark text-center">{{category.replace('_', ' ').title()}}</li>
{% endif %}
{% endfor %}

</ul>

{% endblock %}
76 changes: 76 additions & 0 deletions app/templates/master.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">

<title>Disasters</title>

<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" integrity="sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u" crossorigin="anonymous">
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap-theme.min.css" integrity="sha384-rHyoN1iRsVXV4nD0JutlnGaslCJuC7uwjduW9SVrLvRYooPp2bWYgmgJQIXwl/Sp" crossorigin="anonymous">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
</head>

<body>

<nav class="navbar navbar-inverse navbar-fixed-top">
<div class="container">
<div class="navbar-header">
<a class="navbar-brand" href="/">Disaster Response Project</a>
</div>
<div id="navbar" class="collapse navbar-collapse">
<ul class="nav navbar-nav">
<li><a href="https://www.udacity.com/">Made with Udacity</a></li>
<li><a href="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/">Contact</a></li>
</ul>
</div>
</div>
</nav>


<div class="jumbotron">
<div class="container">
<h1 class="text-center">Disaster Response Project</h1>
<p class="text-center">Analyzing message data for disaster response</p>
<hr />

<div class="row">
<div class="col-lg-12 form-group-lg">
<form action="/go" method="get">
<input type="text" class="form-control form-control-lg" name="query" placeholder="Enter a message to classify">
<div class="col-lg-offset-5">
<button type="submit" class="btn btn-lg btn-success">Classify Message</button>
</div>
</form>
</div>
</div>

{% block message %}
{% endblock %}
</div>
</div>

<div class="container mt-3 text-center">
{% block content %}
<div class="page-header">
<h1 class="text-center">Overview of Training Dataset</h1>
</div>
{% endblock %}

{% for id in ids %}
<div id="{{id}}"></div>
{% endfor %}
</div>

<script type="text/javascript">
const graphs = {{graphJSON | safe}};
const ids = {{ids | safe}};
for(let i in graphs) {
Plotly.plot(ids[i], graphs[i].data, graphs[i].layout);
}
</script>

</body>
</html>
102 changes: 102 additions & 0 deletions app/wrangling_script/wrangle_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@

import plotly.graph_objects as go



def return_figures(df):
    """
    Prepare the plotly figures shown on the landing page
    INPUT
    df: pandas dataframe; the code reads its 'genre', 'message', 'request'
        and 'offer' columns and treats every column from position 4 onwards
        as a 0/1 label column (assumed layout -- confirm against the ETL step)
    OUTPUT
    figures: list of three dicts, each with a 'data' entry (list of plotly
             traces) and a 'layout' entry (dict):
             1. grouped bars of message counts per genre (total/request/offer)
             2. bar chart of the number of messages per label, descending
             3. pie chart of the same per-label counts
    """

    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    def _flagged_counts_by_genre(flag_column):
        """Count messages per genre where flag_column == 1, in genre_names order."""
        counts = df[df[flag_column] == 1].groupby('genre').count()['message']
        # A genre without any flagged message is missing from the groupby
        # result; without reindexing, the remaining values would be plotted
        # against the wrong genre because plotly pairs x and y by position.
        return counts.reindex(genre_names, fill_value=0)

    request_counts = _flagged_counts_by_genre('request')
    offer_counts = _flagged_counts_by_genre('offer')

    graph_one = [
        go.Bar(x=genre_names, y=genre_counts, name='Total'),
        go.Bar(x=genre_names, y=request_counts, name='Request'),
        go.Bar(x=genre_names, y=offer_counts, name='Offer'),
    ]

    # genres are on the x axis and counts on the y axis
    layout_one = dict(title='Distribution of Message Genres and Help Type',
                      xaxis=dict(title="Genre"),
                      yaxis=dict(title="Count"),
                      height=500,
                      width=1400,
                      autosize=False
                      )

    # count number of occurrences of 1 for each label, largest first
    count_one_occurence = {
        col_name.replace('_', ' '): int((df[col_name] == 1).sum())
        for col_name in df.columns[4:]
    }
    count_one_occurence = dict(
        sorted(count_one_occurence.items(), key=lambda item: item[1], reverse=True))
    label_names = list(count_one_occurence.keys())
    label_counts = list(count_one_occurence.values())

    graph_two = [go.Bar(x=label_names, y=label_counts)]

    layout_two = dict(title='Distribution of Disaster Types',
                      yaxis=dict(title="Count"),
                      xaxis=dict(title="Labels"),
                      height=500,
                      width=1400,
                      autosize=False
                      )

    graph_three = [go.Pie(labels=label_names, values=label_counts)]

    # a pie chart has no cartesian axes, so no xaxis/yaxis titles here
    layout_three = dict(title='Distribution of Disaster Types in Percent ',
                        height=900,
                        width=1200,
                        autosize=False
                        )

    # collect all charts
    figures = [
        dict(data=graph_one, layout=layout_one),
        dict(data=graph_two, layout=layout_two),
        dict(data=graph_three, layout=layout_three),
    ]
    return figures
Binary file added data/DisasterResponse.db
Binary file not shown.
Loading

0 comments on commit 0db1b50

Please sign in to comment.