-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
etl pipeline, machine learning, visualization
- Loading branch information
0 parents
commit 0db1b50
Showing
9 changed files
with
608 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.DS_Store | ||
*.csv | ||
*.pkl | ||
__pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Disaster Response Pipeline Project | ||
### Table of Contents | ||
|
||
|
||
1. [Project Motivation](#overview) | ||
2. [Installation](#installation) | ||
3. [Instructions](#instructions) | ||
4. [File Descriptions](#files) | ||
5. [Discussion](#discussion) | ||
6. [Licensing, Authors, and Acknowledgements](#licensing) | ||
|
||
### Project Overview<a name="overview"></a> | ||
## Installation <a name="installation"></a> | ||
The code was tested using Python version 3.9. | ||
For other necessary libraries please use requirements.txt | ||
```bash | ||
pip install -r requirements.txt | ||
``` | ||
|
||
### Instructions<a name="instructions"></a>: | ||
1. Run the following commands in the project's root directory to set up your database and model. | ||
|
||
- To run ETL pipeline that cleans data and stores in database | ||
`python data/process_data.py data/disaster_messages.csv data/disaster_categories.csv data/DisasterResponse.db` | ||
- To run ML pipeline that trains classifier and saves | ||
1. In case you wish to tune the hyperparameters (GridSearchCV) | ||
`python models/train_classifier.py data/DisasterResponse.db models/classifier.pkl True` | ||
2. Otherwise, the model is trained with the already optimized parameters | ||
`python models/train_classifier.py data/DisasterResponse.db models/classifier.pkl False` | ||
|
||
2. Run the following command in the app's directory to run your web app. | ||
`python run.py` | ||
|
||
3. Go to http://0.0.0.0:3001/ | ||
|
||
## File Descriptions <a name="files"></a> | ||
## Discussion <a name="discussion"></a> | ||
## Licensing, Authors, Acknowledgements <a name="licensing"></a> | ||
|
||
|
||
Must give credit to Airbnb. You can find the Licensing for the data and more useful information at Airbnb [here](https://insideairbnb.com/get-the-data.html) or at the Kaggle [here](https://www.kaggle.com/airbnb/seattle). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import json | ||
from string import punctuation | ||
import plotly | ||
from flask import Flask | ||
from flask import render_template, request, jsonify | ||
from plotly.graph_objs import Bar | ||
import joblib | ||
import pandas as pd | ||
from sqlalchemy import create_engine | ||
from wrangling_script.wrangle_data import return_figures | ||
from nltk.tokenize import word_tokenize | ||
from nltk.stem import WordNetLemmatizer | ||
app = Flask(__name__) | ||
|
||
def tokenize(text):
    """Split a message into lemmatized, lower-cased tokens.

    All punctuation except the apostrophe is removed before tokenizing.

    Args:
        text: The raw message string.

    Returns:
        A list of cleaned token strings, in their original order.
    """
    # Keep "'" out of the deletion table so apostrophes survive the strip.
    stripped_chars = punctuation.replace("'", "")
    words = word_tokenize(text.translate(str.maketrans('', '', stripped_chars)))
    lemmatizer = WordNetLemmatizer()

    # Lemmatize first, then normalise case and surrounding whitespace.
    return [lemmatizer.lemmatize(word).lower().strip() for word in words]
|
||
|
||
# load model
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load a classifier.pkl that you produced yourself.
model = joblib.load("../models/classifier.pkl")
# load data
# SQLAlchemy SQLite URLs have the form sqlite:///<path>; the path here is
# relative to the current working directory.
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('DisasterResponse', engine)
|
||
# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():
    """Render the landing page with the charts built from the loaded data."""
    figures = return_figures(df)

    # One element id per figure, in figure order: "graph-0", "graph-1", ...
    ids = [f"graph-{position}" for position, _figure in enumerate(figures)]
    # PlotlyJSONEncoder is passed so json.dumps can serialize plotly objects.
    graphJSON = json.dumps(figures, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
|
||
|
||
# web page that handles user query and displays model results
@app.route('/go')
def go():
    """Classify the ``query`` URL parameter and render the result page."""
    # Text submitted by the form; empty string when the parameter is absent.
    user_message = request.args.get('query', '')

    # The model is given a one-element list; [0] picks that message's labels.
    predicted_flags = model.predict([user_message])[0]
    # Pair each predicted value with the column name at the same position,
    # starting from the fifth dataframe column.
    category_names = df.columns[4:]
    label_by_category = dict(zip(category_names, predicted_flags))

    # go.html reads the template variables `query` and `classification_result`.
    return render_template(
        'go.html',
        query=user_message,
        classification_result=label_by_category,
    )
|
||
|
||
def main():
    """Start the Flask development server on all interfaces, port 3001.

    The interactive debugger lets anyone who can reach the server execute
    arbitrary Python code, so it is no longer enabled unconditionally while
    listening on 0.0.0.0. Set the environment variable FLASK_DEBUG=1 to turn
    debug mode on for local development.
    """
    import os  # local import: only needed to read the debug switch

    debug_enabled = os.environ.get('FLASK_DEBUG') == '1'
    app.run(host='0.0.0.0', port=3001, debug=debug_enabled)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
{% extends "master.html" %} | ||
{% block title %}Results{% endblock %} | ||
|
||
{% block message %} | ||
<hr /> | ||
<h4 class="text-center">MESSAGE</h4> | ||
<p class="text-center"><i>{{query}}</i></p> | ||
{% endblock %} | ||
|
||
{% block content %}
    {# One list item per category; predicted categories (value 1) are highlighted. #}
    <h1 class="text-center">Result</h1>
    <ul class="list-group">
        {% for category, classification in classification_result.items() %}
            {% if classification == 1 %}
                <li class="list-group-item list-group-item-success text-center">{{category.replace('_', ' ').title()}}</li>
            {% else %}
                <li class="list-group-item list-group-item-dark text-center">{{category.replace('_', ' ').title()}}</li>
            {% endif %}
        {% endfor %}
    </ul>
{% endblock %}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
<!doctype html> | ||
<html lang="en"> | ||
<head> | ||
<meta charset="utf-8"> | ||
<meta http-equiv="X-UA-Compatible" content="IE=edge"> | ||
<meta name="viewport" content="width=device-width, initial-scale=1"> | ||
|
||
<title>Disasters</title> | ||
|
||
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" integrity="sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u" crossorigin="anonymous"> | ||
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap-theme.min.css" integrity="sha384-rHyoN1iRsVXV4nD0JutlnGaslCJuC7uwjduW9SVrLvRYooPp2bWYgmgJQIXwl/Sp" crossorigin="anonymous"> | ||
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script> | ||
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script> | ||
</head> | ||
|
||
<body> | ||
|
||
<nav class="navbar navbar-inverse navbar-fixed-top"> | ||
<div class="container"> | ||
<div class="navbar-header"> | ||
<a class="navbar-brand" href="/">Disaster Response Project</a> | ||
</div> | ||
<div id="navbar" class="collapse navbar-collapse"> | ||
<ul class="nav navbar-nav"> | ||
<li><a href="https://www.udacity.com/">Made with Udacity</a></li> | ||
<li><a href="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/">Contact</a></li> | ||
</ul> | ||
</div> | ||
</div> | ||
</nav> | ||
|
||
|
||
<div class="jumbotron"> | ||
<div class="container"> | ||
<h1 class="text-center">Disaster Response Project</h1> | ||
<p class="text-center">Analyzing message data for disaster response</p> | ||
<hr /> | ||
|
||
<div class="row"> | ||
<div class="col-lg-12 form-group-lg"> | ||
<form action="/go" method="get"> | ||
<input type="text" class="form-control form-control-lg" name="query" placeholder="Enter a message to classify"> | ||
<div class="col-lg-offset-5"> | ||
<button type="submit" class="btn btn-lg btn-success">Classify Message</button> | ||
</div> | ||
</form> | ||
</div> | ||
</div> | ||
|
||
{% block message %} | ||
{% endblock %} | ||
</div> | ||
</div> | ||
|
||
<div class="container mt-3 text-center"> | ||
{% block content %} | ||
<div class="page-header"> | ||
<h1 class="text-center">Overview of Training Dataset</h1> | ||
</div> | ||
{% endblock %} | ||
|
||
{% for id in ids %} | ||
<div id="{{id}}"></div> | ||
{% endfor %} | ||
</div> | ||
|
||
{# Only emit the plotting script when the view supplied graph data; pages that
   render without graphJSON would otherwise produce `const graphs = ;`. #}
{% if graphJSON %}
<script type="text/javascript">
    // graphJSON is a JSON string built on the server; tojson turns it into an
    // HTML-safe JavaScript string literal, which is then parsed back to objects.
    const graphs = JSON.parse({{ graphJSON | tojson }});
    const ids = {{ ids | tojson }};
    graphs.forEach((graph, i) => {
        Plotly.newPlot(ids[i], graph.data, graph.layout);
    });
</script>
{% endif %}
|
||
</body> | ||
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
|
||
import plotly.graph_objects as go | ||
|
||
|
||
|
||
def return_figures(df):
    """
    Build the plotly figures for the dataset overview
    INPUT
    df: pandas dataframe with 'genre', 'message', 'request' and 'offer'
        columns; every column from position 4 onwards is counted as a
        label whose value 1 marks an occurrence
    OUTPUT
    figures: list of three dicts, each with a 'data' entry (list of plotly
             traces) and a 'layout' entry (dict)
    """

    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    def count_per_genre(flag_column):
        """Count messages per genre among rows where flag_column equals 1."""
        flagged = df[df[flag_column] == 1]
        counts = flagged.groupby('genre').count()['message']
        # groupby drops genres with no flagged rows; reindex restores them
        # as 0 so the values stay positionally aligned with genre_names.
        return counts.reindex(genre_names, fill_value=0)

    request_counts = count_per_genre('request')
    offer_counts = count_per_genre('offer')

    graph_one = [
        go.Bar(x=genre_names, y=genre_counts, name='Total'),
        go.Bar(x=genre_names, y=request_counts, name='Request'),
        go.Bar(x=genre_names, y=offer_counts, name='Offer'),
    ]

    # Genres are on the x axis and message counts on the y axis.
    layout_one = dict(
        title='Distribution of Message Genres and Help Type',
        xaxis=dict(title="Genre"),
        yaxis=dict(title="Count"),
        height=500,
        width=1400,
        autosize=False,
    )

    # count number of occurrences of 1 for each label, largest first
    label_counts = {
        col_name.replace('_', ' '): int((df[col_name] == 1).sum())
        for col_name in df.columns[4:]
    }
    label_counts = dict(
        sorted(label_counts.items(), key=lambda item: item[1], reverse=True)
    )
    label_names = list(label_counts.keys())
    label_values = list(label_counts.values())

    graph_two = [go.Bar(x=label_names, y=label_values)]

    layout_two = dict(
        title='Distribution of Disaster Types',
        yaxis=dict(title="Count"),
        xaxis=dict(title="Labels"),
        height=500,
        width=1400,
        autosize=False,
    )

    graph_three = [go.Pie(labels=label_names, values=label_values)]

    # A pie chart has no x/y axes, so no axis titles are set here.
    layout_three = dict(
        title='Distribution of Disaster Types in Percent ',
        height=900,
        width=1200,
        autosize=False,
    )

    # collect all charts in display order
    return [
        dict(data=graph_one, layout=layout_one),
        dict(data=graph_two, layout=layout_two),
        dict(data=graph_three, layout=layout_three),
    ]
Binary file not shown.
Oops, something went wrong.