# accomondations_in_sri_lanka.py

# -*- coding: utf-8 -*-
"""Accomondations In Sri Lanka.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/18qtOSTe7Pst6Qhgv4C9FBBgXtBXH5Qbd

<h1 align="center"> <b>Identifying The Most Felicitous Areas For Tourism In Sri Lanka</b> </h1>

---

## **Accommodation Data Analysis & Visualizations**

---

### Import Libraries
"""

# Commented out IPython magic to ensure Python compatibility.
#import libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# %matplotlib inline
import seaborn as sns
import folium
from folium import Marker
from folium.plugins import MarkerCluster
import math
import io

"""### Getting to Know the Dataset"""

# upload csv file to google colabaratory

'''
from google.colab import files
uploaded = files.upload()

'''

# Read the accommodation dataset into a pandas DataFrame.
# The commented line below is the Colab variant that reads from an uploaded file object.
#df = pd.read_csv(io.BytesIO(uploaded['Information for Accommodation.csv']))

# Local variant: the CSV is expected in the current working directory.
df = pd.read_csv('Information for Accommodation.csv')

# Display the first 5 rows of the dataset.
# NOTE: bare expressions such as `df.head()` only render output in a notebook
# cell; when this file runs as a plain script their result is discarded.

df.head()

# Last 5 rows.
df.tail()

# (rows, columns) of the raw data.
df.shape

# Print every column label, one per line.
for i in df.columns:
  print(i)

# Number of missing values per column, before cleaning.
df.isnull().sum()

# Drop the rows that have no 'Grade'; `df` is rebound to the filtered frame.
df = df.dropna(axis=0, subset=['Grade'])

# Number of missing values per column, after dropping the ungraded rows.
df.isnull().sum()

# Checking the datatypes
df.dtypes

"""## Exploratory Data Analysis & Visualization"""

# Display the count of each value for the categorical features in the dataset

def Countplot(col):
  """Draw a bar chart of how often each value of ``df[col]`` occurs.

  Args:
    col: Label of a column in the module-level ``df``. It is passed through
      ``str()`` before the lookup, so non-string labels are looked up by
      their string form.
  """
  plt.figure(figsize=(18,8.5))
  # Pass the column as ``x=``: seaborn >= 0.12 interprets the first positional
  # argument as ``data``, so a positional Series is no longer counted per value.
  sns.countplot(x=df[str(col)],palette='husl')
  plt.xticks(rotation='vertical',size=10)
  plt.show()

columns = ['Type','Grade','District']

# One count plot per categorical column of interest.
for i in columns:
  Countplot(i)

"""> ### **According To The Above Visualizations**


*   Guest houses are by far the most common type of accommodation in Sri Lanka.
*   Grades A and B are the most common grades, followed by a moderate number of Deluxe, Superior and Standard places.
*   Most accommodation places are located in Colombo, Kandy and Galle (Colombo has the most).

## Visualize Accommodation Places On A Map
"""

# Keep only the rows that have both coordinates, so every remaining row can be
# placed on the map. dropna() returns a new frame, so `df` itself is untouched.
# ('Logitiute' is the column's spelling in the dataset.)

df_cleaned = df.dropna(subset=['Logitiute', 'Latitude'])

df_cleaned.isnull().sum()

# Create the map
map_1 = folium.Map(location=[7.8731,80.7718], tiles='cartodbpositron', zoom_start=8)


mc1 = MarkerCluster()

# Rows with missing coordinates were already removed above, so no per-row NaN
# check is needed here. Each marker shows the place name on hover and its
# grade in the popup.
for idx, row in df_cleaned.iterrows():
  mc1.add_child(Marker(location=[row['Latitude'], row['Logitiute']], tooltip=row['Name'], popup=row['Grade']))

# add the marker cluster to the map
map_1.add_child(mc1)

# Display the map
map_1

"""> # According To The Above Map Visualization :


*   A large number of places are located in the Colombo, Kandy, Galle & Anuradhapura areas. The reason is that these are the most famous areas for tourism, because many ancient sites and other places important to tourism are located there.

## Analyse Places In Colombo, Galle & Kandy
"""

# Accommodation rows for the three districts compared in this section.
c_k_g = pd.DataFrame(df[df['District'].isin(['Colombo', 'Kandy', 'Galle'])])
c_k_g.head()

"""## Kandy Area"""

# Accommodation rows for Kandy district only.
k = pd.DataFrame(df[df['District'].eq('Kandy')])
k.head()

k.shape

"""### There are 158 places arround kandy district

### Rooms Count Distribution Around Kandy District
"""

# Rooms Count Distribution Around Kandy Area

fig, ax = plt.subplots(figsize=(20, 10))
k['Rooms'].hist(ax=ax, bins=20, histtype='bar')
ax.set_xlabel('Number Of Rooms', fontsize=20)
ax.set_title('Rooms Count Distribution Around Kandy Area', fontsize=20)
ax.set_ylabel('Count', fontsize=20)

"""### Number Of places in Each Condition Type In kandy"""

def NCK():
  """Print and plot how many Kandy places fall into each Grade.

  Reads the module-level ``k`` frame; takes no arguments and returns nothing.
  """

  #print the count
  print(k['Grade'].value_counts())

  #visualize the count plot
  plt.figure(figsize=(18,8.5))
  # Pass the column as ``x=``: seaborn >= 0.12 interprets the first positional
  # argument as ``data``, so a positional Series is no longer counted per value.
  sns.countplot(x=k['Grade'],palette='husl')
  plt.xticks(rotation='vertical',size=10)
  plt.show()


NCK()

"""## According To the Graph :

*  Most of the places have between 1 & 40 rooms.
*  Superior-grade places are the most common.
*  Let's compare this situation against Colombo & Galle.

### In Colombo
"""

# Accommodation rows for Colombo district, followed by a histogram of room counts.
c = pd.DataFrame(df[df['District'].eq('Colombo')])

fig, ax = plt.subplots(figsize=(20, 10))
c['Rooms'].hist(ax=ax, bins=10, histtype='bar')
ax.set_xlabel('Number Of Rooms', fontsize=20)
ax.set_title('Rooms Count Distribution Around Colombo Area', fontsize=20)
ax.set_ylabel('Count', fontsize=20)

"""### Number Of places in Each Condition Type In Colombo"""

def NCC():
  """Print and plot how many Colombo places fall into each Grade.

  Reads the module-level ``c`` frame; takes no arguments and returns nothing.
  """

  #print the count
  print(c['Grade'].value_counts())

  #visualize the count plot
  plt.figure(figsize=(18,8.5))
  # Pass the column as ``x=``: seaborn >= 0.12 interprets the first positional
  # argument as ``data``, so a positional Series is no longer counted per value.
  sns.countplot(x=c['Grade'],palette='husl')
  plt.xticks(rotation='vertical',size=10)
  plt.show()


NCC()

"""### In Galle"""

# Accommodation rows for Galle district, followed by a histogram of room counts.
g = pd.DataFrame(df[df['District'].eq('Galle')])

fig, ax = plt.subplots(figsize=(20, 10))
g['Rooms'].hist(ax=ax, bins=10, histtype='bar')
ax.set_xlabel('Number Of Rooms', fontsize=20)
ax.set_title('Rooms Count Distribution Around Galle Area', fontsize=20)
ax.set_ylabel('Count', fontsize=20)

"""### Number Of places in Each Condition Type In Galle"""

def NCG():
  """Print and plot how many Galle places fall into each Grade.

  Reads the module-level ``g`` frame; takes no arguments and returns nothing.
  """

  #print the count
  print(g['Grade'].value_counts())

  #visualize the count plot
  plt.figure(figsize=(18,8.5))
  # Pass the column as ``x=``: seaborn >= 0.12 interprets the first positional
  # argument as ``data``, so a positional Series is no longer counted per value.
  sns.countplot(x=g['Grade'],palette='husl')
  plt.xticks(rotation='vertical',size=10)
  plt.show()


NCG()

"""## According to the above graphs 

* Colombo & Kandy have larger places than Galle, because Kandy & Colombo have massive hotels, i.e. five-star or higher-rated hotels.

* Let's check this by grouping the places by their Grade.
"""

# Total number of rooms per (District, Grade) pair.
# 'Rooms' is selected *before* aggregating so that only that column is summed;
# summing every column first can fail on non-numeric columns, which
# pandas >= 2.0 no longer drops silently.
Gr_By_type_grade = c_k_g.groupby(['District', 'Grade'])['Rooms'].sum().reset_index()

# Reshape with a pivot table: one row per district, one column of room totals per grade

Gr_By_type_grade_pivot = Gr_By_type_grade.pivot(index='District', columns='Grade', values='Rooms').reset_index()
Gr_By_type_grade_pivot

# Grouped bar chart: one bar per grade within each district.
Gr_By_type_grade_pivot.plot(x="District", y=["A", "B", "C","DELUXE","FIVE","FOUR","ONE","STANDARD","SUPERIOR","THREE","TWO" ], kind="bar",figsize=(18,10))
plt.xticks(rotation='vertical',size=15)
# show must be *called*; the bare attribute `plt.show` is a no-op expression.
plt.show()

"""# **Analyse The Travel Dining-Recreational Activities And Information Of Travel Agents**

---
"""

# Import the data.
# The commented line below is the Colab upload step, kept for reference.
#uploaded2 = files.upload()

# Read the file as a pandas DataFrame.
# The commented line is the Colab variant that reads from the uploaded file object.

#df_2 = pd.read_csv(io.BytesIO(uploaded2['Places for Travel-Dining-Recreational activities and Information of travel agents.csv']))

# Local variant: the CSV is expected in the current working directory.
df_2 = pd.read_csv('Places for Travel-Dining-Recreational activities and Information of travel agents.csv')

"""### Getting Know About The Dataset"""

# NOTE: the bare expressions below only render output in a notebook cell;
# when this file runs as a plain script their result is discarded.

# First 5 rows.
df_2.head()

# Last 5 rows.
df_2.tail()

# Data type of each column.
df_2.dtypes

# Number of missing values per column.
df_2.isnull().sum()

# (rows, columns) of the data.
df_2.shape

"""#### **Removing null values is not a suitable way to analysis the daya. because there are 1045 values null in Grade column** """

# Frequency of each non-null Grade value (value_counts skips NaN by default).
df_2['Grade'].value_counts()

"""## Exploratory Data Analysis & Visualization"""

def def_2_countplot(col):
  """Print and plot how often each value of ``df_2[col]`` occurs.

  Args:
    col: Label of a column in the module-level ``df_2``. It is passed through
      ``str()`` before the lookup, so non-string labels are looked up by
      their string form.
  """

  #print the count
  print(df_2[str(col)].value_counts())

  #visualize the count plot
  plt.figure(figsize=(18,8.5))
  # Pass the column as ``x=``: seaborn >= 0.12 interprets the first positional
  # argument as ``data``, so a positional Series is no longer counted per value.
  sns.countplot(x=df_2[str(col)],palette='Set2')
  plt.xticks(rotation='vertical',size=14)
  plt.show()

columns_df_2 = ['Type','District']

# One count plot per categorical column of interest.
for i in columns_df_2:
  def_2_countplot(i)

"""## Visualize How Each Type Distributed In Each Major District"""

# select the major districts

df_2_c_k_g_G = df_2[df_2['District'].isin(['Colombo', 'Kandy', 'Galle', 'Gampaha'])].copy()
df_2_c_k_g_G.head()

# count the places for each (District, Type) pair

Df2_Gr_By_type_DIS = df_2_c_k_g_G.groupby(['District', 'Type']).size().reset_index(name='Counts')

Df2_Gr_By_type_DIS

# reshape with a pivot table: one row per district, one column of counts per type

Df2_Gr_By_type_DIS_pivot = Df2_Gr_By_type_DIS.pivot(index='District', columns='Type', values='Counts').reset_index()
Df2_Gr_By_type_DIS_pivot

# Grouped bar chart: one bar per place type within each district.
Df2_Gr_By_type_DIS_pivot.plot(x="District", y=["Restaurants", "Spa & Wellness Centers","Spice Gardens","Tourist Shops","Travel Agents","Water Sports Centers" ], kind="bar",figsize=(18,10))
plt.xticks(rotation='vertical',size=15)
# show must be *called*; the bare attribute `plt.show` is a no-op expression.
plt.show()

"""> ### - There is a huge number of restaurants & travel agents in Colombo district

> ### - Most of the tourist shops are located in Kandy, because Kandy has its own unique culture, so there are lots of handmade clothes & items ( batiks , wood products etc.)

> ### - So according to the above facts we can determine that Kandy , Colombo & Galle are the best places for tourism, considering their availability of travel, dining and recreational activities and travel agents

# **Analysis Of Crimes Data In Sri Lanka**

---
"""

#import the data
#uploaded3 = files.upload()

"""### Getting Know About The Dataset"""

# Colab variant that reads the workbook from an uploaded file object (kept for reference).
#df_3 = pd.read_excel(io.BytesIO(uploaded3['Crime_Data _All.xlsx']))

# Local variant: the workbook is expected in the current working directory.
# Note that the file name contains a space before "_All".
df_3 = pd.read_excel('Crime_Data _All.xlsx')

# NOTE: the bare expressions below only render output in a notebook cell;
# when this file runs as a plain script their result is discarded.

# First 5 rows.
df_3.head()

# Last row only.
df_3.tail(1)

# Data type of each column.
df_3.dtypes

# Summary statistics of the numeric columns.
df_3.describe()

"""## Type Of Crimes"""

# Number of rows per crime category (uses the original 'Category' header,
# before the columns are renamed further down).
df_3['Category'].value_counts()

"""## Cleaning The Data"""

# Replace the workbook headers with short names: 'year', 'category', then one
# column per district. The assignment is positional, so the list must follow
# the column order of the workbook.
# NOTE(review): the district order is taken as-is from the original code — confirm against the sheet.
df_3.columns = ['year', 'category'] + [
    'Anuradhapura', 'Ampara', 'Badulla', 'Batticalo', 'Colombo',
    'Galle', 'Gampaha', 'Jaffna', 'Kalutara', 'Kandy',
    'Kegalle', 'Kilinochchi', 'Kurunegala', 'Mannar', 'Matale',
    'Matara', 'Monaragala', 'Mulllativu', 'Nuwara Eliya', 'Polonnaruwa',
    'Puttlam', 'Ratnapura', 'Hambanthota', 'Trincomalee', 'Vavuniya',
]

# Inspect the first two rows to check the new headers
df_3.head(2)

"""## Filter Type Of crimes That Mainly Affect To Tourists"""

# Keep only the four crime categories this analysis treats as affecting tourists.
im_cr = pd.DataFrame(df_3[df_3['category'].isin(['Riot', 'Counterfeiting Currency', 'Extortion', 'Robbery'])])
im_cr

"""## Get Sum Of The Each Crime All Of The Distrcits"""

# District columns that are added up to get a country-wide total per row.
column_list = [
    'Anuradhapura', 'Ampara', 'Badulla', 'Batticalo', 'Colombo',
    'Galle', 'Gampaha', 'Jaffna', 'Kalutara', 'Kandy',
    'Kegalle', 'Kilinochchi', 'Kurunegala', 'Mannar', 'Matale',
    'Matara', 'Monaragala', 'Mulllativu', 'Nuwara Eliya', 'Polonnaruwa',
    'Puttlam', 'Ratnapura', 'Hambanthota', 'Trincomalee', 'Vavuniya',
]

# Row-wise sum across all district columns.
im_cr["Crime_sum"] = im_cr.loc[:, column_list].sum(axis=1)

im_cr

# Reshape to one row per year with one column of country-wide totals per crime category.
TR_im_cr = im_cr.pivot(index='year', columns='category', values='Crime_sum').reset_index()
TR_im_cr

# Grouped bar chart: one bar per crime category within each year.
TR_im_cr.plot(x="year", y=["Extortion", "Riot", "Robbery","Counterfeiting Currency"], kind="bar",figsize=(14,6))
plt.xticks(rotation='vertical',size=15)
# show must be *called*; the bare attribute `plt.show` is a no-op expression.
plt.show()

"""## **According To The Above Insights**
> ### - Robberies Are The Highest In All The Years
> ### - Riots Are The Lowest In All The Years
> ### - Extortion & Counterfeiting Currency Occur In Moderate Amounts Over The Years Compared To The Others

## **How Crimes Are Distributed In The Major Districts With Higher Tourist Activity, Tourist Shops , Guides & Accommodation ( In 2012 )**
"""

# Keep the 2012 rows, then only the crime category and the four major districts
im_cr_yr = im_cr[im_cr['year'] == 2012]
im_cr_Ds =  im_cr_yr[['category','Colombo','Galle','Gampaha','Kandy']]

# Transpose so that each district becomes a row and each crime category a column.
# Using 'category' as the index first means the column labels come from the
# data itself instead of a hard-coded list that silently depends on row order.
im_cr_Ds_ts = im_cr_Ds.set_index('category').transpose()

# After reset_index the first column holds the district names. It is named
# 'category' only because the plot call below (and the original code) use
# that label for the x axis.
im_cr_Ds_ts.index.name = 'category'
im_cr_Ds_ts.columns.name = None
im_cr_Ds_ts = im_cr_Ds_ts.reset_index()

# check the above results
im_cr_Ds_ts

# Grouped bar chart: one bar per crime category within each district.
im_cr_Ds_ts.plot(x="category", y=["Riot" , "Robbery" , "Extortion" , "Counterfeiting Currency"], kind="bar",figsize=(18,10))
# Render the figure when run as a script, like the other plotting sections.
plt.show()

"""## **According To The Above Insights**
> ### - Colombo & Gampaha Have More Crimes Than The Other Districts, Respectively.
> ### - Overall, Kandy & Galle Have Fewer Crimes.
> ### - Extortion & Counterfeiting Currency Are The Most Common Crimes Against Tourists.

---

# **Conclusion**

## When Considering :
> ## Accommodation Data Analysis & Visualizations
* Grades A & B are the most common grades for accommodation places.
* There is a moderate number of Deluxe, Superior & Standard grades.
* Most of the accommodation places are located around Colombo , Kandy & Galle respectively ( Colombo has the most ).
* Galle & Colombo have every type of accommodation place with a higher grade.

> ## Travel Dining-Recreational Activities And Information Of Travel Agents Data Analysis & Visualization
* There is a huge number of restaurants & travel agents in Colombo & Gampaha districts.
* Most of the tourist shops are located in Kandy, because Kandy has its own unique culture, so there are lots of handmade clothes & items ( batiks , wood products etc.).
* So according to the above facts we can determine that Colombo , Kandy & Galle are the best places for tourism, considering their availability of travel, dining and recreational activities and travel agents, respectively.
* Colombo & Gampaha have the most restaurants & travel agents respectively, so in Colombo & Gampaha foreign tourists can easily find good-grade restaurants & can easily hire a travel agent.

> ## Crimes Data Analysis & Visualization
* Colombo & Gampaha have more crimes than the other districts, respectively.
* Overall, Kandy & Galle have fewer crimes.
* Extortion & counterfeiting currency are the most common crimes against tourists.

## So As A Result Of That We Can Recommend **Galle** As The **Best** & **Safest** Place To Visit For Tourists
"""

# Commented out IPython magic to ensure Python compatibility.
# # export notbook file as HTML file
# 
# '''
# %%shell
# jupyter nbconvert --to html /content/Accomondations_In_Sri_Lanka.ipynb
# 
# '''