Skip to content

Commit

Permalink
Fix models, load data from s3
Browse files Browse the repository at this point in the history
Is fairly slow but at least it works now. Can ignore for a while
  • Loading branch information
RobertLucey committed Dec 30, 2021
1 parent 111232e commit c83f56c
Show file tree
Hide file tree
Showing 9 changed files with 533 additions and 420 deletions.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
boto3
road-collisions-base
6 changes: 1 addition & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
)

INSTALL_REQUIRES = (
'boto3',
'road-collisions-base'
)

Expand All @@ -17,11 +18,6 @@
packages=find_packages('src'),
package_dir={'': 'src'},
install_requires=INSTALL_REQUIRES,
package_data={
'road_collisions_uk': [
'resources/uk/uk.csv.tgz'
]
},
entry_points={
'console_scripts': [
'load_road_collisions_uk = road_collisions_uk.bin.load:main',
Expand Down
3 changes: 3 additions & 0 deletions src/road_collisions_uk/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from road_collisions_uk.download import ensure_data_downloaded

# Runs at import time: importing this package calls
# ensure_data_downloaded() before anything else in the package is used.
ensure_data_downloaded()
3 changes: 2 additions & 1 deletion src/road_collisions_uk/bin/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@


def main():
collisions = Collisions.load_all(region='uk')
print('NOTE: Since UK data is so large, only loading data from 2020')
collisions = Collisions.load_all(region='uk', year=2020)

logger.info('Loaded %s collisions', (len(collisions)))
logger.info('Do something with the data in the variable \'collisions\'...')
Expand Down
43 changes: 43 additions & 0 deletions src/road_collisions_uk/download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import os

import boto3
from botocore import UNSIGNED
from botocore.client import Config


def download_data(data_dir='/opt/road_collisions/uk', bucket='road-collisions-uk'):
    """Copy every object of an S3 bucket into a local directory.

    The bucket is listed with unsigned (anonymous) requests. Objects whose
    destination path already exists locally are skipped. After each
    download the file's access and modification times are set to the
    object's ``LastModified`` timestamp.

    :param data_dir: Local directory the objects are written beneath.
    :param bucket: Name of the S3 bucket to read from.
    """
    os.makedirs(data_dir, exist_ok=True)

    s3 = boto3.client(
        's3',
        region_name='eu-west-1',
        config=Config(signature_version=UNSIGNED)
    )

    paginator = s3.get_paginator('list_objects')
    for page in paginator.paginate(Bucket=bucket):
        # A page without a 'Contents' entry is treated as empty.
        for obj in page.get('Contents', []):
            key = obj['Key']
            if key.endswith('/'):
                # "Folder" placeholder key: there is no file to write.
                continue

            destination = os.path.join(data_dir, key)
            if os.path.exists(destination):
                continue

            # Keys may carry '/'-separated prefixes; the matching local
            # sub-directory has to exist before the file is written.
            os.makedirs(os.path.dirname(destination), exist_ok=True)
            s3.download_file(bucket, key, destination)

            modified = obj['LastModified'].timestamp()
            os.utime(destination, (modified, modified))


def ensure_data_downloaded():
    """Make sure the data files are present locally.

    Simply delegates to ``download_data``.
    """
    download_data()
105 changes: 105 additions & 0 deletions src/road_collisions_uk/models/casualty.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
class Casualties:
    """List-backed collection of casualty objects."""

    def __init__(self, *args, **kwargs):
        """Create the collection.

        Positional arguments are ignored. The optional ``data`` keyword
        supplies the backing list, which is stored as given (not copied);
        without it the collection starts empty.
        """
        self._data = kwargs['data'] if 'data' in kwargs else []

    def __getitem__(self, i):
        """Return the item (or slice) at position ``i`` of the backing list."""
        return self._data[i]

    def __iter__(self):
        """Iterate over the stored items in order."""
        return iter(self._data)

    def __len__(self):
        """Return the number of stored items."""
        return len(self._data)

    def append(self, data):
        """Add a single item to the end of the collection."""
        self._data.append(data)

    def extend(self, data):
        """Add every item of the iterable ``data`` to the collection."""
        self._data.extend(data)

    def serialize(self):
        """Return a list holding ``serialize()`` of each stored item."""
        return [entry.serialize() for entry in self]

    @staticmethod
    def parse(data):
        """Build a ``Casualties`` collection from raw data.

        ``data`` is either one dict or a list of dicts; each dict is
        expanded into the keyword arguments of ``Casualty``.

        Raises ``NotImplementedError`` when ``data`` is neither a list nor
        a dict, or when a list element is not a dict.
        """
        # Normalise both accepted shapes to a list of rows first.
        if isinstance(data, dict):
            rows = [data]
        elif isinstance(data, list):
            rows = data
        else:
            raise NotImplementedError()

        parsed = Casualties()
        for row in rows:
            if not isinstance(row, dict):
                raise NotImplementedError()
            parsed.append(Casualty(**row))
        return parsed


class Casualty:
    """A single casualty record with a fixed set of fields.

    The field names are exactly the entries of ``__slots__``; instances
    carry no ``__dict__``.
    """

    __slots__ = [
        'vehicle_reference',
        'casualty_reference',
        'casualty_class',
        'sex_of_casualty',
        'age_of_casualty',
        'age_band_of_casualty',
        'casualty_severity',
        'pedestrian_location',
        'pedestrian_movement',
        'car_passenger',
        'bus_or_coach_passenger',
        'pedestrian_road_maintenance_worker',
        'casualty_type',
        'casualty_home_area_type',
        'casualty_imd_decile',
    ]

    def __init__(self, *args, **kwargs):
        """Populate every field from the keyword argument of the same name.

        Positional arguments and unknown keywords are ignored. A missing
        field raises ``KeyError``.
        """
        # Fields are assigned in declaration order, so the first missing
        # one (in that order) is the one reported by the KeyError.
        for field in Casualty.__slots__:
            setattr(self, field, kwargs[field])

    def serialize(self):
        """Return the record as a dict mapping field name to value."""
        return {
            field: getattr(self, field)
            for field in Casualty.__slots__
        }
Loading

0 comments on commit c83f56c

Please sign in to comment.