Commit 96766a0

Meteostat 1.5.11 (#82)

* Bugfix and new endpoint path function (#80)

* Fixes a bug with chunked datasets in hourly requests and introduces a tested function for generating the endpoint path

* Update linter.yml

Workaround for super-linter bug

Co-authored-by: Christian Lamprecht <[email protected]>

* Minor adaptations to make cache behaviour easier to understand (#81)

* Restructuring & Linting

* misplaced-comparison-constant

* Wrap up v1.5.11

* Finalize 1.5.11

Co-authored-by: Daniel Lassahn <[email protected]>
clampr and meteoDaniel committed Jan 8, 2022
1 parent 69ea420 commit 96766a0
Showing 17 changed files with 283 additions and 76 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/linter.yml
@@ -48,7 +48,7 @@ jobs:
       ################################
       # Run Linter against code base #
       ################################
       - name: Lint Code Base
-        uses: github/super-linter@v3
+        uses: github/super-linter@v3.17.0
         env:
           VALIDATE_ALL_CODEBASE: false
           DEFAULT_BRANCH: master
2 changes: 1 addition & 1 deletion meteostat/__init__.py
@@ -12,7 +12,7 @@
 """

 __appname__ = 'meteostat'
-__version__ = '1.5.10'
+__version__ = '1.5.11'

 from .interface.base import Base
 from .interface.timeseries import Timeseries
5 changes: 2 additions & 3 deletions meteostat/core/cache.py
@@ -13,7 +13,7 @@
 import hashlib


-def get_file_path(
+def get_local_file_path(
     cache_dir: str,
     cache_subdir: str,
     path: str
@@ -25,8 +25,7 @@ def get_file_path(
     # Get file ID
     file = hashlib.md5(path.encode('utf-8')).hexdigest()

-    # Return path
-    return cache_dir + os.sep + cache_subdir + os.sep + file
+    return f"{cache_dir}/{cache_subdir}/{file}"


 def file_in_cache(
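
The renamed helper keeps its behaviour: the endpoint path is hashed with MD5 and joined onto the cache directory and subdirectory. A minimal usage sketch — the cache directory, subdirectory and endpoint path below are illustrative values, not taken from this diff:

from meteostat.core.cache import get_local_file_path

# Illustrative arguments; the endpoint path is reduced to an MD5 digest,
# so the cached file name is opaque
path = get_local_file_path('/tmp/meteostat', 'hourly', 'hourly/obs/10637.csv.gz')
print(path)  # /tmp/meteostat/hourly/<md5 hex digest of the endpoint path>

Note the switch from os.sep to a hard-coded '/' in the f-string: on POSIX the result is unchanged, and forward slashes are also accepted by Python's file APIs on Windows.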
3 changes: 2 additions & 1 deletion meteostat/core/loader.py
@@ -87,7 +87,8 @@ def load_handler(
             compression='gzip',
             names=columns,
             dtype=types,
-            parse_dates=parse_dates)
+            parse_dates=parse_dates
+        )

     # Force datetime conversion
     if coerce_dates:
Empty file.
22 changes: 22 additions & 0 deletions meteostat/enumerations/granularity.py
@@ -0,0 +1,22 @@
+"""
+Granularity Enumeration
+
+Meteorological data provided by Meteostat (https://dev.meteostat.net)
+under the terms of the Creative Commons Attribution-NonCommercial
+4.0 International Public License.
+
+The code is licensed under the MIT license.
+"""
+
+from enum import Enum
+
+
+class Granularity(Enum):
+    """
+    The different levels of time series granularity
+    """
+
+    HOURLY = 'hourly'
+    DAILY = 'daily'
+    MONTHLY = 'monthly'
+    NORMALS = 'normals'
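
The values of this enum feed the new generate_endpoint_path helper in meteostat/utilities/endpoint.py, whose diff is among the changed files not shown on this page. A plausible sketch, reconstructed from the call sites in the daily, hourly, monthly and normals diffs below and from the string concatenation it replaces — not the verbatim implementation:

from meteostat.enumerations.granularity import Granularity


def generate_endpoint_path(
    granularity: Granularity,
    station: str,
    model: bool = True,
    year: int = None
) -> str:
    """
    Generate the path of a Meteostat endpoint
    (reconstructed sketch, not the shipped code)
    """

    # Climate normals have no 'full'/'obs' variant
    if granularity is Granularity.NORMALS:
        return f'normals/{station}.csv.gz'

    # 'full' includes model data, 'obs' is observations only
    variant = 'full' if model else 'obs'

    # Hourly data may be split into annual chunks
    chunk = f'{year}/' if year is not None else ''

    return f'{granularity.value}/{variant}/{chunk}{station}.csv.gz'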
20 changes: 10 additions & 10 deletions meteostat/interface/daily.py
@@ -12,10 +12,12 @@
 from typing import Union
 import numpy as np
 import pandas as pd
-from meteostat.core.cache import get_file_path, file_in_cache
+from meteostat.core.cache import get_local_file_path, file_in_cache
 from meteostat.core.loader import processing_handler, load_handler
+from meteostat.enumerations.granularity import Granularity
 from meteostat.utilities.validations import validate_series
 from meteostat.utilities.aggregations import degree_mean, weighted_average
+from meteostat.utilities.endpoint import generate_endpoint_path
 from meteostat.interface.timeseries import Timeseries
 from meteostat.interface.point import Point

@@ -93,11 +95,14 @@ def _load(
         """

         # File name
-        file = 'daily/' + ('full' if self._model else 'obs') + \
-            '/' + station + '.csv.gz'
+        file = generate_endpoint_path(
+            Granularity.DAILY,
+            station,
+            self._model
+        )

         # Get local file path
-        path = get_file_path(self.cache_dir, self.cache_subdir, file)
+        path = get_local_file_path(self.cache_dir, self.cache_subdir, file)

         # Check if file in cache
         if self.max_age > 0 and file_in_cache(path, self.max_age):

@@ -142,12 +147,7 @@ def _get_data(self) -> None:
         if len(self._stations) > 0:

             # List of datasets
-            datasets = []
-
-            for station in self._stations:
-                datasets.append((
-                    str(station),
-                ))
+            datasets = [(str(station),) for station in self._stations]

             # Data Processing
             return processing_handler(
94 changes: 59 additions & 35 deletions meteostat/interface/hourly.py
@@ -9,15 +9,17 @@
 """

 from math import floor
-from datetime import datetime
+from datetime import datetime, timedelta
 from typing import Union
 import pytz
 import numpy as np
 import pandas as pd
-from meteostat.core.cache import get_file_path, file_in_cache
+from meteostat.core.cache import get_local_file_path, file_in_cache
 from meteostat.core.loader import processing_handler, load_handler
+from meteostat.enumerations.granularity import Granularity
 from meteostat.utilities.validations import validate_series
 from meteostat.utilities.aggregations import degree_mean, weighted_average
+from meteostat.utilities.endpoint import generate_endpoint_path
 from meteostat.interface.timeseries import Timeseries
 from meteostat.interface.point import Point

@@ -32,7 +34,7 @@ class Hourly(Timeseries):
     # The cache subdirectory
     cache_subdir: str = 'hourly'

-    # Specify if the library should use chunks or full dumps
+    # Download data as annual chunks
     chunked: bool = True

     # The time zone

@@ -118,13 +120,15 @@ def _set_time(

             # Set start date
             self._start = timezone.localize(
-                start, is_dst=None).astimezone(
-                pytz.utc)
+                start,
+                is_dst=None
+            ).astimezone(pytz.utc)

             # Set end date
             self._end = timezone.localize(
-                end, is_dst=None).astimezone(
-                pytz.utc)
+                end,
+                is_dst=None
+            ).astimezone(pytz.utc)

         else:

@@ -134,21 +138,25 @@ def _set_time(
             # Set end date
             self._end = end

+        self._annual_steps = [
+            (
+                self._start + timedelta(days=365 * i)
+            ).year for i in range(
+                self._end.year - self._start.year + 1
+            )
+        ]
+
     def _load(
         self,
         station: str,
-        year: str = None
+        file: str
     ) -> None:
         """
         Load file from Meteostat
         """

-        # File name
-        file = 'hourly/' + ('full' if self._model else 'obs') + '/' + \
-            (year + '/' if year else '') + station + '.csv.gz'
-
         # Get local file path
-        path = get_file_path(self.cache_dir, self.cache_subdir, file)
+        path = get_local_file_path(self.cache_dir, self.cache_subdir, file)

         # Check if file in cache
         if self.max_age > 0 and file_in_cache(path, self.max_age):

@@ -164,7 +172,8 @@ def _load(
             file,
             self._columns,
             self._types,
-            self._parse_dates)
+            self._parse_dates
+        )

         # Validate Series
         df = validate_series(df, station)

@@ -176,8 +185,12 @@ def _load(
         # Localize time column
         if self._timezone is not None and len(df.index) > 0:
             df = df.tz_localize(
-                'UTC', level='time').tz_convert(
-                self._timezone, level='time')
+                'UTC',
+                level='time'
+            ).tz_convert(
+                self._timezone,
+                level='time'
+            )

         # Filter time period and append to DataFrame
         if self._start and self._end:

@@ -198,29 +211,40 @@ def _get_data(self) -> None:

         if len(self._stations) > 0:

-            # List of datasets
-            datasets = []
-
-            for station in self._stations:
-
-                if self.chunked and self._start and self._end:
-
-                    for year in range(self._start.year, self._end.year + 1):
-                        datasets.append((
-                            str(station),
-                            str(year)
-                        ))
-
-                else:
-
-                    datasets.append((
-                        str(station),
-                        None
-                    ))
+            # Create list of datasets
+            if self.chunked:
+                datasets = [
+                    (
+                        str(station),
+                        generate_endpoint_path(
+                            Granularity.HOURLY,
+                            station,
+                            self._model,
+                            year
+                        )
+                    )
+                    for station in self._stations for year in self._annual_steps
+                ]
+
+            else:
+                datasets = [
+                    (
+                        str(station),
+                        generate_endpoint_path(
+                            Granularity.HOURLY,
+                            station,
+                            self._model
+                        )
+                    )
+                    for station in self._stations
+                ]

             # Data Processing
             return processing_handler(
-                datasets, self._load, self.processes, self.threads)
+                datasets,
+                self._load,
+                self.processes,
+                self.threads
+            )

         return pd.DataFrame(columns=[*self._types])
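
The new _annual_steps attribute lists every calendar year the requested period touches, so _get_data can build one annual chunk per year up front instead of re-deriving the range inside the request loop. A standalone illustration of the comprehension — the dates are examples:

from datetime import datetime, timedelta

start = datetime(2020, 6, 1)
end = datetime(2022, 3, 1)

# Same shape as Hourly._annual_steps: step forward in 365-day
# increments and keep the year of each step
annual_steps = [
    (start + timedelta(days=365 * i)).year
    for i in range(end.year - start.year + 1)
]
print(annual_steps)  # [2020, 2021, 2022]

Note that the 365-day step can drift by a day across leap years, so start dates right at a year boundary may yield a duplicated or skipped year.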
25 changes: 13 additions & 12 deletions meteostat/interface/monthly.py
@@ -12,10 +12,12 @@
 from typing import Union
 import numpy as np
 import pandas as pd
-from meteostat.core.cache import get_file_path, file_in_cache
+from meteostat.core.cache import get_local_file_path, file_in_cache
 from meteostat.core.loader import processing_handler, load_handler
+from meteostat.utilities.endpoint import generate_endpoint_path
 from meteostat.utilities.validations import validate_series
 from meteostat.utilities.aggregations import degree_mean, weighted_average
+from meteostat.enumerations.granularity import Granularity
 from meteostat.interface.timeseries import Timeseries
 from meteostat.interface.point import Point

@@ -94,11 +96,14 @@ def _load(
         """

         # File name
-        file = 'monthly/' + ('full' if self._model else 'obs') + \
-            '/' + station + '.csv.gz'
+        file = generate_endpoint_path(
+            Granularity.MONTHLY,
+            station,
+            self._model
+        )

         # Get local file path
-        path = get_file_path(self.cache_dir, self.cache_subdir, file)
+        path = get_local_file_path(self.cache_dir, self.cache_subdir, file)

         # Check if file in cache
         if self.max_age > 0 and file_in_cache(path, self.max_age):

@@ -143,15 +148,11 @@ def _get_data(self) -> None:
         if len(self._stations) > 0:

             # List of datasets
-            datasets = []
-
-            for station in self._stations:
-                datasets.append((
-                    str(station),
-                ))
-
+            datasets = [(str(station),) for station in self._stations]
             # Data Processing
-            return processing_handler(datasets, self._load, self.processes, self.threads)
+            return processing_handler(
+                datasets, self._load, self.processes, self.threads
+            )

         # Empty DataFrame
         return pd.DataFrame(columns=[*self._types])
22 changes: 12 additions & 10 deletions meteostat/interface/normals.py
@@ -13,10 +13,12 @@
 from datetime import datetime
 import numpy as np
 import pandas as pd
-from meteostat.core.cache import get_file_path, file_in_cache
+from meteostat.core.cache import get_local_file_path, file_in_cache
+from meteostat.enumerations.granularity import Granularity
 from meteostat.core.loader import processing_handler, load_handler
 from meteostat.core.warn import warn
 from meteostat.utilities.aggregations import weighted_average
+from meteostat.utilities.endpoint import generate_endpoint_path
 from meteostat.interface.base import Base
 from meteostat.interface.point import Point

@@ -78,10 +80,13 @@ def _load(
         """

         # File name
-        file = f'normals/{station}.csv.gz'
+        file = generate_endpoint_path(
+            Granularity.NORMALS,
+            station
+        )

         # Get local file path
-        path = get_file_path(self.cache_dir, self.cache_subdir, file)
+        path = get_local_file_path(self.cache_dir, self.cache_subdir, file)

         # Check if file in cache
         if self.max_age > 0 and file_in_cache(path, self.max_age):

@@ -97,11 +102,13 @@ def _load(
             file,
             self._columns,
             self._types,
-            None)
+            None
+        )

         if df.index.size > 0:

             # Add weather station ID
+            # pylint: disable=unsupported-assignment-operation
             df['station'] = station

             # Set index

@@ -130,12 +137,7 @@ def _get_data(self) -> None:
         if len(self._stations) > 0:

             # List of datasets
-            datasets = []
-
-            for station in self._stations:
-                datasets.append((
-                    str(station),
-                ))
+            datasets = [(str(station),) for station in self._stations]

             # Data Processing
             return processing_handler(
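
Taken together, a chunked Hourly query now resolves all of its annual endpoint paths before downloading. A usage sketch against the library's public interface as of 1.5.x — the station ID and dates are examples:

from datetime import datetime
from meteostat import Hourly

# '10637' is an example weather station ID; chunked fetching is the default
data = Hourly('10637', datetime(2020, 6, 1), datetime(2022, 3, 1))
df = data.fetch()
print(df.head())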