Commit 96766a0

Meteostat 1.5.11 (#82)

* Bugfix and new endpoint path function (#80)

* Fixes a bug with chunked datasets in hourly requests and introduces a tested function for generating the endpoint path

* Update linter.yml

Workaround for super-linter bug

Co-authored-by: Christian Lamprecht <[email protected]>

* Minor adaptations to make cache behaviour easier to understand (#81)

* Restructuring & Linting

* misplaced-comparison-constant

* Wrap up v1.5.11

* Finalize 1.5.11

Co-authored-by: Daniel Lassahn <[email protected]>
clampr and meteoDaniel committed Jan 8, 2022
1 parent 69ea420 commit 96766a0
Showing 17 changed files with 283 additions and 76 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/linter.yml
@@ -48,7 +48,7 @@ jobs:
       ################################
       # Run Linter against code base #
       ################################
       - name: Lint Code Base
-        uses: github/super-linter@v3
+        uses: github/super-linter@v3.17.0
         env:
           VALIDATE_ALL_CODEBASE: false
           DEFAULT_BRANCH: master
2 changes: 1 addition & 1 deletion meteostat/__init__.py
@@ -12,7 +12,7 @@
 """

 __appname__ = 'meteostat'
-__version__ = '1.5.10'
+__version__ = '1.5.11'

 from .interface.base import Base
 from .interface.timeseries import Timeseries
5 changes: 2 additions & 3 deletions meteostat/core/cache.py
@@ -13,7 +13,7 @@
 import hashlib


-def get_file_path(
+def get_local_file_path(
     cache_dir: str,
     cache_subdir: str,
     path: str
@@ -25,8 +25,7 @@ def get_file_path(
     # Get file ID
     file = hashlib.md5(path.encode('utf-8')).hexdigest()

-    # Return path
-    return cache_dir + os.sep + cache_subdir + os.sep + file
+    return f"{cache_dir}/{cache_subdir}/{file}"


 def file_in_cache(
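
The renamed helper keeps its behaviour: the endpoint path is hashed with MD5 and joined onto the cache directory and subdirectory. A minimal usage sketch — the cache directory, subdirectory and endpoint path below are illustrative values, not taken from this diff:

from meteostat.core.cache import get_local_file_path

# Illustrative arguments; the endpoint path is reduced to an MD5 digest,
# so the cached file name is opaque
path = get_local_file_path('/tmp/meteostat', 'hourly', 'hourly/obs/10637.csv.gz')
print(path)  # /tmp/meteostat/hourly/<md5 hex digest of the endpoint path>

Note the switch from os.sep to a hard-coded '/' in the f-string: on POSIX the result is unchanged, and forward slashes are also accepted by Python's file APIs on Windows.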
3 changes: 2 additions & 1 deletion meteostat/core/loader.py
@@ -87,7 +87,8 @@ def load_handler(
             compression='gzip',
             names=columns,
             dtype=types,
-            parse_dates=parse_dates)
+            parse_dates=parse_dates
+        )

     # Force datetime conversion
     if coerce_dates:
Empty file.
22 changes: 22 additions & 0 deletions meteostat/enumerations/granularity.py
@@ -0,0 +1,22 @@
+"""
+Granularity Enumeration
+
+Meteorological data provided by Meteostat (https://dev.meteostat.net)
+under the terms of the Creative Commons Attribution-NonCommercial
+4.0 International Public License.
+
+The code is licensed under the MIT license.
+"""
+
+from enum import Enum
+
+
+class Granularity(Enum):
+    """
+    The different levels of time series granularity
+    """
+
+    HOURLY = 'hourly'
+    DAILY = 'daily'
+    MONTHLY = 'monthly'
+    NORMALS = 'normals'
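
The values of this enum feed the new generate_endpoint_path helper in meteostat/utilities/endpoint.py, whose diff is among the changed files not shown on this page. A plausible sketch, reconstructed from the call sites in the daily, hourly, monthly and normals diffs below and from the string concatenation it replaces — not the verbatim implementation:

from meteostat.enumerations.granularity import Granularity


def generate_endpoint_path(
    granularity: Granularity,
    station: str,
    model: bool = True,
    year: int = None
) -> str:
    """
    Generate the path of a Meteostat endpoint
    (reconstructed sketch, not the shipped code)
    """

    # Climate normals have no 'full'/'obs' variant
    if granularity is Granularity.NORMALS:
        return f'normals/{station}.csv.gz'

    # 'full' includes model data, 'obs' is observations only
    variant = 'full' if model else 'obs'

    # Hourly data may be split into annual chunks
    chunk = f'{year}/' if year is not None else ''

    return f'{granularity.value}/{variant}/{chunk}{station}.csv.gz'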
20 changes: 10 additions & 10 deletions meteostat/interface/daily.py
@@ -12,10 +12,12 @@
 from typing import Union
 import numpy as np
 import pandas as pd
-from meteostat.core.cache import get_file_path, file_in_cache
+from meteostat.core.cache import get_local_file_path, file_in_cache
 from meteostat.core.loader import processing_handler, load_handler
+from meteostat.enumerations.granularity import Granularity
 from meteostat.utilities.validations import validate_series
 from meteostat.utilities.aggregations import degree_mean, weighted_average
+from meteostat.utilities.endpoint import generate_endpoint_path
 from meteostat.interface.timeseries import Timeseries
 from meteostat.interface.point import Point

@@ -93,11 +95,14 @@ def _load(
         """

         # File name
-        file = 'daily/' + ('full' if self._model else 'obs') + \
-            '/' + station + '.csv.gz'
+        file = generate_endpoint_path(
+            Granularity.DAILY,
+            station,
+            self._model
+        )

         # Get local file path
-        path = get_file_path(self.cache_dir, self.cache_subdir, file)
+        path = get_local_file_path(self.cache_dir, self.cache_subdir, file)

         # Check if file in cache
         if self.max_age > 0 and file_in_cache(path, self.max_age):

@@ -142,12 +147,7 @@ def _get_data(self) -> None:
         if len(self._stations) > 0:

             # List of datasets
-            datasets = []
-
-            for station in self._stations:
-                datasets.append((
-                    str(station),
-                ))
+            datasets = [(str(station),) for station in self._stations]

             # Data Processing
             return processing_handler(
94 changes: 59 additions & 35 deletions meteostat/interface/hourly.py
@@ -9,15 +9,17 @@
 """

 from math import floor
-from datetime import datetime
+from datetime import datetime, timedelta
 from typing import Union
 import pytz
 import numpy as np
 import pandas as pd
-from meteostat.core.cache import get_file_path, file_in_cache
+from meteostat.core.cache import get_local_file_path, file_in_cache
 from meteostat.core.loader import processing_handler, load_handler
+from meteostat.enumerations.granularity import Granularity
 from meteostat.utilities.validations import validate_series
 from meteostat.utilities.aggregations import degree_mean, weighted_average
+from meteostat.utilities.endpoint import generate_endpoint_path
 from meteostat.interface.timeseries import Timeseries
 from meteostat.interface.point import Point

@@ -32,7 +34,7 @@ class Hourly(Timeseries):
     # The cache subdirectory
     cache_subdir: str = 'hourly'

-    # Specify if the library should use chunks or full dumps
+    # Download data as annual chunks
     chunked: bool = True

     # The time zone

@@ -118,13 +120,15 @@ def _set_time(

             # Set start date
             self._start = timezone.localize(
-                start, is_dst=None).astimezone(
-                pytz.utc)
+                start,
+                is_dst=None
+            ).astimezone(pytz.utc)

             # Set end date
             self._end = timezone.localize(
-                end, is_dst=None).astimezone(
-                pytz.utc)
+                end,
+                is_dst=None
+            ).astimezone(pytz.utc)

         else:

@@ -134,21 +138,25 @@ def _set_time(
             # Set end date
             self._end = end

+        self._annual_steps = [
+            (
+                self._start + timedelta(days=365 * i)
+            ).year for i in range(
+                self._end.year - self._start.year + 1
+            )
+        ]
+
     def _load(
         self,
         station: str,
-        year: str = None
+        file: str
     ) -> None:
         """
         Load file from Meteostat
         """

-        # File name
-        file = 'hourly/' + ('full' if self._model else 'obs') + '/' + \
-            (year + '/' if year else '') + station + '.csv.gz'
-
         # Get local file path
-        path = get_file_path(self.cache_dir, self.cache_subdir, file)
+        path = get_local_file_path(self.cache_dir, self.cache_subdir, file)

         # Check if file in cache
         if self.max_age > 0 and file_in_cache(path, self.max_age):

@@ -164,7 +172,8 @@ def _load(
             file,
             self._columns,
             self._types,
-            self._parse_dates)
+            self._parse_dates
+        )

         # Validate Series
         df = validate_series(df, station)

@@ -176,8 +185,12 @@ def _load(
         # Localize time column
         if self._timezone is not None and len(df.index) > 0:
             df = df.tz_localize(
-                'UTC', level='time').tz_convert(
-                self._timezone, level='time')
+                'UTC',
+                level='time'
+            ).tz_convert(
+                self._timezone,
+                level='time'
+            )

         # Filter time period and append to DataFrame
         if self._start and self._end:

@@ -198,29 +211,40 @@ def _get_data(self) -> None:

         if len(self._stations) > 0:

-            # List of datasets
-            datasets = []
-
-            for station in self._stations:
-
-                if self.chunked and self._start and self._end:
-
-                    for year in range(self._start.year, self._end.year + 1):
-                        datasets.append((
-                            str(station),
-                            str(year)
-                        ))
-
-                else:
-
-                    datasets.append((
-                        str(station),
-                        None
-                    ))
+            # Create list of datasets
+            if self.chunked:
+                datasets = [
+                    (
+                        str(station),
+                        generate_endpoint_path(
+                            Granularity.HOURLY,
+                            station,
+                            self._model,
+                            year
+                        )
+                    )
+                    for station in self._stations for year in self._annual_steps
+                ]
+
+            else:
+                datasets = [
+                    (
+                        str(station),
+                        generate_endpoint_path(
+                            Granularity.HOURLY,
+                            station,
+                            self._model
+                        )
+                    )
+                    for station in self._stations
+                ]

             # Data Processing
             return processing_handler(
-                datasets, self._load, self.processes, self.threads)
+                datasets,
+                self._load,
+                self.processes,
+                self.threads
+            )

         return pd.DataFrame(columns=[*self._types])
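
The new _annual_steps attribute lists every calendar year the requested period touches, so _get_data can build one annual chunk per year up front instead of re-deriving the range inside the request loop. A standalone illustration of the comprehension — the dates are examples:

from datetime import datetime, timedelta

start = datetime(2020, 6, 1)
end = datetime(2022, 3, 1)

# Same shape as Hourly._annual_steps: step forward in 365-day
# increments and keep the year of each step
annual_steps = [
    (start + timedelta(days=365 * i)).year
    for i in range(end.year - start.year + 1)
]
print(annual_steps)  # [2020, 2021, 2022]

Note that the 365-day step can drift by a day across leap years, so start dates right at a year boundary may yield a duplicated or skipped year.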
25 changes: 13 additions & 12 deletions meteostat/interface/monthly.py
@@ -12,10 +12,12 @@
 from typing import Union
 import numpy as np
 import pandas as pd
-from meteostat.core.cache import get_file_path, file_in_cache
+from meteostat.core.cache import get_local_file_path, file_in_cache
 from meteostat.core.loader import processing_handler, load_handler
+from meteostat.utilities.endpoint import generate_endpoint_path
 from meteostat.utilities.validations import validate_series
 from meteostat.utilities.aggregations import degree_mean, weighted_average
+from meteostat.enumerations.granularity import Granularity
 from meteostat.interface.timeseries import Timeseries
 from meteostat.interface.point import Point

@@ -94,11 +96,14 @@ def _load(
         """

         # File name
-        file = 'monthly/' + ('full' if self._model else 'obs') + \
-            '/' + station + '.csv.gz'
+        file = generate_endpoint_path(
+            Granularity.MONTHLY,
+            station,
+            self._model
+        )

         # Get local file path
-        path = get_file_path(self.cache_dir, self.cache_subdir, file)
+        path = get_local_file_path(self.cache_dir, self.cache_subdir, file)

         # Check if file in cache
         if self.max_age > 0 and file_in_cache(path, self.max_age):

@@ -143,15 +148,11 @@ def _get_data(self) -> None:
         if len(self._stations) > 0:

             # List of datasets
-            datasets = []
-
-            for station in self._stations:
-                datasets.append((
-                    str(station),
-                ))
-
+            datasets = [(str(station),) for station in self._stations]
             # Data Processing
-            return processing_handler(datasets, self._load, self.processes, self.threads)
+            return processing_handler(
+                datasets, self._load, self.processes, self.threads
+            )

         # Empty DataFrame
         return pd.DataFrame(columns=[*self._types])
22 changes: 12 additions & 10 deletions meteostat/interface/normals.py
@@ -13,10 +13,12 @@
 from datetime import datetime
 import numpy as np
 import pandas as pd
-from meteostat.core.cache import get_file_path, file_in_cache
+from meteostat.core.cache import get_local_file_path, file_in_cache
+from meteostat.enumerations.granularity import Granularity
 from meteostat.core.loader import processing_handler, load_handler
 from meteostat.core.warn import warn
 from meteostat.utilities.aggregations import weighted_average
+from meteostat.utilities.endpoint import generate_endpoint_path
 from meteostat.interface.base import Base
 from meteostat.interface.point import Point

@@ -78,10 +80,13 @@ def _load(
         """

         # File name
-        file = f'normals/{station}.csv.gz'
+        file = generate_endpoint_path(
+            Granularity.NORMALS,
+            station
+        )

         # Get local file path
-        path = get_file_path(self.cache_dir, self.cache_subdir, file)
+        path = get_local_file_path(self.cache_dir, self.cache_subdir, file)

         # Check if file in cache
         if self.max_age > 0 and file_in_cache(path, self.max_age):

@@ -97,11 +102,13 @@ def _load(
             file,
             self._columns,
             self._types,
-            None)
+            None
+        )

         if df.index.size > 0:

             # Add weather station ID
+            # pylint: disable=unsupported-assignment-operation
             df['station'] = station

             # Set index

@@ -130,12 +137,7 @@ def _get_data(self) -> None:
         if len(self._stations) > 0:

             # List of datasets
-            datasets = []
-
-            for station in self._stations:
-                datasets.append((
-                    str(station),
-                ))
+            datasets = [(str(station),) for station in self._stations]

             # Data Processing
             return processing_handler(
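
Taken together, a chunked Hourly query now resolves all of its annual endpoint paths before downloading. A usage sketch against the library's public interface as of 1.5.x — the station ID and dates are examples:

from datetime import datetime
from meteostat import Hourly

# '10637' is an example weather station ID; chunked fetching is the default
data = Hourly('10637', datetime(2020, 6, 1), datetime(2022, 3, 1))
df = data.fetch()
print(df.head())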