## Connect to Google Drive

In [28]:
# Mount the user's Google Drive at /content/drive/.
# `google.colab` is the Colab helper package, so this cell is meant to run on Colab.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


## Authorization

In [None]:
# Before running this cell, make sure you have:
# 1) Logged in to the Copernicus website
# 2) Accepted the license terms for the account
# 3) Accepted the terms of 'CMIP6 - Data Access - Terms of Use' at https://cds.climate.copernicus.eu/cdsapp/#!/terms/cmip6-wps
# 4) Your UID and API key at hand (not your login details)

# Imported locally because this cell runs before the notebook's import cell.
import os
from getpass import getpass

# NOTE(review): '/api/v2' is the endpoint this notebook was written against;
# confirm it is still the current CDS API endpoint before running.
url = 'url: https://cds.climate.copernicus.eu/api/v2'

# Never store the API key in the notebook source: read it from the CDSAPI_KEY
# environment variable when set, otherwise prompt for it without echoing.
api_key = os.environ.get('CDSAPI_KEY') or getpass('CDS API key (UID:API-KEY): ')
key = f'key: {api_key}'

# cdsapi looks for its configuration in the current user's home directory;
# expanduser avoids hard-coding '/root'.
cdsapirc_path = os.path.expanduser('~/.cdsapirc')
with open(cdsapirc_path, 'w') as f:
    f.write('\n'.join([url, key]))
# The file holds a secret, so restrict it to the owning user.
os.chmod(cdsapirc_path, 0o600)

# Confirm the write without echoing the key into the saved notebook output.
print(f'CDS credentials written to {cdsapirc_path}')
print(url)

## Install Required Libraries

In [None]:
# Install all third-party dependencies in a single call so pip resolves them
# together. %pip (instead of !pip) installs into the environment of the running
# kernel.
%pip install -q rasterio rioxarray geopandas cartopy shapely rasterstats tqdm cdsapi

# If pip upgraded a package that was already imported, restart the runtime
# before continuing (Runtime > Restart runtime), or uncomment:
# import os; os.kill(os.getpid(), 9)

## Import Libraries

In [5]:
import datetime
from pathlib import Path 
import geopandas as gpd
import pandas as pd
import numpy as np
import urllib
import requests
import sys
import time
from rasterstats import zonal_stats
import rasterio
from tqdm import tqdm 
import zipfile
import io 
from shapely.geometry.polygon import Polygon 
from shapely.geometry.multipolygon import MultiPolygon
import cdsapi
import os
import shutil
import xarray as xr
import rioxarray
from shapely.geometry import mapping
import cartopy.crs as ccrs
import cftime
import cartopy.feature as cfeature
from shapely import wkt
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter(action='ignore')

## Define a Python class to extract CMIP climate variables via cdsapi

In [34]:
# Import cdsapi and other geospatial libraries
# Note: to run cdsapi properly, make sure the following actions have been performed:
# 1) Login to Copernicus website
# 2) Accept the license terms for the account
# 3) Accept the terms of 'CMIP6 - Data Access - Terms of Use' at https://cds.climate.copernicus.eu/cdsapp/#!/terms/cmip6-wps
# 4) Input UID and API Key for data access. More details: https://cds.climate.copernicus.eu/api-how-to

class CmipExtractor():
  """
  Download one CMIP6 variable from the Copernicus Climate Data Store (CDS)
  through ``cdsapi`` and open the resulting netCDF file with xarray.

  Parameters
  ----------
  variable: str
      CDS variable name (e.g. ``'precipitation'``) or a CMIP6 variable id
  experiment: str
      CMIP6 experiment / scenario (e.g. ``'historical'``)
  start, end: str
      First and last date of the request; stored joined as ``"start/end"``
      in ``self.date``
  bounds: list
      Bounding box passed unchanged as the ``area`` of the CDS request, i.e.
      ``[north, west, south, east]`` = ``[maxy, minx, miny, maxx]``
  resolution: str
      Temporal resolution of the request (e.g. ``'monthly'``)
  folder: str
      Base folder in which the downloaded zip archives are stored
  model: str, optional
      Name of the climate model (GCM); may be set later with ``set_model``

  Calling the instance runs the retrieval and returns the opened dataset.
  """
  variable: str
  experiment: str
  model: str
  # NOTE(review): roi, start and end are declared but never assigned by
  # __init__ (start/end are merged into self.date). The roi annotation is a
  # string so that defining the class does not require geopandas.
  roi: "gpd.GeoDataFrame"
  start: str
  end: str

  # Common CMIP6 climate variables and their ids
  # https://confluence.ecmwf.int/display/CKB/CMIP6%3A+Global+climate+projections#CMIP6:Globalclimateprojections-Parameterlistings
  _VAR_IDS = {
    "near_surface_air_temperature": "tas",
    "daily_maximum_near_surface_air_temperature": "tasmax",
    "daily_minimum_near_surface_air_temperature": "tasmin",
    "surface_temperature": "ts",
    "sea_level_pressure": "psl",
    "surface_air_pressure": "ps",
    "eastward_near_surface_wind": "uas",
    "northward_near_surface_wind": "vas",
    "precipitation": "pr",
    "evaporation_including_sublimation_and_transpiration": "evspsbl",
    "air_temperature": "ta"
  }

  def __init__(self,variable,experiment,start,end,bounds,resolution,folder,model=None):
    self.variable = variable
    self.experiment = experiment
    self.bounds = bounds
    self.date =  f"{start}/{end}"
    self.model = model
    self.download_folder = folder
    self.resolution = resolution
    # Path of the extracted netCDF file; filled in by cds_retrieval()
    self.nc_path = None

  def __str__(self):
    return f"Variable: {self.variable}\nExperiment: {self.experiment}\nGCM Model: {self.model}"

  @property
  def var_id(self) -> str:
    """
    Get CMIP6 Variable ID
    Description
    ----------
    Translate ``self.variable`` into its CMIP6 variable id
    Returns
    -------
    id: str
        The id from the table of common variables when the name is listed
        there. Otherwise the variable is assumed to already be an id and is
        returned unchanged.
    """
    return self._VAR_IDS.get(self.variable, self.variable)

  def set_model(self, model):
    """
    Set GCM after init

    Returns the extractor itself so that calls can be chained.
    """
    self.model = model
    return self

  def unzipFile(self, path_to_zipfile: str, unzip_directory: str) -> None:
    """
    Unzip netCDF file
    Description
    ----------
    Extract every ``.nc`` member of a zip archive into the target directory
    Parameters
    ----------
    path_to_zipfile: str
        Path of the zip file
    unzip_directory: str
        Folder that receives the extracted files
    Returns
    -------
    None. Members that do not end with ``.nc`` are skipped.
    """
    # The context manager closes the archive even when extraction fails
    with zipfile.ZipFile(path_to_zipfile, 'r') as zip_file:
      for member in zip_file.namelist():
        if member.endswith('.nc'):
          zip_file.extract(member, unzip_directory)

  def cds_retrieval(self) -> None:
    """
      Retrieve CMIP6 data
      Description
      ----------
      Retrieve CMIP6 data from CDS using the Python API for a single variable,
      experiment, and model. The request is built from the instance
      attributes (variable, experiment, model, bounds, date, resolution).

      The archive is stored as
      ``<download_folder>/<variable>/<experiment>/<variable>_<experiment>_<model>.zip``
      and is only downloaded when that file does not exist yet. Its netCDF
      members are extracted to
      ``intermediate/Climate/<variable>/<experiment>/<experiment>_<model>``
      (relative to the current working directory) unless that folder already
      contains a netCDF file.
      Returns
      -------
      None. The path of the extracted netCDF file is stored in
      ``self.nc_path``.
      Raises
      ------
      AssertionError
          If no netCDF file is available after download and extraction.
          Download errors themselves are printed, not raised.
    """
    model = self.model
    experiment = self.experiment
    variable = self.variable

    # output file path
    netcdf_zip = os.path.join(self.download_folder, variable, experiment, f'{variable}_{experiment}_{model}.zip')

    # Download climate data (single variable) using cdsapi API
    if not os.path.exists(netcdf_zip):
      try:
        # The download is written straight to netcdf_zip, so its folder has
        # to exist beforehand; otherwise the first run on a fresh runtime fails.
        os.makedirs(os.path.dirname(netcdf_zip), exist_ok=True)
        c = cdsapi.Client()
        c.retrieve(
            'projections-cmip6',
            {
                'temporal_resolution': self.resolution,
                'experiment': experiment,
                'level': 'single_levels',
                'variable': variable,
                'model': model,
                'area': self.bounds,
                'date': self.date,
                'format': 'zip'
            },
            netcdf_zip
        )
      except Exception as e:
        # Deliberately best-effort: report and fall through to the check below
        print('Error: {}. Continue to the next retrieval.'.format(e))

    # Unzip downloaded file
    unzip_folder = os.path.join('intermediate', 'Climate', variable, experiment, f'{experiment}_{model}')
    # Look for extracted files rather than for the folder itself, so that an
    # empty folder left behind by an interrupted run does not block extraction.
    already_extracted = any(Path(unzip_folder).glob('*.nc'))
    if os.path.exists(netcdf_zip) and not already_extracted:
      print('unzip folder')
      self.unzipFile(netcdf_zip, unzip_folder)

    # Sorted so the selected file is deterministic when several are present
    netcdf_fn = sorted(Path(unzip_folder).glob('*.nc'))
    # Raised explicitly (not via `assert`) so the check survives `python -O`
    if not netcdf_fn:
      raise AssertionError("No netCDF file retrieved!")

    self.nc_path = netcdf_fn[0]

  def __call__(self):
    """
    Run the retrieval and return the netCDF file opened as an xarray Dataset.
    """
    # Dispatch through the instance so subclasses can override cds_retrieval
    self.cds_retrieval()
    ds = xr.open_dataset(self.nc_path)
    return ds

## Data Retrieval

In [35]:
# Request settings for the CDS download.
variable, experiment = 'precipitation', 'historical'
res = 'monthly'
start, end = '2014-01-01', '2015-01-01'
# Forwarded unchanged as the 'area' of the CDS request.
bounds = [42.51, -124.92, 32.03, -113.63]

# The GCM is left unset here; it is chosen later with set_model().
extractor = CmipExtractor(
    variable=variable,
    experiment=experiment,
    start=start,
    end=end,
    bounds=bounds,
    resolution=res,
    folder='raw/Climate',
)

In [17]:
# Show the request summary produced by CmipExtractor.__str__ (no GCM chosen yet).
print(extractor)

Variable: precipitation
Experiment: historical
GCM Model: None


In [36]:
# Choose the GCM for the request. set_model returns the extractor itself,
# which is why the cell echoes the object's repr.
extractor.set_model('cams_csm1_0')

<__main__.CmipExtractor at 0x7fc0afe58bd0>

In [22]:
# Show the request summary again to confirm the GCM is now set.
print(extractor)

Variable: precipitation
Experiment: historical
GCM Model: cams_csm1_0


## Retrieved Data

In [38]:
# Calling the extractor runs the retrieval (the download is skipped when the
# zip archive already exists) and opens the netCDF file as an xarray Dataset.
ds = extractor()
# Select the data variable through the extractor's CMIP6 id ('pr' for
# precipitation) instead of hard-coding it, so this cell follows `variable`.
# NOTE(review): assumes the dataset names its data variable after the CMIP6 id - confirm for other variables.
ds[extractor.var_id]