From b1f394a48764e22799b13332f58c77597fac4813 Mon Sep 17 00:00:00 2001 From: zshaheen Date: Tue, 14 Mar 2017 16:43:46 -0700 Subject: [PATCH] Removed two classes and added lots of docstrings. --- src/python/pcmdi/scripts/driver/dataset.py | 23 +- src/python/pcmdi/scripts/driver/model.py | 18 +- .../pcmdi/scripts/driver/observation.py | 26 ++- .../pcmdi/scripts/driver/outputmetrics.py | 40 ++-- src/python/pcmdi/scripts/driver/pmp_driver.py | 64 ------ src/python/pcmdi/scripts/driver/rundiags.py | 182 ---------------- .../pcmdi/scripts/pcmdi_metrics_driver.py | 199 +++++++++++++++++- 7 files changed, 275 insertions(+), 277 deletions(-) delete mode 100755 src/python/pcmdi/scripts/driver/pmp_driver.py delete mode 100755 src/python/pcmdi/scripts/driver/rundiags.py diff --git a/src/python/pcmdi/scripts/driver/dataset.py b/src/python/pcmdi/scripts/driver/dataset.py index 552a7fc1d..b2927c949 100644 --- a/src/python/pcmdi/scripts/driver/dataset.py +++ b/src/python/pcmdi/scripts/driver/dataset.py @@ -4,10 +4,11 @@ import logging import cdutil import cdms2 -import pcmdi_metrics.io.base +from pcmdi_metrics.io.base import Base class DataSet(object): + ''' Abstract parent of the Observation of Model classes. ''' __metaclass__ = abc.ABCMeta def __init__(self, parameter, var_name_long, region, @@ -24,6 +25,7 @@ def __init__(self, parameter, var_name_long, region, self.sftlf = sftlf def get_sftlf(self): + ''' Returns the sftlf attribute. ''' return self.sftlf def __call__(self): @@ -31,6 +33,7 @@ def __call__(self): @staticmethod def calculate_level_from_var(var): + ''' Get the level from the var string, where it's var_LEVEL ''' var_split_name = var.split('_') if len(var_split_name) > 1: level = float(var_split_name[-1]) * 100 @@ -39,6 +42,8 @@ def calculate_level_from_var(var): return level def setup_target_grid(self, obs_or_model_file): + ''' Call the set_target_grid function for + obs_or_model_file, which is of type Base. ''' if self.use_omon(self.obs_dict, self.var): regrid_method = self.parameter.regrid_method_ocn regrid_tool = self.parameter.regrid_tool_ocn @@ -55,17 +60,19 @@ def setup_target_grid(self, obs_or_model_file): @staticmethod def use_omon(obs_dict, var): + ''' For the given variable and obs_dict, do we use Omon? ''' obs_default = obs_dict[var][obs_dict[var]["default"]] return obs_default["CMIP_CMOR_TABLE"] == 'Omon' @staticmethod def create_sftlf(parameter): + ''' Create the sftlf file from the parameter. ''' sftlf = {} for test in parameter.test_data_set: - sft = pcmdi_metrics.io.base.Base(parameter.test_data_path, - getattr(parameter, "sftlf_filename_template", - parameter.filename_template)) + sft = Base(parameter.test_data_path, + getattr(parameter, "sftlf_filename_template", + parameter.filename_template)) sft.model_version = test sft.table = "fx" sft.realm = "atmos" @@ -96,17 +103,19 @@ def create_sftlf(parameter): @staticmethod def apply_custom_keys(obj, custom_dict, var): + ''' Apply the all of the keys in custom_dict that are var to obj. ''' for k, v in custom_dict.iteritems(): key = custom_dict[k] setattr(obj, k, key.get(var, key.get(None, ""))) @abc.abstractmethod def get(self): - """Calls the get function on the Base object.""" + ''' Calls the get function on the Base object. ''' raise NotImplementedError() @staticmethod def load_path_as_file_obj(name): + ''' Returns a File object for the file named name. ''' file_path = sys.prefix + '/share/pmp/' + name opened_file = None try: @@ -120,9 +129,9 @@ def load_path_as_file_obj(name): @abc.abstractmethod def hash(self): - """Calls the hash function on the Base object.""" + ''' Calls the hash function on the Base object. ''' raise NotImplementedError() def file_path(self): - """Calls the __call__() function on the Base object.""" + ''' Calls the __call__() function on the Base object. ''' raise NotImplementedError() diff --git a/src/python/pcmdi/scripts/driver/model.py b/src/python/pcmdi/scripts/driver/model.py index 3d900c56a..71506e038 100644 --- a/src/python/pcmdi/scripts/driver/model.py +++ b/src/python/pcmdi/scripts/driver/model.py @@ -3,11 +3,13 @@ import MV2 import cdutil import cdms2 -import pcmdi_metrics.io.base +from pcmdi_metrics.io.base import Base import pcmdi_metrics.driver.dataset class Model(pcmdi_metrics.driver.dataset.DataSet): + ''' Handles all the computation (setting masking, target grid, etc) + and some file I/O related to models. ''' def __init__(self, parameter, var_name_long, region, model, obs_dict, data_path, sftlf): super(Model, self).__init__(parameter, var_name_long, region, @@ -22,8 +24,8 @@ def __init__(self, parameter, var_name_long, region, self.setup_target_mask() def create_model_file(self): - self._model_file = pcmdi_metrics.io.base.Base(self.data_path, - self.parameter.filename_template) + ''' Creates an object that will eventually output the netCDF file. ''' + self._model_file = Base(self.data_path, self.parameter.filename_template) self._model_file.variable = self.var self._model_file.model_version = self.obs_or_model self._model_file.period = self.parameter.period @@ -34,6 +36,7 @@ def create_model_file(self): self.parameter.custom_keys, self.var) def setup_target_mask(self): + ''' Sets the mask and target_mask attribute of self._model_file ''' self.var_in_file = self.get_var_in_file() if self.region is not None: @@ -47,6 +50,8 @@ def setup_target_mask(self): MV2.not_equal(self.sftlf['target_grid'], region_value) def get(self): + ''' Gets the variable based on the region and level (if given) for + the file from data_path, which is defined in the initalizer. ''' try: if self.level is None: data_model = self._model_file.get( @@ -64,6 +69,7 @@ def get(self): raise RuntimeError('Need to skip model: %s' % self.obs_or_model) def get_var_in_file(self): + ''' Based off the model_tweaks parameter, get the variable mapping. ''' tweaks = {} tweaks_all = {} if hasattr(self.parameter, 'model_tweaks'): @@ -80,8 +86,10 @@ def get_var_in_file(self): return var_in_file def create_sftlf_model_raw(self, var_in_file): + ''' For the self.obs_or_model from the initializer, create a landSeaMask + from cdutil for self.sftlf[self.obs_or_model]['raw'] value. ''' if not hasattr(self.parameter, 'generate_sftlf') or \ - self.parameter.generate_sftlf is False: + self.parameter.generate_sftlf is False: logging.info('Model %s does not have sftlf, skipping region: %s' % (self.obs_or_model, self.region)) raise RuntimeError('Model %s does not have sftlf, skipping region: %s' % (self.obs_or_model, self.region)) @@ -98,7 +106,9 @@ def create_sftlf_model_raw(self, var_in_file): logging.info('Auto generated sftlf for model %s' % self.obs_or_model) def hash(self): + ''' Return a hash of the file. ''' return self._model_file.hash() def file_path(self): + ''' Return the path of the file. ''' return self._model_file() diff --git a/src/python/pcmdi/scripts/driver/observation.py b/src/python/pcmdi/scripts/driver/observation.py index a03ec716b..a7f357b84 100644 --- a/src/python/pcmdi/scripts/driver/observation.py +++ b/src/python/pcmdi/scripts/driver/observation.py @@ -1,10 +1,11 @@ import logging import MV2 -import pcmdi_metrics.io.base -import pcmdi_metrics.driver.dataset +from pcmdi_metrics.io.base import Base +from pcmdi_metrics.driver.dataset import Dataset -class OBS(pcmdi_metrics.io.base.Base): +class OBS(Base): + ''' Creates an output the netCDF file for an observation. ''' def __init__(self, root, var, obs_dict, obs='default', file_mask_template=None): template = "%(realm)/%(frequency)/%(variable)/" +\ @@ -32,6 +33,8 @@ def __init__(self, root, var, obs_dict, obs='default', self.variable = var def setup_based_on_obs_table(self, obs_table): + ''' Set the realm, frequency, ac based on the + CMIP_CMOR_TABLE value in the obs dict.''' if obs_table == u'Omon': self.realm = 'ocn' self.frequency = 'mo' @@ -46,7 +49,9 @@ def setup_based_on_obs_table(self, obs_table): self.ac = 'ac' -class Observation(pcmdi_metrics.driver.dataset.DataSet): +class Observation(Dataset): + ''' Handles all the computation (setting masking, target grid, etc) + and some file I/O related to observations. ''' def __init__(self, parameter, var_name_long, region, obs, obs_dict, data_path, sftlf): super(Observation, self).__init__(parameter, var_name_long, region, @@ -58,6 +63,7 @@ def __init__(self, parameter, var_name_long, region, self.setup_target_mask() def create_obs_file(self): + ''' Creates an object that will eventually output the netCDF file. ''' obs_mask_name = self.create_obs_mask_name() self._obs_file = OBS(self.data_path, self.var, self.obs_dict, self.obs_or_model, @@ -67,6 +73,7 @@ def create_obs_file(self): self._obs_file.case_id = self.parameter.case_id def create_obs_mask_name(self): + ''' Gets the name from the obs_mask, which is obtained from a netCDF file. ''' try: obs_from_obs_dict = self.get_obs_from_obs_dict() obs_mask = OBS(self.data_path, 'sftlf', @@ -80,6 +87,8 @@ def create_obs_mask_name(self): return obs_mask_name def get_obs_from_obs_dict(self): + ''' Returns the obsercation from the obsercation + dictionary for self.var and self.obs_or_model. ''' if isinstance(self.obs_dict[self.var][self.obs_or_model], (str, unicode)): obs_from_obs_dict = \ self.obs_dict[self.var][self.obs_dict[self.var][self.obs_or_model]] @@ -88,6 +97,7 @@ def get_obs_from_obs_dict(self): return obs_from_obs_dict def setup_target_mask(self): + ''' Sets the attribute target_mask of self._obs_file. ''' if self.region is not None: region_value = self.region.get('value', None) if region_value is not None: @@ -97,6 +107,8 @@ def setup_target_mask(self): ) def get(self): + ''' Gets the variable based on the region and level (if given) for + the file from data_path, which is defined in the initializer. ''' try: if self.level is not None: data_obs = self._obs_file.get(self.var, @@ -115,14 +127,18 @@ def get(self): self.var, self.obs_or_model, e) def hash(self): + ''' Return a hash of the file. ''' return self._obs_file.hash() def file_path(self): + ''' Return the path of the file. ''' return self._obs_file() @staticmethod - # This must remain static b/c used before an Observation obj is created. + # This must remain static b/c used before an Observation object is created. def setup_obs_list_from_parameter(parameter_obs_list, obs_dict, var): + ''' If the data_set list from the parameter is + for observations, apply these special cases. ''' obs_list = parameter_obs_list if 'all' in [x.lower() for x in obs_list]: obs_list = 'all' diff --git a/src/python/pcmdi/scripts/driver/outputmetrics.py b/src/python/pcmdi/scripts/driver/outputmetrics.py index cf6571a3e..7023b80f9 100644 --- a/src/python/pcmdi/scripts/driver/outputmetrics.py +++ b/src/python/pcmdi/scripts/driver/outputmetrics.py @@ -3,9 +3,9 @@ import os import cdms2 import pcmdi_metrics -import pcmdi_metrics.io.base -import pcmdi_metrics.driver.observation -import pcmdi_metrics.driver.dataset +from pcmdi_metrics.io.base import Base +from pcmdi_metrics.driver.observation import Observation +from pcmdi_metrics.driver.dataset import DataSet class OutputMetrics(object): @@ -23,7 +23,7 @@ def __init__(self, parameter, var_name_long, obs_dict, sftlf): string_template = "%(variable)%(level)_%(target_grid_name)_" +\ "%(regrid_tool)_%(regrid_method)_metrics" - self.out_file = pcmdi_metrics.io.base.Base(self.parameter.metrics_output_path, string_template) + self.out_file = Base(self.parameter.metrics_output_path, string_template) self.regrid_method = '' self.regrid_tool = '' @@ -34,6 +34,8 @@ def __init__(self, parameter, var_name_long, obs_dict, sftlf): self.setup_metrics_dictionary() def setup_metrics_dictionary(self): + ''' Initalize the results dict (metrics_dictionary) and the metrics documentation + dict (metrics_def_dictionary) which is put in the results dict. ''' self.metrics_def_dictionary = collections.OrderedDict() self.metrics_dictionary = collections.OrderedDict() self.metrics_dictionary["DISCLAIMER"] = self.open_disclaimer() @@ -45,7 +47,7 @@ def setup_metrics_dictionary(self): self.metrics_dictionary["References"] = {} self.metrics_dictionary["RegionalMasking"] = {} - level = pcmdi_metrics.driver.dataset.DataSet.calculate_level_from_var(self.var_name_long) + level = DataSet.calculate_level_from_var(self.var_name_long) if level is None: self.out_file.level = '' else: @@ -53,13 +55,16 @@ def setup_metrics_dictionary(self): self.out_file.level = "-%i" % (int(level / 100.0)) def open_disclaimer(self): - f = pcmdi_metrics.driver.dataset.DataSet.load_path_as_file_obj('disclaimer.txt') + ''' Return the contents of disclaimer.txt. ''' + f = DataSet.load_path_as_file_obj('disclaimer.txt') contents = f.read() f.close() return contents def setup_regrid_and_realm_vars(self): - if pcmdi_metrics.driver.dataset.DataSet.use_omon(self.obs_dict, self.var): + ''' Set the regrid_method, regrid_tool, table_realm, + and realm based off the obs dict and var. ''' + if DataSet.use_omon(self.obs_dict, self.var): self.regrid_method = self.parameter.regrid_method_ocn self.regrid_tool = self.parameter.regrid_tool_ocn self.table_realm = 'Omon' @@ -71,18 +76,21 @@ def setup_regrid_and_realm_vars(self): self.realm = "atm" def setup_out_file(self): + ''' Setup for the out_file, which outputs both the .json and .txt. ''' self.out_file.set_target_grid( self.parameter.target_grid, self.regrid_tool, self.regrid_method) self.out_file.variable = self.var self.out_file.realm = self.realm self.out_file.table = self.table_realm self.out_file.case_id = self.parameter.case_id - pcmdi_metrics.driver.dataset.DataSet.apply_custom_keys(self.out_file, self.parameter.custom_keys, self.var) + DataSet.apply_custom_keys(self.out_file, self.parameter.custom_keys, self.var) def add_region(self, region): + ''' Add a region to the metrics_dictionary. ''' self.metrics_dictionary['RegionalMasking'][self.get_region_name_from_region(region)] = region def calculate_and_output_metrics(self, ref, test): + ''' Given ref and test (both either of type Observation or Model), compute the metrics. ''' if isinstance(self.obs_dict[self.var][ref.obs_or_model], (str, unicode)): self.obs_var_ref = self.obs_dict[self.var][self.obs_dict[self.var][ref.obs_or_model]] else: @@ -99,7 +107,8 @@ def calculate_and_output_metrics(self, ref, test): try: test_data = test() except RuntimeError as e: - # THIS EXCEPTION IS RAISED TO BREAK OUT OF THE FOR LOOP IN RunDiags + # THIS EXCEPTION IS RAISED TO BREAK OUT OF THE FOR LOOP IN PCMDI_DRIVER + # THIS SHOULD BE A CUSTOM EXCEPTION (PrematureBreakError) raise RuntimeError('Need to skip model: %s' % test.obs_or_model) # Todo: Make this a fcn @@ -149,6 +158,7 @@ def calculate_and_output_metrics(self, ref, test): self.write_on_exit() def set_grid_in_metrics_dictionary(self, test_data): + ''' Set the grid in metrics_dictionary. ''' grid = {} grid['RegridMethod'] = self.regrid_method grid['RegridTool'] = self.regrid_tool @@ -157,7 +167,7 @@ def set_grid_in_metrics_dictionary(self, test_data): self.metrics_dictionary['GridInfo'] = grid def set_simulation_desc(self, test, test_data): - + ''' Fillout information for the output .json and .txt files. ''' self.metrics_dictionary["RESULTS"][test.obs_or_model] = \ self.metrics_dictionary["RESULTS"].get(test.obs_or_model, {}) if "SimulationDescription" not in \ @@ -220,10 +230,11 @@ def set_simulation_desc(self, test, test_data): self.sftlf[test.obs_or_model]["md5"] def output_interpolated_model_climatologies(self, test, test_data): + ''' Save the netCDF file. ''' region_name = self.get_region_name_from_region(test.region) pth = os.path.join(self.parameter.test_clims_interpolated_output, region_name) - clim_file = pcmdi_metrics.io.base.Base(pth, self.parameter.filename_output_template) + clim_file = Base(pth, self.parameter.filename_output_template) logging.info('Saving interpolated climatologies to: %s' % clim_file()) clim_file.level = self.out_file.level clim_file.model_version = test.obs_or_model @@ -238,10 +249,11 @@ def output_interpolated_model_climatologies(self, test, test_data): clim_file.variable = self.var clim_file.region = region_name clim_file.realization = self.parameter.realization - pcmdi_metrics.driver.dataset.DataSet.apply_custom_keys(clim_file, self.parameter.custom_keys, self.var) + DataSet.apply_custom_keys(clim_file, self.parameter.custom_keys, self.var) clim_file.write(test_data, type="nc", id=self.var) def get_region_name_from_region(self, region): + ''' Extract the region name from the region dict. ''' # region is both in ref and test region_name = region['id'] if region is None: @@ -249,16 +261,18 @@ def get_region_name_from_region(self, region): return region_name def check_save_test_clim(self, ref): + ''' Bunch of checks to see if the netCDF files are needed to be saved. ''' # Since we are only saving once per reference data set (it's always # the same after), we need to check if ref is the first value from the # parameter, hence we have ref.obs_or_model == reference_data_set[0] reference_data_set = self.parameter.reference_data_set - reference_data_set = pcmdi_metrics.driver.observation.Observation.setup_obs_list_from_parameter( + reference_data_set = Observation.setup_obs_list_from_parameter( reference_data_set, self.obs_dict, self.var) return not self.parameter.dry_run and hasattr(self.parameter, 'save_test_clims') \ and self.parameter.save_test_clims is True and ref.obs_or_model == reference_data_set[0] # noqa def write_on_exit(self): + ''' Output the metrics_dictionary as a json and text file. ''' self.metrics_dictionary['METRICS'] = self.metrics_def_dictionary if not self.parameter.dry_run: logging.info('Saving results to: %s' % self.out_file()) diff --git a/src/python/pcmdi/scripts/driver/pmp_driver.py b/src/python/pcmdi/scripts/driver/pmp_driver.py deleted file mode 100755 index e3dc93c06..000000000 --- a/src/python/pcmdi/scripts/driver/pmp_driver.py +++ /dev/null @@ -1,64 +0,0 @@ -import logging -import os -import cdp.cdp_driver -import pcmdi_metrics.driver.rundiags -import pcmdi_metrics.driver.pmp_parser - - -class PMPDriver(cdp.cdp_driver.CDPDriver): - def __init__(self): - parser = pcmdi_metrics.driver.pmp_parser.PMPParser() - super(PMPDriver, self).__init__(parser.get_parameter()) - self.run() - logging.basicConfig(level=logging.DEBUG) - - def check_parameter(self): - # Check that all of the variables used from parameter exist. - # Just check that the parameters use exist in the parameter object. - # The validity for each option was already - # checked by the parameter itself. - ''' - vars_to_check = ['case_id', 'test_data_set', 'period', 'realization', - 'vars', 'reference_data_set', 'target_grid', 'regrid_tool', - 'regrid_method', 'regrid_tool_ocn', - 'regrid_method_ocn', 'save_test_clims', - 'regions_specs', 'regions', 'custom_keys', - 'filename_template', - 'generate_surface_type_land_fraction', - 'surface_type_land_fraction_filename_template', - 'test_data_path', 'reference_data_path', - 'metrics_output_path', - 'test_clims_interpolated_output', - 'filename_output_template', - 'custom_observations_path'] - - for var in vars_to_check: - if not hasattr(self.parameter, var): - logging.error("%s is not in the parameter file!" % var) - raise AttributeError("%s is not in the parameter file!" % var) - ''' - # TODO Add this all to PMPParameter class soon - if getattr(self.parameter, "save_test_clims", False): - if not hasattr(self.parameter, "test_clims_interpolated_output"): - self.parameter.test_clims_interpolated_output = os.path.join( - self.parameter.metrics_output_path, - 'interpolated_model_clims') - logging.warning("Your parameter file asks to save interpolated test climatologies," + - " but did not define a path for this\n We set 'test_clims_interpolated" + - "_output' to %s for you" % self.parameter.test_clims_interpolated_output) - if not hasattr(self.parameter, "filename_output_template"): - template = "%(variable)%(level)_%(model_version)_%(table)_%(realization)" + \ - "_%(period).interpolated.%(regrid_method).%(target_grid_name)-clim%(ext)" - self.parameter.filename_output_template = template - logging.warning("Your parameter file asks to save interpolated model climatologies," + - " but did not define a name template for this\nWe set 'filename_output" + - "_template' to %s for you" % self.parameter.filename_output_template) - if not hasattr(self.parameter, 'dry_run'): - self.parameter.dry_run = True - - def run_diags(self): - run = pcmdi_metrics.driver.rundiags.RunDiags(self.parameter) - run() - - def export(self): - pass diff --git a/src/python/pcmdi/scripts/driver/rundiags.py b/src/python/pcmdi/scripts/driver/rundiags.py deleted file mode 100755 index 393515968..000000000 --- a/src/python/pcmdi/scripts/driver/rundiags.py +++ /dev/null @@ -1,182 +0,0 @@ -import logging -import json -import pcmdi_metrics.driver.outputmetrics -import pcmdi_metrics.driver.observation -import pcmdi_metrics.driver.model -import pcmdi_metrics.driver.dataset - - -class RunDiags(object): - - def __init__(self, parameter): - logging.basicConfig(level=logging.DEBUG) - - self.parameter = parameter - self.obs_dict = {} - self.regions_dict = {} - self.var = '' - self.output_metric = None - self.region = '' - self.sftlf = pcmdi_metrics.driver.dataset.DataSet.create_sftlf(self.parameter) - self.default_regions = [] - self.regions_specs = {} - - def __call__(self): - self.run_diags() - - def run_diags(self): - self.obs_dict = self.load_obs_dict() - self.regions_dict = self.create_regions_dict() - - for self.var_name_long in self.parameter.vars: - self.var = self.var_name_long.split('_')[0] - - if self.var not in self.obs_dict: - logging.error('Var %s not in obs_dict' % self.var) - continue - - self.output_metric = pcmdi_metrics.driver.outputmetrics.OutputMetrics( - self.parameter, self.var_name_long, - self.obs_dict, sftlf=self.sftlf) - - for region in self.regions_dict[self.var]: - self.region = self.create_region(region) - # Need to add the region to the output dict now b/c - # otherwise if done later, sometimes it's not added due to - # premature break in the for loops for reference and test. - self.output_metric.add_region(self.region) - # Runs obs vs obs, obs vs model, or model vs model - self.run_reference_and_test_comparison() - self.output_metric.write_on_exit() - - def load_obs_dict(self): - obs_file_name = 'obs_info_dictionary.json' - obs_json_file = pcmdi_metrics.driver.dataset.DataSet.load_path_as_file_obj(obs_file_name) - obs_dict = json.loads(obs_json_file.read()) - obs_json_file.close() - - if hasattr(self.parameter, 'custom_observations'): - # Can't use load_path_as_file_obj() b/c might not be in /share/ - cust_obs_json_file = open(self.parameter.custom_observations) - obs_dict.update(json.load(cust_obs_json_file)) - cust_obs_json_file.close() - return obs_dict - - def create_regions_dict(self): - self.load_default_regions_and_regions_specs() - - regions_dict = {} - for var_name_long in self.parameter.vars: - var = var_name_long.split('_')[0] - regions = self.parameter.regions - region = regions.get(var, self.default_regions) - if not isinstance(region, (list, tuple)): - region = [region] - if None in region: - region.remove(None) - for r in self.default_regions: - region.insert(0, r) - regions_dict[var] = region - - return regions_dict - - def load_default_regions_and_regions_specs(self): - default_regions_file = \ - pcmdi_metrics.driver.dataset.DataSet.load_path_as_file_obj('default_regions.py') - execfile(default_regions_file.name) - default_regions_file.close() - try: - self.default_regions = locals()['default_regions'] - self.regions_specs = locals()['regions_specs'] - except KeyError: - logging.error('Failed to open default_regions.py') - - region_values = self.parameter.regions_values - region_values.update(getattr(self.parameter, "regions_values", {})) - # Now need to edit regions_specs - for region in region_values: - insert_dict = {'value': region_values[region]} - if region in self.regions_specs: - self.regions_specs[region].update(insert_dict) - else: - self.regions_specs[region] = insert_dict - self.regions_specs.update(getattr(self.parameter, - "regions_specs", {})) - - def create_region(self, region): - if isinstance(region, basestring): - region_name = region - region = self.regions_specs.get( - region_name, - self.regions_specs.get(region_name.lower())) - region['id'] = region_name - elif region is None: - # It's okay if region == None - pass - else: - raise Exception('Unknown region: %s' % region) - return region - - def run_reference_and_test_comparison(self): - reference_data_set = self.parameter.reference_data_set - test_data_set = self.parameter.test_data_set - - # Member variables are used so when it's obs_vs_model, we know - # which is which. - reference_data_set_is_obs = self.is_data_set_obs(reference_data_set) - test_data_set_is_obs = self.is_data_set_obs(test_data_set) - - # If reference or test are obs, the data sets themselves need to - # be modified. - if reference_data_set_is_obs: - reference_data_set = pcmdi_metrics.driver.observation.Observation.setup_obs_list_from_parameter( - reference_data_set, self.obs_dict, self.var) - if test_data_set_is_obs: - test_data_set = pcmdi_metrics.driver.observation.Observation.setup_obs_list_from_parameter( - test_data_set, self.obs_dict, self.var) - - # self.reference/self.test are either an obs or model - for self.reference in reference_data_set: - try: - ref = self.determine_obs_or_model(reference_data_set_is_obs, - self.reference, self.parameter.reference_data_path) - # TODO Make this a custom exception. This exception is for - # when a model doesn't have sftlf for a given region - except RuntimeError: - continue - - for self.test in test_data_set: - try: - test = self.determine_obs_or_model(test_data_set_is_obs, - self.test, self.parameter.test_data_path) - # TODO Make this a custom exception. This exception is for - # when a model doesn't have sftlf for a given region - except RuntimeError: - continue - - try: - self.output_metric.calculate_and_output_metrics(ref, test) - except RuntimeError: - break - - def is_data_set_obs(self, data_set): - if 'all' in data_set: - return True - data_set_is_obs = True - # If an element of data_set is not in the obs_dict, then - # data_set is a model. - for obs in data_set: - if obs not in self.obs_dict[self.var]: - data_set_is_obs = False - break - return data_set_is_obs - - def determine_obs_or_model(self, is_obs, ref_or_test, data_path): - if is_obs: - print 'OBS' - return pcmdi_metrics.driver.observation.Observation(self.parameter, self.var_name_long, self.region, - ref_or_test, self.obs_dict, data_path, self.sftlf) - else: - print 'MODEL' - return pcmdi_metrics.driver.model.Model(self.parameter, self.var_name_long, self.region, - ref_or_test, self.obs_dict, data_path, self.sftlf) diff --git a/src/python/pcmdi/scripts/pcmdi_metrics_driver.py b/src/python/pcmdi/scripts/pcmdi_metrics_driver.py index 6156f39be..25196ae5b 100644 --- a/src/python/pcmdi/scripts/pcmdi_metrics_driver.py +++ b/src/python/pcmdi/scripts/pcmdi_metrics_driver.py @@ -1,4 +1,199 @@ #!/usr/bin/env python -import pcmdi_metrics.driver.pmp_driver +import logging +import json +from pcmdi_metrics.driver.outputmetrics import OutputMetrics +from pcmdi_metrics.driver.observation import Observation +from pcmdi_metrics.driver.model import Model +import pcmdi_metrics.driver.dataset +import pcmdi_metrics.driver.dataset.pmp_parser -pcmdi_metrics.driver.pmp_driver.PMPDriver() + +class PMPDriver(object): + + def __init__(self, parameter): + logging.basicConfig(level=logging.DEBUG) + + self.parameter = parameter + self.obs_dict = {} + self.regions_dict = {} + self.var = '' + self.output_metric = None + self.region = '' + self.sftlf = pcmdi_metrics.driver.dataset.DataSet.create_sftlf(self.parameter) + self.default_regions = [] + self.regions_specs = {} + + def __call__(self): + self.run_diags() + + def run_diags(self): + ''' Runs the diagnostics. What did you think it did? ''' + self.obs_dict = self.load_obs_dict() + self.regions_dict = self.create_regions_dict() + + for self.var_name_long in self.parameter.vars: + self.var = self.var_name_long.split('_')[0] + + if self.var not in self.obs_dict: + logging.error('Variable %s not in obs_dict' % self.var) + continue + + self.output_metric = OutputMetrics(self.parameter, self.var_name_long, + self.obs_dict, sftlf=self.sftlf) + + for region in self.regions_dict[self.var]: + self.region = self.create_region(region) + # Need to add the region to the output dict now b/c + # otherwise if done later, sometimes it's not added due to + # premature break in the for loops for reference and test. + self.output_metric.add_region(self.region) + # Runs obs vs obs, obs vs model, or model vs model + self.run_reference_and_test_comparison() + self.output_metric.write_on_exit() + + def load_obs_dict(self): + ''' Loads obs_info_dictionary.json and appends + custom_observations from the parameter file if needed. ''' + obs_file_name = 'obs_info_dictionary.json' + obs_json_file = pcmdi_metrics.driver.dataset.DataSet.load_path_as_file_obj(obs_file_name) + obs_dict = json.loads(obs_json_file.read()) + obs_json_file.close() + + if hasattr(self.parameter, 'custom_observations'): + # Can't use load_path_as_file_obj() b/c might not be in /share/ + cust_obs_json_file = open(self.parameter.custom_observations) + obs_dict.update(json.load(cust_obs_json_file)) + cust_obs_json_file.close() + return obs_dict + + def create_regions_dict(self): + ''' Creates a dict from self.default_regions. ''' + self.load_default_regions_and_regions_specs() + + regions_dict = {} + for var_name_long in self.parameter.vars: + var = var_name_long.split('_')[0] + regions = self.parameter.regions + region = regions.get(var, self.default_regions) + if not isinstance(region, (list, tuple)): + region = [region] + if None in region: + region.remove(None) + for r in self.default_regions: + region.insert(0, r) + regions_dict[var] = region + + return regions_dict + + def load_default_regions_and_regions_specs(self): + ''' Gets the default_regions dict and regions_specs dict + from default_regions.py and stores them as attributes. ''' + default_regions_file = \ + pcmdi_metrics.driver.dataset.DataSet.load_path_as_file_obj('default_regions.py') + execfile(default_regions_file.name) + default_regions_file.close() + try: + self.default_regions = locals()['default_regions'] + self.regions_specs = locals()['regions_specs'] + except KeyError: + logging.error('Failed to open default_regions.py') + + region_values = self.parameter.regions_values + region_values.update(getattr(self.parameter, "regions_values", {})) + # Now need to edit regions_specs + for region in region_values: + insert_dict = {'value': region_values[region]} + if region in self.regions_specs: + self.regions_specs[region].update(insert_dict) + else: + self.regions_specs[region] = insert_dict + self.regions_specs.update(getattr(self.parameter, + "regions_specs", {})) + + def create_region(self, region): + ''' From the argument region, it gets that region from self.regions_specs + (which itself is loaded from default_regions.py) ''' + if isinstance(region, basestring): + region_name = region + region = self.regions_specs.get( + region_name, + self.regions_specs.get(region_name.lower())) + region['id'] = region_name + elif region is None: + # It's okay if region == None + pass + else: + raise Exception('Unknown region: %s' % region) + return region + + def run_reference_and_test_comparison(self): + ''' Does the (obs or model) vs (obs or model) comparison. ''' + reference_data_set = self.parameter.reference_data_set + test_data_set = self.parameter.test_data_set + + reference_data_set_is_obs = self.is_data_set_obs(reference_data_set) + test_data_set_is_obs = self.is_data_set_obs(test_data_set) + + # If either the reference or test are obs, the data sets + # themselves need to be modified. + if reference_data_set_is_obs: + reference_data_set = Observation.setup_obs_list_from_parameter( + reference_data_set, self.obs_dict, self.var) + if test_data_set_is_obs: + test_data_set = Observation.setup_obs_list_from_parameter( + test_data_set, self.obs_dict, self.var) + + # self.reference/self.test are either an obs or model + for reference in reference_data_set: + try: + ref = self.determine_obs_or_model(reference_data_set_is_obs, + reference, self.parameter.reference_data_path) + # TODO Make this a custom exception. This exception is for + # when a model doesn't have sftlf for a given region + except RuntimeError: + continue + + for test in test_data_set: + try: + tst = self.determine_obs_or_model(test_data_set_is_obs, + test, self.parameter.test_data_path) + # TODO Make this a custom exception. This exception is for + # when a model doesn't have sftlf for a given region + except RuntimeError: + continue + + try: + self.output_metric.calculate_and_output_metrics(ref, tst) + except RuntimeError: + break + + def is_data_set_obs(self, data_set): + ''' Is data_set (which is either a test or reference) an obs? ''' + if 'all' in data_set: + return True + data_set_is_obs = True + # If an element of data_set is not in the obs_dict, then + # data_set is a model. + for obs in data_set: + if obs not in self.obs_dict[self.var]: + data_set_is_obs = False + break + return data_set_is_obs + + def determine_obs_or_model(self, is_obs, ref_or_test, data_path): + ''' Actually create Observation or Module object + based on if ref_or_test is an obs or model. ''' + if is_obs: + logging.info('%s is an obs' % ref_or_test) + return Observation(self.parameter, self.var_name_long, self.region, + ref_or_test, self.obs_dict, data_path, self.sftlf) + else: + logging.info('%s is a model' % ref_or_test) + return Model(self.parameter, self.var_name_long, self.region, + ref_or_test, self.obs_dict, data_path, self.sftlf) + + +parser = pcmdi_metrics.driver.pmp_parser.PMPParser() +parameter = parser.get_parameter() +driver = PMPDriver(parameter) +driver.run_diags()