Source code for scores.ensemble

# -*- coding: utf-8 -*-
"""
Created on 20 March 2018

@author: lafaysse
"""

import numpy as np
from snowtools.utils.prosimu import prosimu

ESMSnowMIP_dicvarnames = dict(snowdepth="snd_auto", snowswe="snw_auto", snowdepthman="snd_man",
                              snowsweman="snw_man", tsurf="ts", albedo="albs")
"""
dict mapping common variable names to ESMSnowMIP variable names
"""
ESMSnowMIP_alternate_varnames = dict(snd_auto="snd_can_auto")
"""
dict mapping ESMSnowMIP_alternate_varnames
"""

SURFEX_dicvarnames = dict(snowdepth="DSN_T_ISBA", snowswe="WSN_T_ISBA", snowdepthman="DSN_T_ISBA",
                          snowsweman="WSN_T_ISBA", tsurf="TS_ISBA", albedo="ASN_ISBA")
"""
dict mapping common variable names to surfex variable names
"""


[docs] class EnsembleScores(object): """ Ensemble scores """ startwinter = 10 endwinter = 6 def __init__(self, timeObs, timeSim, obs, ensemble): """ :param timeObs: :param timeSim: :param obs: array of observations dimension : time :param ensemble: array of forecasts dimensions (member, time) """ self.obsCommon = obs self.ensCommon = ensemble
[docs] def CRPS(self): """ CRPS is computed from https://pypi.python.org/pypi/properscoring#downloads :return: mean CRPS over the data period :rtype: float """ # for each date, sort members (increasingly) # one line is not anymore one single member ens_common = np.sort(self.ensCommon, axis=0) evenement = 0 # compute the integral for each obs CRPSVector = np.ones(len(self.obsCommon)) for obs in self.obsCommon: # cumulated distribution function # Heaviside function for obs obsCDF = 0 ensembleCDF = 0 precPrevision = 0 integrale = 0 n = 0 # Number of valid members at this date valid = np.ma.masked_invalid(ens_common[:, evenement]) n_valid = np.ma.count(valid) if n_valid > 0: for prevision in valid.compressed(): """ if np.isnan(prevision): #If the first forecast is nan, they all are # (nan at the end in the sort step) # --> do not compute crps if n==0: integral=np.nan #otherwise prevision = precPrevision break """ # immediately after obs if obsCDF == 0 and obs < prevision: integrale += (obs - precPrevision) * (ensembleCDF ** 2) integrale += (prevision - obs) * (ensembleCDF - 1) ** 2 obsCDF = 1. # otherwise else: integrale += ((prevision - precPrevision) * (ensembleCDF - obsCDF) ** 2) # add 1/Ndate to CDF ensemble ensembleCDF += 1. / float(n_valid) precPrevision = prevision n += 1 # if obs > all forecasts if obsCDF == 0: integrale += obs - prevision CRPSVector[evenement] = integrale # if no simulations for this obs else: CRPSVector[evenement] = np.nan evenement += 1 CRPS = np.mean(np.ma.masked_invalid(CRPSVector)) return CRPS
[docs] def CRPS_decomp(self): """ BC implementing the Hersbach et al. formulation for decomposition. inspired on https://github.com/brankart/ensdam/blob/master/src/EnsScores/score_crps.F90 aggregated assuming that realizations are independant. :return: CRPS, Reliability, potential CRPS """ ens_common = np.sort(self.ensCommon, axis=0) nbmb = np.shape(ens_common)[0] ai = np.zeros((nbmb + 1,)) bi = np.zeros((nbmb + 1,)) pi = np.arange(0, nbmb + 1.) / nbmb oi_0 = 0. oi_nbmb = len(self.obsCommon) evenement = 0 for obs in self.obsCommon: if not np.ma.is_masked(obs): # Number of valid members at this date valid = np.ma.masked_invalid(ens_common[:, evenement]) ens = valid.compressed() n_valid = np.ma.count(valid) if n_valid > 0: # obs smaller than all if obs < ens[0]: ai[0] += 0. bi[0] += ens[0] - obs oi_0 += 1. # obs bigger than all elif obs > ens[nbmb - 1]: ai[nbmb] += obs - ens[nbmb - 1] bi[nbmb] += 0. oi_nbmb -= 1. # obs within for i in range(1, nbmb): if obs >= ens[i]: ai[i] += ens[i] - ens[i - 1] bi[i] += 0. elif (ens[i] > obs) and (obs >= ens[i - 1]): ai[i] += obs - ens[i - 1] bi[i] += ens[i] - obs elif obs < ens[i - 1]: ai[i] += 0. bi[i] += ens[i] - ens[i - 1] else: raise Exception else: raise Exception evenement += 1 ai_avg = ai / len(self.obsCommon) bi_avg = bi / len(self.obsCommon) gi_avg = np.zeros(np.shape(ai_avg)) oi_avg = np.zeros(np.shape(ai_avg)) for i in range(1, nbmb): gi_avg[i] = ai_avg[i] + bi_avg[i] if gi_avg[i] > 0.: oi_avg[i] = bi_avg[i] / gi_avg[i] oi_avg[0] = oi_0 / len(self.obsCommon) oi_avg[nbmb] = oi_nbmb / len(self.obsCommon) if oi_avg[0] > 0.: gi_avg[0] = bi_avg[0] / oi_avg[0] if oi_avg[nbmb] < 1: gi_avg[nbmb] = ai_avg[nbmb] / (1. - oi_avg[nbmb]) CRPS = np.nansum(ai_avg * pi ** 2 + bi_avg * (1. - pi)**2) Reli = np.nansum(gi_avg * (oi_avg - pi)**2) CRPS_pot = np.nansum(gi_avg * oi_avg * (1. - oi_avg)) return CRPS, Reli, CRPS_pot
@property def meanEnsemble(self): """ ensemble mean, dealing with only some members valid :return: ensemble mean for every date """ return np.ma.masked_invalid(self.ensCommon).mean(axis=0)
[docs] def dispersionEnsemble(self): """ calculate ensemble spread, the RMSE of the ensemble mean and the spread/skill relationship :return: spread, RMSE of ensemble mean, spread/skill """ # bc 21/10/19 code optim disp = np.sqrt(np.mean([np.mean((np.ma.masked_invalid(m) - self.meanEnsemble)**2) for m in self.ensCommon])) rmse_mean = np.sqrt(np.mean(np.square(self.meanEnsemble - self.obsCommon))) return disp, rmse_mean, disp / rmse_mean
[docs] class ESCROC_EnsembleScores(EnsembleScores): def __init__(self, profiles, obsfile, varname): """ :param profiles: list of simulation files (one per ensemble member :param obsfile: observation file (netCDF format) :param varname: variable name """ self.read(profiles, obsfile, varname)
[docs] def read_var_ifpresent(self, dataNc, varname, convert1d=False): """ read a variable if present in file else return an empty numpy array. convert degrees C to Kelvin if the variable name is Ts and convert1d=False. :param dataNc: dataset :type dataNc: prosimu :param varname: variable name :type varname: str :param convert1d: :type convert1d: bool :return: data array :rtype: np.array """ if varname not in dataNc.listvar(): if varname in list(ESMSnowMIP_alternate_varnames.keys()): varname = ESMSnowMIP_alternate_varnames[varname] if varname in dataNc.listvar(): if convert1d: array = dataNc.read(varname, selectpoint=0) if varname == "ts": array = np.where(array > 273.16, np.nan, array) return array else: if varname == "ts": delta = 273.15 else: delta = 0 return dataNc.read(varname) + delta else: ntime_nc = dataNc.getlendim("time") emptyvar = np.empty(ntime_nc, "float") emptyvar[:] = np.nan return emptyvar
[docs] def read_sim_ifpresent(self, dataNc, varname): """ read a variable from simulation file :param dataNc: dataset :type dataNc: prosimu :param varname: variable name to read :type varname: str :return: """ return self.read_var_ifpresent(dataNc, varname, convert1d=True)
[docs] def varsimname(self, varname): """ convert variable name to variable name in SURFEX output :param varname: variable name :type varname: str :return: corresponding value from `SURFEX_dicvarnames` :rtype: str """ return SURFEX_dicvarnames[varname]
[docs] def varobsname(self, varname): """ convert variable name to ESMSnowMIP variable name :param varname: variable name :type varname: str :return: corresponding value from `ESMSnowMIP_dicvarnames` :rtype: str """ return ESMSnowMIP_dicvarnames[varname]
[docs] def read(self, profiles, obsfile, varname): """ read simulation files and observation file and extract dates with available observations in winter. sets self.obsCommon and self.ensCommon :param profiles: list of simulation files (one per ensemble member, netCDF format) :type profiles: list of path-likes :param obsfile: observation file in NetCDF format :type obsfile: pathlike :param varname: variable to be read """ for p, profile in enumerate(profiles): print("open file " + profile) data_sim = prosimu(profile) if p == 0: time_sim = data_sim.readtime() ensemble = np.empty((len(profiles), len(time_sim)), "float") var_sim = self.read_sim_ifpresent(data_sim, self.varsimname(varname)) ensemble[p, :] = var_sim print("close file") data_sim.close() print("open obs") data_obs = prosimu(obsfile) time_obs = data_obs.readtime() var_obs = self.read_var_ifpresent(data_obs, self.varobsname(varname)) data_obs.close() print("close obs") print(var_obs.shape) '''Extract common date between observations and simulations''' # Identify winter period winter = np.empty_like(time_obs, 'bool') for i, t in enumerate(time_obs): winter[i] = t.month >= self.startwinter or t.month <= self.endwinter # First reduce observation vector to available observations and winter ind_obs_ok = np.invert(np.isnan(var_obs)) & winter print("number of valid obs in winter") print(np.sum(ind_obs_ok)) time_obs_ok = time_obs[ind_obs_ok] obs_ok = var_obs[ind_obs_ok] mask_sim = np.in1d(time_sim, time_obs_ok) mask_obs = np.in1d(time_obs_ok, time_sim) self.obsCommon = obs_ok[mask_obs] self.ensCommon = ensemble[:, mask_sim]