-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPrCA.py
164 lines (154 loc) · 7.21 KB
/
PrCA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#!/usr/bin/python
# import modules used here -- sys is a very standard one
import sys
import numpy as np
import scipy as sp
import netCDF4 as nc
import scipy.linalg as la
def PrCA(YY,YYi,signal,noise,mask,neofs,surf='l',MCarlo=False,nmonte=1000):
"""
Subroutine that calculates the Predictable Component Analysis of a set of numerical simulations
Input:
YY: Eigenfunction.
Dimensions [# of points in latitude x # of points in longitude, # of modes]
YYi: Pseudo Enverse Eigenfunction.
Dimensions [# of points in latitude x # of points in longitude, # of modes]
YY and YYi are defined in the function laplacians.py
signal: 3D array with signal values (i.e. ensemble mean of simulation anomalies).
Dimension [# of points in time, #of points in longitude]
noise: 4D array with noise values (i.e. simulation anomalies minus signal).
Dimension [# of points in time, # of simulation members, # of points in latitude,
#of points in longitude]
mask: 2D array with value 0 (zero) over the ocean and value 1 over land
neofs: integer that indicates the number of modes to be retained
surf: type of surface of interest ('l' for land; 'o' for ocean; 'a' for both land and ocean).
Default is land
MCarlo (optional): when set to True the code performs a Monte Carlo simulation of the
maximized signal-to-noise rations
nmonte: number of iterations in the Monte Carlo simulation
Output:
evalue: Eigenvalues maximizing signal-to-noise ratio
DImensions [# of modes]
evector: Eigenvectors maximizing signal-to-noise ratio
Dimensions [# of modes, #of modes]
tseries: time series of each Predictable component.
Dimensions [time, # of modes]
spat: Spatial Pattern of the Predictable Components.
Dimensions [# of points in latitude, # of points in longitude, # of modes]
nsig: 2D array with the upper (95th percentile) and lower (5th percentile) boundaries
of the maximized signal-to-noise ratios according to a Monte Carlo simulation.
Dimensions [# of modes, 2]
nsig[:,0] --> lower boundary (5th percentile)
nsig[:,1] --> upper boundary (95th percentile)
Examples:
---------
For 20 modes over land only:
>>> evalue, evector, tseries, spat, nsig = PrCA(YY,YYi,signal,noise,mask,20)
For 10 modes over the ocean:
>>> evalue, evector, tseries, spat, nsig = PrCA(YY,YYi,signal,noise,mask,10,'o')
For 10 modes over the ocean, including a Monte Carlo simulation with 1000 iterations:
>>> evalue, evector, tseries, spat, nsig = PrCA(YY,YYi,signal,noise,mask,10,'o',True,1000)
Recommendation: run the with MCarlo set to False while testing for the number of optimum
PrCAs to retain. The Monte Carlo simulation will decrease the code's
efficiency.
Suplementary information:
---------
Projected precipitation time series [phat; as in Fig. 11a of Bombardi et al. (2018)]
can be calculated as:
nyrs = # of points in time
nlat = # of points in latitude
nlon = # of points in longitude
signal = observed precipitation anomalies. Dimesnions [nyrs, nlat, nlon]
fld=signal.reshape(nyrs,nlat*nlon)
if surf == 'a':
id = (mask.reshape(nlat*nlon) > -1.)
if surf == 'l':
id = (mask.reshape(nlat*nlon) == 1.)
if surf == 'o':
id = (mask.reshape(nlat*nlon) == 0.)
pcsig = np.matmul(fld[:,id],YYi[id,0:neofs])
phat=np.matmul(pcsig,evector)
Conversely, the input data (sim) can be reconstructed as:
simhat = np.matmul(tseries.transpose(),pcsig)
deof=np.zeros((nlon*nlat,neofs))
if surf == 'a':
id = (mask.reshape(nlat*nlon) > -1.)
if surf == 'l':
id = (mask.reshape(nlat*nlon) == 1.)
if surf == 'o':
id = (mask.reshape(nlat*nlon) == 0.)
deof[id,:] = np.matmul(YY[id,0:10],simhat.transpose())
tmp=np.matmul(deof,tseries.transpose())
sim=tmp.reshape(nlat,nlon,ntmp)
"""
# defining dimentions
ntmp=noise[:,0,0,0].size
nens=noise[0,:,0,0].size
nlat=noise[0,0,:,0].size
nlon=noise[0,0,0,:].size
if surf != 'l' and \
surf != 'o' and \
surf != 'a':
warnings.warn('Invalid value for surf')
if mask.max() > 1. or mask.min() < 0.:
warnings.warn('Invalid values found in mask')
if surf == 'a':
id = (mask.reshape(nlat*nlon) > -1.)
if surf == 'l':
id = (mask.reshape(nlat*nlon) == 1.)
if surf == 'o':
id = (mask.reshape(nlat*nlon) == 0.)
#---- Calculating eof of signal
fld=signal.reshape(ntmp,nlat*nlon)
pcsig = np.matmul(fld[:,id],YYi[id,0:neofs])
scov = np.matmul(pcsig.transpose(),pcsig)/float(ntmp)
#---- Calculating eof of noise
ncov=np.zeros((neofs,neofs))
pcnoise=np.zeros((ntmp,nens,neofs))
fld=noise.reshape(ntmp,nens,nlat*nlon)
tmpcov=np.zeros((ntmp,neofs,neofs))
for yt in range(0,ntmp):
tmp1 = fld[yt,:,:]
pcnoise[yt,:,:] = np.matmul(tmp1[:,id],YYi[id,0:neofs])
tmp2 = pcnoise[yt,:,:]
tmpcov[yt,:,:]= tmpcov[yt,:,:] + np.matmul(tmp2.transpose(),tmp2)/float(nens)
ncov = np.mean(tmpcov,0)
evalue,evector = la.eig(scov,ncov,right=True)
rankl=evalue.ravel().argsort()[::-1]
# signal-to-noise eigenfunction
evector = evector[:,rankl]
evalue=evalue[rankl]
# PrCA time series
tseries=np.matmul(pcsig,evector)
#---- Printing spatial patterns
spat=(np.matmul(np.matmul(YY[:,0:neofs],ncov),evector)).reshape(nlat,nlon,neofs)
#---- Monte Carlo simulation
nsig = np.zeros((neofs,2))
if MCarlo:
nlambda=np.zeros((neofs,nmonte))
for nt in range(0,nmonte):
nnpc1=np.random.randn(ntmp,neofs,nens)
nspc=np.mean(nnpc1,axis=2) #calculating mean of nens axis in nnpc matrix
nspc[:,:] = nspc[:,:] - np.mean(nspc[:,:])
scov2 = np.matmul(nspc.transpose(),nspc)/float(ntmp)
ncov2=np.zeros((neofs,neofs))
nnpc2=np.zeros((ntmp,neofs,nens))
tmpcov=np.zeros((ntmp,neofs,neofs))
for et in range(0,nens):
nnpc2[:,:,et] = nnpc1[:,:,et] - nspc[:,:]
for yt in range(0,ntmp):
tmp = nnpc2[yt,:,:]
tmpcov[yt,:,:] = np.matmul(tmp,tmp.transpose())/float(nens)
ncov2[:,:]=np.mean(tmpcov,0)
evalue2,evector2 = la.eig(scov2,ncov2,right=True)
rankl=evalue2.ravel().argsort()[::-1]
evector2 = evector2[:,rankl]
nlambda[:,nt]=evalue2[rankl]
#---- Calculating the upper (95th percentile) and lower (5th percentile) boundaries
# of the maximized signal-to-noise ratios according to a Monte Carlo simulation
for nt in range(0,neofs):
tmp1=nlambda[nt,:]
tmp2=tmp1[np.isfinite(tmp1)]
nsig[nt,0]=np.percentile(tmp2,5.,interpolation='midpoint')
nsig[nt,1]=np.percentile(tmp2,95.,interpolation='midpoint')
return evalue, evector, tseries, spat, nsig