-
Notifications
You must be signed in to change notification settings - Fork 4
/
Result_winter_shift_in_different_stations.py
206 lines (193 loc) · 8.18 KB
/
Result_winter_shift_in_different_stations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
"""Analyse extreme surges in different stations with monthly GEV models.
This code implements the analysis presented by Reinert et al. (2021)
in Section 4.4.
Written by Markus Reinert, August 2020 to November 2021.
"""
import numpy as np
from scipy import optimize
from matplotlib import pyplot as plt
from advanced_GEV_analysis import negative_log_likelihood, Modifiers, get_month_selection
from tools_surge import load_data, Timeseries
# Select months of interest (here: March and October)
selected_months = [3, 10]
# Select stations of interest
cities = [
"Newlyn",
"Cherbourg",
"Le Havre",
"Brest (our data)",
"Brest (GESLA-2)",
"La Rochelle",
"Saint-Jean-de-Luz",
"Santander",
"La Coruna",
"Vigo",
]
# Select the period of interest
year_start = 1950
year_end = 2000
# Define ranges for initial parameter values
# Explanation:
# To fit the time-dependent GEV model to monthly maxima using
# scipy.optimize.minimize, we need to specify an initial value for each
# parameter of the model. These initial values should be close to
# parameter values of the best fit. Since the best fit varies from
# station to station and from month to month, there is probably no set
# of initial values that works well for every station. Therefore, we
# search at first for the best fit over a discrete space of common
# parameter values, and we then use these values as initial values to
# compute the actual best fit. With the values given here, determining
# the initial values takes between 1 and 2 seconds on a normal PC, which
# we find an acceptable computation time.
mu0_range = np.linspace(5, 40, 5)
mu1_range = np.linspace(-50, 50, 10)
si0_range = np.linspace(5, 25, 5)
si1_range = np.linspace(-50, 50, 10)
xi_range = np.linspace(-0.6, 0.2, 11)
n_combinations = (
mu0_range.size * mu1_range.size * si0_range.size * si1_range.size * xi_range.size
)
print("Number of initial value combinations:", n_combinations)
t_calculation = 50e-6 # time typically required to compute negative_log_likelihood
print(f"Testing all combinations will take about {n_combinations*t_calculation:.1f} seconds")
# Compute for each station the linear trend in each month of interest
# and over the whole year, as well as their standard errors
linear_trends = {month: [] for month in selected_months}
linear_trends["year"] = []
linear_trends_std = {month: [] for month in selected_months}
linear_trends_std["year"] = []
for city in cities:
print("")
# Load the surge dataset of this city for the period of interest
if city == "Brest (our data)":
data = load_data("Brest", Timeseries.SKEW_SURGE, year_start, year_end)
elif city == "Brest (GESLA-2)":
data = load_data("Brest", Timeseries.SKEW_SURGE_GESLA, year_start, year_end)
else:
data = load_data(city, Timeseries.SKEW_SURGE_GESLA, year_start, year_end)
# Get (date-)time and height of all monthly maxima,
# and sort those of interest by month
t_MM = {month: [] for month in selected_months}
t_MM["year"] = []
h_MM = {month: [] for month in selected_months}
h_MM["year"] = []
for year in range(data["year_start"], data["year_end"] + 1):
for month in range(1, 13):
sel = get_month_selection(year, month, data["t"])
if np.any(sel):
i_max = np.argmax(data["h"][sel])
t_MM["year"].append(data["t"][sel][i_max])
h_MM["year"].append(data["h"][sel][i_max])
if month in selected_months:
t_MM[month].append(data["t"][sel][i_max])
h_MM[month].append(data["h"][sel][i_max])
for k in t_MM:
t_MM[k] = np.array(t_MM[k])
h_MM[k] = np.array(h_MM[k])
# Fit a GEV model with annual cycles and linear trends
# to all monthly maxima to extract the overall trend
with np.errstate(invalid="ignore"):
# Do not warn when the function value infinity occurs in the optimisation
result_full_year = optimize.minimize(
negative_log_likelihood,
[20, 1, 1, 1, 15, 1, 1, 1, -0.1],
args=(
h_MM["year"],
t_MM["year"],
[Modifiers.LINEAR_TREND, Modifiers.SEASONAL_OSCILLATION],
[Modifiers.LINEAR_TREND, Modifiers.SEASONAL_OSCILLATION],
),
options={"gtol": 1e-4},
)
if not result_full_year.success:
print("Warning:", result_full_year.message)
fit_params = result_full_year.x
fit_errors = np.sqrt(np.diag(result_full_year.hess_inv))
for i, name in enumerate([
"mu", "mu_trend", "mu_cos", "mu_sin", "sigma", "si_trend", "si_cos", "si_sin", "xi"
]):
print("{:8} = {:7.4f} ± {:.4f}".format(name, fit_params[i], fit_errors[i]))
linear_trends["year"].append(fit_params[1])
linear_trends_std["year"].append(fit_errors[1])
# Compute the linear trends in the months of interest
for month in selected_months:
print("Month:", month)
print("#years =", t_MM[month].size)
# Find good initial parameters for the fit
least_nLL = np.infty
best_mu0 = fit_params[0]
best_mu1 = 1.0
best_si0 = fit_params[4]
best_si1 = 1.0
best_xi = fit_params[-1]
for mu0 in mu0_range:
for mu1 in mu1_range:
for si0 in si0_range:
for si1 in si1_range:
for xi in xi_range:
nLL = negative_log_likelihood(
[mu0, mu1, si0, si1, xi],
h_MM[month],
t_MM[month],
[Modifiers.LINEAR_TREND],
[Modifiers.LINEAR_TREND],
)
if nLL < least_nLL:
least_nLL = nLL
best_mu0 = mu0
best_mu1 = mu1
best_si0 = si0
best_si1 = si1
best_xi = xi
# Fit GEV model with linear trends in the parameters for this month
with np.errstate(invalid="ignore"):
# Do not warn when the function value infinity occurs in the optimisation
result_month = optimize.minimize(
negative_log_likelihood,
[best_mu0, best_mu1, best_si0, best_si1, best_xi],
args=(
h_MM[month], t_MM[month], [Modifiers.LINEAR_TREND], [Modifiers.LINEAR_TREND]
),
options={"gtol": 1e-4},
)
if not result_month.success:
print("Warning:", result_month.message)
fit_params_month = result_month.x
fit_errors_month = np.sqrt(np.diag(result_month.hess_inv))
for i, name in enumerate(["mu", "mu_trend", "sigma", "si_trend", "xi"]):
print("{:8} = {:7.4f} ± {:.4f}".format(
name, fit_params_month[i], fit_errors_month[i]
))
linear_trends[month].append(fit_params_month[1])
linear_trends_std[month].append(fit_errors_month[1])
fig, ax = plt.subplots(figsize=(15, 6))
ax.set_title(
"Relative linear trend of the location parameter for the period {} to {}".format(
year_start, year_end
) + "\n(error bars denote 95 % confidence intervals)"
)
ax.set_ylabel("Difference of monthly and yearly trends [cm/century]")
for month in selected_months:
# Denote with error bars the 95 % confidence intervals.
# The 95 % confidence intervals are computed by multiplying
# standard errors with 1.96. The standard error is computed by
# combining the standard errors of month and full year according
# to the standard formula for error propagation.
ax.errorbar(
cities,
[
linear_trends[month][i_city] - linear_trends["year"][i_city]
for i_city in range(len(cities))
],
[
1.96 * np.sqrt(
linear_trends_std[month][i_city]**2 + linear_trends_std["year"][i_city]**2
)
for i_city in range(len(cities))
],
fmt="o",
label="Month: {}".format(month),
)
ax.legend()
ax.grid(linestyle=":")
plt.show()