import pandas as pd import numpy as np import matplotlib.pylab as plt from matplotlib.legend_handler import HandlerLine2D from matplotlib.pylab import rcParams rcParams['figure.figsize'] = 15, 9
import seaborn as sns
import lmoments3 from lmoments3 import distr
from plotnine.ggplot import * from plotnine.qplot import * from plotnine.geoms import * from plotnine.coords import * from plotnine.labels import * from plotnine.facets import * from plotnine.scales import * from plotnine.themes.theme import *
在统计学中,自助法(Bootstrap Method,Bootstrapping,或自助抽样法)是一种从给定训练集中有放回的均匀抽样,也就是说,每当选中一个样本,它等可能地被再次选中并被再次添加到训练集中。自助法由Bradley Efron于1979年在《Annals of Statistics》上发表。当样本来自总体,能以正态分布来描述,其抽样分布(Sampling Distribution)为正态分布(The Normal Distribution);但当样本来自的总体无法以正态分布来描述,则以渐进分析法、自助法等来分析。采用随机可置换抽样(random sampling with replacement)。对于小数据集,自助法效果很好
import bootstrap from collections import OrderedDict
defextreme_bsci(df, gevfit): # Calculate confidence intervals using parametric bootstrap and the # percentil interval method # This is used to obtain confidence intervals for the estimators and # the return values for several return values. # More info about bootstrapping can be found on: # - https://github.com/cgevans/scikits-bootstrap # - Efron: "An Introduction to the Bootstrap", Chapman & Hall (1993) # - https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29
# parametric bootstrap for return levels and parameters
# The function to bootstrap deffunc(data): fitted_gev = distr.gev(**gevfit) sample = fitted_gev.rvs(len(df.index)) samgevfit = distr.gev.lmom_fit(sample)
T = np.arange(0.1, 999.1, 0.1) + 1 fitted_sam_gev = distr.gev(**samgevfit) sT = fitted_sam_gev.ppf(1.0 - 1./T)
res = list(samgevfit.values()) res.extend(sT.tolist()) returntuple(res)
# the calculations itself
out = bootstrap.ci(df, statfunction=func, n_samples=10000)
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:32: InstabilityWarning: Some values used top 10 low/high samples; results may be unstable.
1 2 3 4 5 6 7 8 9
# prepare index for obs N = np.r_[1:len(df.index)+1]*1.0#must *1.0 to convert int to float Nmax = max(N) y=sorted(df.TMAX, reverse=True) empirical = pd.DataFrame({ 'T':Nmax/N, 'empirical':y }) empirical = pd.melt(empirical,id_vars='T')
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
# begin plotting
fig, ax = plt.subplots(figsize=(15,9))
plt.setp(ax.lines, linewidth = 2, color = 'magenta')
ax.set_title("GEV Distribution") ax.set_xlabel("Return Period (Year)") ax.set_ylabel("Precipitation") ax.semilogx(T, sT) ax.scatter(Nmax/N, sorted(df.TMAX)[::-1], color = 'orangered')