1///////////////////////////////////////////////////////////////////////////////
2// peaks_over_threshold.hpp
3//
4// Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost
5// Software License, Version 1.0. (See accompanying file
6// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7
8#ifndef BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006
9#define BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006
10
11#include <vector>
12#include <limits>
13#include <numeric>
14#include <functional>
15#include <boost/config/no_tr1/cmath.hpp> // pow
16#include <sstream> // stringstream
17#include <stdexcept> // runtime_error
18#include <boost/throw_exception.hpp>
19#include <boost/range.hpp>
20#include <boost/mpl/if.hpp>
21#include <boost/mpl/int.hpp>
22#include <boost/mpl/placeholders.hpp>
23#include <boost/parameter/keyword.hpp>
24#include <boost/tuple/tuple.hpp>
25#include <boost/accumulators/accumulators_fwd.hpp>
26#include <boost/accumulators/framework/accumulator_base.hpp>
27#include <boost/accumulators/framework/extractor.hpp>
28#include <boost/accumulators/numeric/functional.hpp>
29#include <boost/accumulators/framework/parameters/sample.hpp>
30#include <boost/accumulators/framework/depends_on.hpp>
31#include <boost/accumulators/statistics_fwd.hpp>
32#include <boost/accumulators/statistics/parameters/quantile_probability.hpp>
33#include <boost/accumulators/statistics/count.hpp>
34#include <boost/accumulators/statistics/tail.hpp>
35
36#ifdef _MSC_VER
37# pragma warning(push)
38# pragma warning(disable: 4127) // conditional expression is constant
39#endif
40
41namespace boost { namespace accumulators
42{
43
44///////////////////////////////////////////////////////////////////////////////
45// threshold_probability and threshold named parameters
46//
47BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_value, threshold_value)
48BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_probability, threshold_probability)
49
50BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_value)
51BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_probability)
52
53namespace impl
54{
55 ///////////////////////////////////////////////////////////////////////////////
56 // peaks_over_threshold_impl
57 // works with an explicit threshold value and does not depend on order statistics
58 /**
59 @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation
60
61 According to the theorem of Pickands-Balkema-de Haan, the distribution function \f$F_u(x)\f$ of
62 the excesses \f$x\f$ over some sufficiently high threshold \f$u\f$ of a distribution function \f$F(x)\f$
63 may be approximated by a generalized Pareto distribution
64 \f[
65 G_{\xi,\beta}(x) =
66 \left\{
67 \begin{array}{ll}
68 \beta^{-1}\left(1+\frac{\xi x}{\beta}\right)^{-1/\xi-1} & \textrm{if }\xi\neq0\\
69 \beta^{-1}\exp\left(-\frac{x}{\beta}\right) & \textrm{if }\xi=0,
70 \end{array}
71 \right.
72 \f]
73 with suitable parameters \f$\xi\f$ and \f$\beta\f$ that can be estimated, e.g., with the method of moments, cf.
74 Hosking and Wallis (1987),
75 \f[
76 \begin{array}{lll}
77 \hat{\xi} & = & \frac{1}{2}\left[1-\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}\right]\\
78 \hat{\beta} & = & \frac{\hat{\mu}-u}{2}\left[\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}+1\right],
79 \end{array}
80 \f]
81 \f$\hat{\mu}\f$ and \f$\hat{\sigma}^2\f$ being the empirical mean and variance of the samples over
82 the threshold \f$u\f$. Equivalently, the distribution function
83 \f$F_u(x-u)\f$ of the exceedances \f$x-u\f$ can be approximated by
84 \f$G_{\xi,\beta}(x-u)=G_{\xi,\beta,u}(x)\f$. Since for \f$x\geq u\f$ the distribution function \f$F(x)\f$
85 can be written as
86 \f[
87 F(x) = [1 - \P(X \leq u)]F_u(x - u) + \P(X \leq u)
88 \f]
89 and the probability \f$\P(X \leq u)\f$ can be approximated by the empirical distribution function
90 \f$F_n(u)\f$ evaluated at \f$u\f$, an estimator of \f$F(x)\f$ is given by
91 \f[
92 \widehat{F}(x) = [1 - F_n(u)]G_{\xi,\beta,u}(x) + F_n(u).
93 \f]
94 It can be shown that \f$\widehat{F}(x)\f$ is a generalized
95 Pareto distribution \f$G_{\xi,\bar{\beta},\bar{u}}(x)\f$ with \f$\bar{\beta}=\beta[1-F_n(u)]^{\xi}\f$
96 and \f$\bar{u}=u-\bar{\beta}\left\{[1-F_n(u)]^{-\xi}-1\right\}/\xi\f$. By inverting \f$\widehat{F}(x)\f$,
97 one obtains an estimator for the \f$\alpha\f$-quantile,
98 \f[
99 \hat{q}_{\alpha} = \bar{u} + \frac{\bar{\beta}}{\xi}\left[(1-\alpha)^{-\xi}-1\right],
100 \f]
101 and similarly an estimator for the (coherent) tail mean,
102 \f[
103 \widehat{CTM}_{\alpha} = \hat{q}_{\alpha} - \frac{\bar{\beta}}{\xi-1}(1-\alpha)^{-\xi},
104 \f]
105 cf. McNeil and Frey (2000).
106
107 Note that in case extreme values of the left tail are fitted, the distribution is mirrored with respect to the
108 \f$y\f$ axis such that the left tail can be treated as a right tail. The computed fit parameters thus define
109 the Pareto distribution that fits the mirrored left tail. When quantities like a quantile or a tail mean are
110 computed using the fit parameters obtained from the mirrored data, the result is mirrored back, yielding the
111 correct result.
112
113 For further details, see
114
115 J. R. M. Hosking and J. R. Wallis, Parameter and quantile estimation for the generalized Pareto distribution,
116 Technometrics, Volume 29, 1987, p. 339-349
117
118 A. J. McNeil and R. Frey, Estimation of Tail-Related Risk Measures for Heteroscedastic Financial Time Series:
119 an Extreme Value Approach, Journal of Empirical Finance, Volume 7, 2000, p. 271-300
120
121 @param quantile_probability
122 @param pot_threshold_value
123 */
124 template<typename Sample, typename LeftRight>
125 struct peaks_over_threshold_impl
126 : accumulator_base
127 {
128 typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
129 // for boost::result_of
130 typedef boost::tuple<float_type, float_type, float_type> result_type;
131 // for left tail fitting, mirror the extreme values
132 typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign;
133
134 template<typename Args>
135 peaks_over_threshold_impl(Args const &args)
136 : Nu_(0)
137 , mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1))
138 , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
139 , threshold_(sign::value * args[pot_threshold_value])
140 , fit_parameters_(boost::make_tuple(t0: 0., t1: 0., t2: 0.))
141 , is_dirty_(true)
142 {
143 }
144
145 template<typename Args>
146 void operator ()(Args const &args)
147 {
148 this->is_dirty_ = true;
149
150 if (sign::value * args[sample] > this->threshold_)
151 {
152 this->mu_ += args[sample];
153 this->sigma2_ += args[sample] * args[sample];
154 ++this->Nu_;
155 }
156 }
157
158 template<typename Args>
159 result_type result(Args const &args) const
160 {
161 if (this->is_dirty_)
162 {
163 this->is_dirty_ = false;
164
165 std::size_t cnt = count(args);
166
167 this->mu_ = sign::value * numeric::fdiv(this->mu_, this->Nu_);
168 this->sigma2_ = numeric::fdiv(this->sigma2_, this->Nu_);
169 this->sigma2_ -= this->mu_ * this->mu_;
170
171 float_type threshold_probability = numeric::fdiv(cnt - this->Nu_, cnt);
172
173 float_type tmp = numeric::fdiv(( this->mu_ - this->threshold_ )*( this->mu_ - this->threshold_ ), this->sigma2_);
174 float_type xi_hat = 0.5 * ( 1. - tmp );
175 float_type beta_hat = 0.5 * ( this->mu_ - this->threshold_ ) * ( 1. + tmp );
176 float_type beta_bar = beta_hat * std::pow(1. - threshold_probability, xi_hat);
177 float_type u_bar = this->threshold_ - beta_bar * ( std::pow(1. - threshold_probability, -xi_hat) - 1.)/xi_hat;
178 this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
179 }
180
181 return this->fit_parameters_;
182 }
183
184 private:
185 std::size_t Nu_; // number of samples larger than threshold
186 mutable float_type mu_; // mean of Nu_ largest samples
187 mutable float_type sigma2_; // variance of Nu_ largest samples
188 float_type threshold_;
189 mutable result_type fit_parameters_; // boost::tuple that stores fit parameters
190 mutable bool is_dirty_;
191 };
192
193 ///////////////////////////////////////////////////////////////////////////////
194 // peaks_over_threshold_prob_impl
195 // determines threshold from a given threshold probability using order statistics
196 /**
197 @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation
198
199 @sa peaks_over_threshold_impl
200
201 @param quantile_probability
202 @param pot_threshold_probability
203 */
204 template<typename Sample, typename LeftRight>
205 struct peaks_over_threshold_prob_impl
206 : accumulator_base
207 {
208 typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
209 // for boost::result_of
210 typedef boost::tuple<float_type, float_type, float_type> result_type;
211 // for left tail fitting, mirror the extreme values
212 typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign;
213
214 template<typename Args>
215 peaks_over_threshold_prob_impl(Args const &args)
216 : mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1))
217 , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
218 , threshold_probability_(args[pot_threshold_probability])
219 , fit_parameters_(boost::make_tuple(t0: 0., t1: 0., t2: 0.))
220 , is_dirty_(true)
221 {
222 }
223
224 void operator ()(dont_care)
225 {
226 this->is_dirty_ = true;
227 }
228
229 template<typename Args>
230 result_type result(Args const &args) const
231 {
232 if (this->is_dirty_)
233 {
234 this->is_dirty_ = false;
235
236 std::size_t cnt = count(args);
237
238 // the n'th cached sample provides an approximate threshold value u
239 std::size_t n = static_cast<std::size_t>(
240 std::ceil(
241 cnt * ( ( is_same<LeftRight, left>::value ) ? this->threshold_probability_ : 1. - this->threshold_probability_ )
242 )
243 );
244
245 // If n is in a valid range, return result, otherwise return NaN or throw exception
246 if ( n >= static_cast<std::size_t>(tail(args).size()))
247 {
248 if (std::numeric_limits<float_type>::has_quiet_NaN)
249 {
250 return boost::make_tuple(
251 std::numeric_limits<float_type>::quiet_NaN()
252 , std::numeric_limits<float_type>::quiet_NaN()
253 , std::numeric_limits<float_type>::quiet_NaN()
254 );
255 }
256 else
257 {
258 std::ostringstream msg;
259 msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")";
260 boost::throw_exception(e: std::runtime_error(msg.str()));
261 return boost::make_tuple(Sample(0), Sample(0), Sample(0));
262 }
263 }
264 else
265 {
266 float_type u = *(tail(args).begin() + n - 1) * sign::value;
267
268 // compute mean and variance of samples above/under threshold value u
269 for (std::size_t i = 0; i < n; ++i)
270 {
271 mu_ += *(tail(args).begin() + i);
272 sigma2_ += *(tail(args).begin() + i) * (*(tail(args).begin() + i));
273 }
274
275 this->mu_ = sign::value * numeric::fdiv(this->mu_, n);
276 this->sigma2_ = numeric::fdiv(this->sigma2_, n);
277 this->sigma2_ -= this->mu_ * this->mu_;
278
279 if (is_same<LeftRight, left>::value)
280 this->threshold_probability_ = 1. - this->threshold_probability_;
281
282 float_type tmp = numeric::fdiv(( this->mu_ - u )*( this->mu_ - u ), this->sigma2_);
283 float_type xi_hat = 0.5 * ( 1. - tmp );
284 float_type beta_hat = 0.5 * ( this->mu_ - u ) * ( 1. + tmp );
285 float_type beta_bar = beta_hat * std::pow(1. - threshold_probability_, xi_hat);
286 float_type u_bar = u - beta_bar * ( std::pow(1. - threshold_probability_, -xi_hat) - 1.)/xi_hat;
287 this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
288 }
289 }
290
291 return this->fit_parameters_;
292 }
293
294 private:
295 mutable float_type mu_; // mean of samples above threshold u
296 mutable float_type sigma2_; // variance of samples above threshold u
297 mutable float_type threshold_probability_;
298 mutable result_type fit_parameters_; // boost::tuple that stores fit parameters
299 mutable bool is_dirty_;
300 };
301
302} // namespace impl
303
304///////////////////////////////////////////////////////////////////////////////
305// tag::peaks_over_threshold
306//
307namespace tag
308{
309 template<typename LeftRight>
310 struct peaks_over_threshold
311 : depends_on<count>
312 , pot_threshold_value
313 {
314 /// INTERNAL ONLY
315 ///
316 typedef accumulators::impl::peaks_over_threshold_impl<mpl::_1, LeftRight> impl;
317 };
318
319 template<typename LeftRight>
320 struct peaks_over_threshold_prob
321 : depends_on<count, tail<LeftRight> >
322 , pot_threshold_probability
323 {
324 /// INTERNAL ONLY
325 ///
326 typedef accumulators::impl::peaks_over_threshold_prob_impl<mpl::_1, LeftRight> impl;
327 };
328
329 struct abstract_peaks_over_threshold
330 : depends_on<>
331 {
332 };
333}
334
335///////////////////////////////////////////////////////////////////////////////
336// extract::peaks_over_threshold
337//
338namespace extract
339{
340 extractor<tag::abstract_peaks_over_threshold> const peaks_over_threshold = {};
341
342 BOOST_ACCUMULATORS_IGNORE_GLOBAL(peaks_over_threshold)
343}
344
345using extract::peaks_over_threshold;
346
347// peaks_over_threshold<LeftRight>(with_threshold_value) -> peaks_over_threshold<LeftRight>
348template<typename LeftRight>
349struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_value)>
350{
351 typedef tag::peaks_over_threshold<LeftRight> type;
352};
353
354// peaks_over_threshold<LeftRight>(with_threshold_probability) -> peaks_over_threshold_prob<LeftRight>
355template<typename LeftRight>
356struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_probability)>
357{
358 typedef tag::peaks_over_threshold_prob<LeftRight> type;
359};
360
361template<typename LeftRight>
362struct feature_of<tag::peaks_over_threshold<LeftRight> >
363 : feature_of<tag::abstract_peaks_over_threshold>
364{
365};
366
367template<typename LeftRight>
368struct feature_of<tag::peaks_over_threshold_prob<LeftRight> >
369 : feature_of<tag::abstract_peaks_over_threshold>
370{
371};
372
373// So that peaks_over_threshold can be automatically substituted
374// with weighted_peaks_over_threshold when the weight parameter is non-void.
375template<typename LeftRight>
376struct as_weighted_feature<tag::peaks_over_threshold<LeftRight> >
377{
378 typedef tag::weighted_peaks_over_threshold<LeftRight> type;
379};
380
381template<typename LeftRight>
382struct feature_of<tag::weighted_peaks_over_threshold<LeftRight> >
383 : feature_of<tag::peaks_over_threshold<LeftRight> >
384{};
385
386// So that peaks_over_threshold_prob can be automatically substituted
387// with weighted_peaks_over_threshold_prob when the weight parameter is non-void.
388template<typename LeftRight>
389struct as_weighted_feature<tag::peaks_over_threshold_prob<LeftRight> >
390{
391 typedef tag::weighted_peaks_over_threshold_prob<LeftRight> type;
392};
393
394template<typename LeftRight>
395struct feature_of<tag::weighted_peaks_over_threshold_prob<LeftRight> >
396 : feature_of<tag::peaks_over_threshold_prob<LeftRight> >
397{};
398
399}} // namespace boost::accumulators
400
401#ifdef _MSC_VER
402# pragma warning(pop)
403#endif
404
405#endif
406

source code of boost/boost/accumulators/statistics/peaks_over_threshold.hpp