MINT2
GeneralisedPareto.cpp
Go to the documentation of this file.
1 // author: Jonas Rademacker (Jonas.Rademacker@bristol.ac.uk)
2 // status: Mon 9 Feb 2009 19:17:56 GMT
4 #include "Mint/Minimiser.h"
5 
6 #include <cmath>
7 #include <algorithm>
8 #include <iostream>
9 
10 using namespace std;
11 using namespace MINT;
12 
13 /*
14 http://www.isse.ucar.edu/extremevalues/tutorial/Appendix.html#subsec:GPD
15 
16 Formula:
17 
18 G(z) = exp{ - [1 + xi*((z-mu)/sigma)]^(-1/xi) }
19 
20 for large enough threshold u, cumulative distribution of y:=(x-u):
21 
22 H(y) = 1 - (1 + xi*y/sigma_bar)^(-1/xi) <-- I use this
23 
24 with sigma_bar = sigma + xi*(u - mu)
25 */
26 
27 double MINT::generalisedPareto_cumulative(double y, double xi, double sigma_bar){
28 
29  return 1 - pow(1.0 + xi*y/sigma_bar, -1.0/xi);
30 }
31 double MINT::generalisedPareto_pdf(double y, double xi, double sigma_bar){
32  double d = 1.0 + xi*y/fabs(sigma_bar);
33  if(d < 0) return 0;
34  return (1./fabs(sigma_bar))*pow(1.0 + xi*y/fabs(sigma_bar), -1.0/xi - 1.0);
35 }
36 double MINT::generalisedPareto_logPdf(double y, double xi, double sigma_bar){
37  double d = 1.0 + xi*y/fabs(sigma_bar);
38  if(d < 0) return -99999;
39  return -log(fabs(sigma_bar)) + log(1.0 + xi*y/fabs(sigma_bar)) *(-1.0/xi - 1.0);
40 }
41 
42 double MINT::generalisedPareto_xiFromMeanRMS(double mean, double rms){
43  // assuming threshold == 0, shift parameters if necessary
44  return 0.5*(1.0 - mean*mean/(rms*rms));
45 }
46 double MINT::generalisedPareto_sigmaFromMeanRMS(double mean, double rms){
47  // assuming threshold == 0, shift parameters if necessary
48  return 0.5*mean*(1.0 + mean*mean/(rms*rms));
49 }
50 
51 double MINT::generalisedPareto_yFromCL(double CL, double xi, double sigma_bar){
52 
53  return sigma_bar/xi * ( pow( 1.0 - CL , -xi) - 1.0 );
54 }
55 
56 double MINT::generalisedPareto_limit(double xi, double sigma_bar){
57  if(xi >=0){
58  cout << " MINT::generalisedPareto_limit: Limit only for negative xi."
59  << " This xi = " << xi << endl;
60  return -9999.0;
61  }
62  return -sigma_bar/xi;
63 }
64 
65 double MINT::generalisedPareto_estimateMaximum(const std::vector<double>& input
66  , double CL){
67  double paretoMax=-9999;
68  double actualMax=-9999;
70  , CL
71  , paretoMax
72  , actualMax);
73 }
74 
76 
77 MINT::minimisePareto::minimisePareto(std::vector<double>& data, double threshold
78  , double xiInit, double sigInit)
79  : _data(data)
80  , _threshold(threshold)
81  , _xi("xi", 0, xiInit, fabs(xiInit)/2.0 + 0.1, 0, 0, &_mps)
82  , _sigma("sigma", 0, fabs(sigInit), fabs(sigInit)/2.0 + 0.1, 0, 0, &_mps)
83 {
84  this->setPset(&_mps);
85 }
87  double sum=0;
88  for(unsigned int i=0; i < _data.size(); i++){
89  double x = _data[i] - _threshold;
90  if(x > 0) sum -= 2*generalisedPareto_logPdf(x, _xi, _sigma);
91  }
92  return sum;
93 }
94 
95 double MINT::minimisePareto::getXi() const{ return _xi;}
96 double MINT::minimisePareto::getSigma() const{ return _sigma;}
97 
98 double MINT::generalisedPareto_estimateMaximum(std::vector<double> input_a
99  , double CL
100  , double& actualMax
101  , double& paretoMax
102  , int numEvents
103  ){
104  bool dbThis=false;
105  bool verbose=true;
106 
107  double max=-9999;
108  std::vector<double> input;
109  for(unsigned int i=0; i < input_a.size(); i++){
110  if(i > 0 && i%200 == 0){
111  input.push_back(max);
112  max = -9999;
113  }
114  if(input_a[i] > max) max=input_a[i];
115  }
116 
117  sort(input.begin(), input.end());
118 
119  unsigned int n=-9999;
120 // unsigned int n=9999;
121 
122  if(numEvents > 0){
123  n = (unsigned int) numEvents;
124  }else{
125  // let's take 0.2% of events, but at least 100, at most 1000:
126  n = input.size()/50; // originally /500,
127  if(n < 100) n=100;
128  if(n > 1000) n=1000;
129  }
130  if(n > input.size()) n = input.size()-1;
131  if(n == 0){
132  cout << "MINT::generalisedPareto_estimateMaximum: "
133  << input.size() << " too few events."
134  << endl;
135  return -9999;
136  }
137  double sum=0, sumsq=0;
138  unsigned int counter=0;
139  double lastVal=-9999;
140  std::vector<double>::reverse_iterator it = input.rbegin();
141  double maxVal = *it;
142  for( ;
143  it != input.rend() && counter < n;
144  it++, counter++){
145  if(dbThis) cout << *it << endl;
146  lastVal = *it;
147  sum += lastVal;
148  sumsq += (lastVal) * (lastVal);
149  }
150  double threshold = 0.5*(lastVal + *it); // *it is one event beyond lastVal;
151 
152  double mean = sum/((double)n);
153  double meansq = sumsq/((double)n);
154  double var = meansq - mean*mean;
155  double rms = sqrt(var);
156 
157  double xi = MINT::generalisedPareto_xiFromMeanRMS(mean - threshold, rms);
158  double sg = MINT::generalisedPareto_sigmaFromMeanRMS(mean - threshold, rms);
159 
160  if(sg < -1.5*xi*(maxVal-threshold)) sg = fabs(1.5*xi*(maxVal-threshold));
161 
162  cout << "xi, sg before fit: xi = " << xi << ", sg = " << sg << endl;
163  minimisePareto ll(input, threshold, xi, sg);
164  Minimiser mini(&ll);
165  mini.doFit();
166  xi = ll.getXi();
167  sg = fabs(ll.getSigma());
168  cout << "xi, sg after fit: xi = " << xi << ", sg = " << sg << endl;
169 
170  double limity = -9999;
171  if(xi < 0){
172  limity = MINT::generalisedPareto_limit(xi, sg);
173  }else{
174  limity = MINT::generalisedPareto_yFromCL(CL, xi, sg);
175  }
176 
177  double limit = limity + threshold;
178 
179  if(verbose || dbThis){
180  cout << "MINT::generalisedPareto_estimateMaximum with CL = " << CL << endl;
181  cout << "\n maxVal " << maxVal
182  << "\n mean " << mean
183  << "\n rms " << rms
184  << "\n threshold " << threshold
185  << "\n xi " << xi
186  << "\n sg " << sg
187  << "\n limity " << limity
188  << "\n limit " << limit
189  << endl;
190  }
191  if(limit < maxVal){
192  cout << "MINT::generalisedPareto_estimateMaximum with CL = " << CL
193  << ": Something clearly went wrong"
194  << "\n Estimated upper limit smaller than actual upper limit:"
195  << " actual: " << maxVal << ", estimated " << limit << endl;
196  actualMax = maxVal;
197  paretoMax = maxVal;
198  return maxVal;
199  }
200  actualMax = maxVal;
201  paretoMax = limit;
202  return limit;
203 }
204 //
double generalisedPareto_yFromCL(double CL, double xi, double sigma_bar)
double generalisedPareto_sigmaFromMeanRMS(double mean, double rms)
double generalisedPareto_xiFromMeanRMS(double mean, double rms)
double generalisedPareto_estimateMaximum(const std::vector< double > &input, double CL=0.001)
double generalisedPareto_pdf(double y, double xi, double sigma_bar)
static MinuitParameterSet _mps
minimisePareto(std::vector< double > &data, double threshold, double xiInit, double sigInit)
void setPset(MinuitParameterSet *mps)
Definition: Minimisable.cpp:21
double generalisedPareto_cumulative(double y, double xi, double sigma_bar)
double generalisedPareto_limit(double xi, double sigma_bar)
double generalisedPareto_logPdf(double y, double xi, double sigma_bar)
virtual double getVal()