BFGSFitter.cxx
Go to the documentation of this file.
1 
12 #ifdef _MSC_VER
13 #include "msdevstudio/MSconfig.h"
14 #endif
15 
16 #ifdef _MSC_VER
17 #define isnan _isnan
18 #endif
19 
20 //To have isnan.
21 #ifdef __APPLE__
22 #include <cstdlib>
23 #define _GLIBCPP_USE_C99 1
24 #endif
25 
26 #include "BFGSFitter.h"
27 
28 #include "NumLinAlg.h"
29 #include "StatedFCN.h"
30 
31 // see todo
32 #include <iostream>
33 using std::cout;
34 using std::endl;
35 
36 #include <cfloat>
37 #include <cstdlib>
38 #include <cassert>
39 #include <cmath>
40 #include <ctime>
41 
42 #ifdef __APPLE__
43 using std::isnan;
44 #endif
45 
46 
47 using std::pow;
48 using std::swap;
49 using std::min;
50 using std::abs;
51 using std::vector;
52 using std::vector;
53 using std::string;
54 using std::map;
55 
56 using namespace hippodraw;
57 
58 using namespace Numeric;
59 
60 BFGSFitter::BFGSFitter( const char * name )
61  : Fitter ( name ),
62  m_xinit( 1 ),
63  m_grad_cutoff( 1e-6 ),
64  m_step_cutoff( 1e-6 ),
65  m_proj_cutoff( 1e-6 ),
66  m_c1( 1e-4 ),
67  m_c2( 0.9 ),
68  m_alpha_max( 4 ),
69  m_alpha1( 1 )
70 {
71  m_iter_params[ "grad_cutoff" ] = & m_grad_cutoff;
72  m_iter_params[ "step_cutoff" ] = & m_step_cutoff;
73  m_iter_params[ "proj_cutoff" ] = & m_proj_cutoff;
74  m_iter_params[ "c1" ] = & m_c1;
75  m_iter_params[ "c2" ] = & m_c2;
76  m_iter_params[ "alpha_max" ] = & m_alpha_max;
77  m_iter_params[ "alpha1" ] = & m_alpha1;
78 }
79 
81 {
82  return new BFGSFitter ( *this ); // uses compiler generated copy constructor
83 }
84 
87 bool
90 {
91  double Alpha_star;
92 
93  // Initialization
94  vector < double > init_params;
95  m_fcn -> fillFreeParameters ( init_params );
96  setInitIter( init_params );
97 
98  // Allocate space for various matrices / vectors needed
99  vector< double > xold = m_xinit;
100  vector< double > xnew = m_xinit;
101  vector< double > gk = gradient( xold );
102  vector< double > gkPlusUn = gradient( xnew );
103 
104  vector< double > pk( m_xinit.size() );
105  vector< double > s( m_xinit.size() );
106  vector< double > y( m_xinit.size() );
107 
108  // The standard quasi newton initialization
109  eye ( m_M, m_xinit.size() );
110  m_M = ( 1.0 / norm( gk ) ) * m_M;
111 
112  vector< vector< double > > t1 , t2;
113  eye( t1, m_xinit.size() );
114  eye( t2, m_xinit.size() );
115 
116  double fx = function( xnew );
117  double fxold = fx;
118  for( int k = 1; k <= m_max_iterations; k++ )
119  {
120  // Update the iterate.
121  gk = gkPlusUn;
122  pk = m_M * (-gk);
123  Alpha_star = wolfeStep( xold, pk );
124 
125  do
126  {
127  xnew = xold + Alpha_star * pk;
128  fx = function( xnew );
129  Alpha_star /= 3.0;
130  }
131  while( isnan( fx ) );
132 
133  gkPlusUn = gradient( xnew );
134 
135  s = xnew - xold;
136  y = gkPlusUn - gk;
137 
138  double ys = innerProduct( y, s );
139  double yy = norm( y );
140  double ss = norm( s );
141 
142  // Termination criteria
143  if( ( abs( ys ) < m_proj_cutoff ) ||
144  ( abs( Alpha_star ) < DBL_EPSILON ) ||
145  ( ss < m_step_cutoff ) ||
146  ( yy < m_grad_cutoff ) ||
147  ( fx >= fxold ) )
148  break;
149 
150  // DFP Update of inverse of approximate hessian.
151  // M = M-(s*y'*M+M*y*s')/(y'*s)+(1+(y'*M*y)/(y'*s))*(s*s')/(y'*s);
152  double temp = ( 1 + innerProduct( y, m_M * y ) / ys ) / ys;
153 
154  t1 = ( outerProduct(s, y) * m_M)/ys + ( m_M * outerProduct(y , s) ) / ys;
155  t2 = temp * outerProduct( s, s );
156  m_M = m_M - t1 + t2;
157 
158  // one pass of the loop is over so refresh
159  xold = xnew;
160  fxold = fx;
161  }
162 
163  m_fcn -> setFreeParameters ( xold );
164  //write( xold );
165 
166  return true;
167 }
168 
171 double BFGSFitter::wolfeStep( const std::vector< double >& x0,
172  const std::vector< double >& p ) const
173 {
174  double step_fac = sqrt(2.0); // Geometric step factor; must be > 1
175 
176  double phi0 = function( x0 ); // Function value at the initial point
177  double dphi0 = gradp( x0, p ); // innner product of gradient and p
178 
179  // dphi0 >= 0 means you are ascending. To avoid it set step = 0 and
180  // terminate the iterations ( maybe after giving out a warning )
181  // Algorithm ensures that such case will not arise but curse of finite
182  // precision mathematics lead to some very small positive dphi0.
183  if ( dphi0 >= 0 )
184  return 0.0;
185 
186  double Alpha0 = 0;
187  double Alphai = m_alpha1;
188  double Alphaim = Alpha0;
189  double phiim = phi0;
190  int i = 1;
191  int done = 0;
192  int cnt = 0;
193 
194  double phii, dphii;
195 
196  while ( !done )
197  {
198  phii = function( x0 + Alphai * p );
199 
200  if ( (phii > (phi0 + m_c1 * Alphai * dphi0)) ||
201  ( (phii >= phiim) && ( i > 1)) )
202  return zoom( x0, p, phi0, dphi0, Alphaim, Alphai );
203 
204  dphii = gradp( x0 + Alphai * p , p );
205 
206  if ( abs( dphii ) <= -m_c2 * dphi0 )
207  return Alphai;
208 
209  if (dphii >= 0)
210  return zoom( x0, p, phi0, dphi0, Alphai, Alphaim );
211 
212  // Choose new Alphai in (Alphai, Alpha_max)
213  Alphaim = Alphai;
214  phiim = phii;
215  Alphai = min( step_fac * Alphai, m_alpha_max );
216 
217  if ( Alphai == m_alpha_max )
218  cnt += 1;
219 
220  if (cnt > 1)
221  {
222 // cout << "WARNING: Unable to bracket a strong Wolfe pt. in [ "
223 // << m_alpha1 << ", " << m_alpha_max << " ]" << endl;
224  return m_alpha_max;
225  }
226 
227  i += 1;
228  //cout << " Wolfe Iteration: " << i << endl;
229  }
230 
231  return 0.0;
232 }
233 
234 double BFGSFitter::zoom( const std::vector< double >& x0,
235  const std::vector< double >& p,
236  double phi0, double dphi0,
237  double Alphalo, double Alphahi ) const
238 {
239  int MaxIter = 20;
240  int iter = 0;
241  int done = 0;
242 
243  double philo, phij;
244  double dphij;
245  double Alphaj;
246  double Alpha_star = 0.0;
247 
248  while ( !done && iter < MaxIter )
249  {
250  iter += 1;
251 
252  philo = function( x0 + Alphalo * p );
253 
254  // Find a trial step length Alphaj between Alphalo and Alphahi
255  Alphaj = interpolate( x0, p, Alphalo, Alphahi );
256 
257  // Evalaute phi( Alphaj )
258  phij = function( x0 + Alphaj * p );
259 
260  if( (phij > phi0 + m_c1 * Alphaj * dphi0) || (phij >= philo) )
261  Alphahi = Alphaj;
262  else
263  {
264  dphij = gradp( x0 + Alphaj * p , p );
265 
266  if ( abs( dphij ) <= -m_c2 * dphi0 )
267  Alpha_star = Alphaj;
268  return Alpha_star;
269 
270  if ( dphij * ( Alphahi - Alphalo ) >= 0 )
271  Alphahi = Alphalo;
272 
273  Alphalo = Alphaj;
274  }
275 
276  }
277 
278  // If above loop fails take the mid-point
279  if (iter == MaxIter)
280  Alpha_star = 0.5 * ( Alphahi + Alphalo );
281 
282  return Alpha_star;
283 
284 }
285 
286 
287 double BFGSFitter::interpolate( const std::vector< double >& x0,
288  const std::vector< double >& p,
289  double Alphaim,
290  double Alphai ) const
291 {
292 
293  if ( Alphaim > Alphai )
294  swap( Alphaim, Alphai);
295 
296  double phiim = function( x0 + Alphaim * p );
297  double phii = function( x0 + Alphai * p );
298 
299  double dphiim = gradp( x0 + Alphaim * p, p );
300  double dphii = gradp( x0 + Alphai * p, p );
301 
302  double d1 = dphiim + dphii - 3 * ( (phiim - phii)/(Alphaim - Alphai) );
303  double d2 = sqrt( d1 * d1 - dphiim * dphii);
304 
305  double Alphaip = Alphai - (Alphai - Alphaim) *
306  ( (dphiim + d2 - d1) / (dphii - dphiim + 2 * d2) );
307 
308  double lth = abs(Alphai - Alphaim);
309 
310  if( abs(Alphaip - Alphai) < 0.05 * lth ||
311  abs(Alphaip - Alphaim) < 0.05 * lth ||
312  Alphaip < Alphaim ||
313  Alphaip > Alphai )
314  Alphaip = 0.5 * (Alphai + Alphaim);
315 
316  return Alphaip;
317 }
318 
319 double BFGSFitter::function( const std::vector< double > & u ) const
320 {
321  // Gets the value of the objective function from pvfcn
322  // first you have to convert the vector to vector
323  vector< double > x( u.size() );
324 
325  for( unsigned int i = 0; i < u.size(); i++ )
326  x[i] = u[i];
327 
328  m_fcn -> setFreeParameters ( x );
329 
330  return objectiveValue();
331 
332  // Following are a few standard test functions
333  // which were used to test this minimizer.
334 
335  //double x = u[0]; double y = u[1];
336 
337  // Rosenbrock's function
338  //return 100 * (y - x * x) * (y - x * x) + (1 - x) * (1 - x);
339 
340  // Freudenstein and Roth's Function
341  //return pow(-13 + x + ((5 - y)*y - 2 )*y, 2) +
342  // pow(-29 + x + ((y + 1)*y - 14)*y, 2);
343 
344  // Beale Function
345  //return pow(1.5 - x*(1 - y ), 2) +
346  // pow(2.25 - x*(1 - y*y ), 2) +
347  // pow(2.625 - x*(1 - y*y*y ), 2);
348 }
349 
350 vector< double >
351 BFGSFitter::gradient( const std::vector< double > & u ) const
352 {
353  double h = 1e-5;
354  vector< double > x( u.size(), 0.0 );
355  vector< double > xph( u.size(), 0.0 );
356 
357  vector< double > g( u.size() );
358 
359  for( unsigned int i = 0; i < u.size(); i++ )
360  x[i] = u[i];
361 
362  // Calculating the gradient by finite differencing
363  m_fcn -> setFreeParameters ( x );
364  double fx = m_fcn -> objectiveValue ();
365  double fxph = 0.0;
366  for( unsigned int i = 0; i < u.size(); i++ )
367  {
368  for( unsigned int j = 0; j < u.size(); j++ ) {
369  xph[j] = ( i == j ) ? ( x[j] + h ) : x[j];
370  }
371  m_fcn -> setFreeParameters ( xph );
372  fxph = m_fcn -> objectiveValue ();
373  g[i] = ( fxph - fx ) / h;
374  }
375 
376  // Following are a few gradients of standard test functions
377  // which were used to test this minimizer.
378 
379  /* double x = u[0]; double y = u[1];
380  vector< double > g(2);*/
381 
382  // Gradient of Rosenbrock's function
383  /* g[0] = -400 * x * ( y - x * x ) - 2 * ( 1 - x );
384  g[1] = 200 * ( y - x * x );*/
385 
386  // Gradient of Freudenstein and Roth's Function
387  //g[0] = 2*(y*(y*(y+1)-14)+ x - 29) +
388  // 2*(y*((5-y)*y-2) + x - 13);
389  //g[1] = 2*(y*(2*y+1) + y*(y+1) - 14) * (y*(y*(y+1) - 14) + x - 29) +
390  // 2*((5-y)*y + (5-2*y)*y - 2) * (y*((5-y)*y - 2) + x - 13);
391 
392  // Gradient of Beale Function
393  //g[0]= 2*(2.625 - x * (1 - y*y*y) ) * (y*y*y - 1)+
394  // 2*(2.25 - x * (1 - y*y) ) * (y*y - 1)+
395  // 2*(1.5 - x * (1 - y ) ) * (y - 1);
396  //g[1]= 6*x*y*y*(2.625 - x * (1 - y*y*y)) +
397  // 4*x*y *(2.25 - x * (1 - y*y)) +
398  // 2*x *(1.5 - x * (1 - y ));
399 
400  return g;
401 }
402 
403 
404 double BFGSFitter::gradp( const std::vector< double > & u,
405  const std::vector< double > & p ) const
406 {
407  double h = 1e-5;
408  vector< double > x( u.size() );
409 
410  // Calculating the gradient in direction of p by finite differencing
411  for ( unsigned int i = 0; i < u.size(); i++ ) {
412  x[i] = u[i];
413  }
414 // double fx = m_fcn -> operator()( x );
415  m_fcn -> setFreeParameters ( x );
416  double fx = m_fcn -> objectiveValue ( );
417 
418  for ( unsigned int i = 0; i < u.size(); i++ ) {
419  x[i] += h * p[i] ;
420  }
421  m_fcn -> setFreeParameters ( x );
422  double fxph = m_fcn -> objectiveValue ();
423 
424  return ( fxph - fx ) / h;
425 
426  // Following are a directional derivative of standard test functions
427  // which were used to test this minimizer.
428 
429  /* double x = u[0]; double y = u[1];
430  vector< double > g(2);*/
431 
432  // Gradient of Rosenbrock's function
433  /* g[0] = -400 * x * ( y - x * x ) - 2 * ( 1 - x );
434  g[1] = 200 * ( y - x * x );*/
435 
436  // Gradient of Freudenstein and Roth's Function
437  //g[0] = 2*(y*(y*(y+1)-14)+ x - 29) +
438  // 2*(y*((5-y)*y-2) + x - 13);
439  //g[1] = 2*(y*(2*y+1) + y*(y+1) - 14) * (y*(y*(y+1) - 14) + x - 29) +
440  // 2*((5-y)*y + (5-2*y)*y - 2) * (y*((5-y)*y - 2) + x - 13);
441 
442  // Gradient of Beale Function
443  //g[0]= 2*(2.625 - x * (1 - y*y*y) ) * (y*y*y - 1)+
444  // 2*(2.25 - x * (1 - y*y) ) * (y*y - 1)+
445  // 2*(1.5 - x * (1 - y ) ) * (y - 1);
446  //g[1]= 6*x*y*y*(2.625 - x * (1 - y*y*y)) +
447  // 4*x*y *(2.25 - x * (1 - y*y)) +
448  // 2*x *(1.5 - x * (1 - y ));
449 
450  // return g[0] * p[0] + g[1] * p[1];
451 }
452 
453 const vector< double > & BFGSFitter::initIter() const
454 {
455  return m_xinit;
456 }
457 
458 int BFGSFitter::setInitIter( const std::vector< double > & xinit )
459 {
460  m_xinit.resize( xinit.size() );
461 
462  // Provide a random perturbations to the initial value
463  //srand( (unsigned) time( NULL ) );
464  //for( unsigned int i = 0; i < xinit.size(); i++ )
465  //m_xinit[i] = xinit[i] * (1 + 0.025 * ( 0.5 - rand() / ( RAND_MAX + 1.0 )));
466 
467  m_xinit = xinit;
468 
469  return EXIT_SUCCESS;
470 }
471 
472 int BFGSFitter::calcCovariance ( std::vector < std::vector < double > >& cov )
473 {
474  cov.resize( m_xinit.size() );
475  for( unsigned int i = 0; i < m_xinit.size(); i++ )
476  cov[i].resize( m_xinit.size(), 0.0 );
477 
478  for( unsigned int i = 0; i < m_xinit.size(); i++ )
479  for( unsigned int j = 0; j < m_xinit.size(); j++ )
480  cov[i][j] = m_M[i][j];
481 
482  // set return flag as EXIT_SUCCESS if cov is Positive Definite,
483  // and to EXIT_FAILURE otherwise.
484  int flag = cholFactor( cov );
485 
486  for( unsigned int i = 0; i < m_xinit.size(); i++ )
487  for( unsigned int j = 0; j < m_xinit.size(); j++ )
488  cov[i][j] = m_M[i][j];
489 
490  return flag;
491 }
492 
493 
494 double BFGSFitter::iterParam ( std::string name )
495 {
496  // First check if user is attempting to access the max_iterations
497  // This is a hack, but it was necessary to ensure a uniform interface
498  if( name == "max_iterations" )
499  return m_max_iterations;
500 
501  // Don't use map::operator[]() to find the name and its
502  // associated value, as it will create one if it doesn't exist.
503  map< string, double * >::const_iterator it
504  = m_iter_params.find ( name );
505 
506  if ( it == m_iter_params.end () )
507  cout << name << " is not a valid iteration parameter name" << endl;
508  else
509  return *m_iter_params[name];
510 
511  return 0.0;
512 }
513 
514 int BFGSFitter::setIterParam ( std::string name, double value )
515 
516 {
517 
518  // First check if user is attempting to modify the max_iterations
519  // This is a hack, but it was necessary to ensure a uniform interface
520  if( name == "max_iterations" )
521  {
522  m_max_iterations = ( int ) value;
523  return EXIT_SUCCESS;
524  }
525 
526  // Now start worrying about the other parameters.
527  // Don't diretly use map::operator[]() to find the name and its
528  // associated value, as it will create one if it doesn't exist.
529  map< string, double * >::const_iterator it
530  = m_iter_params.find ( name );
531 
532  if ( it == m_iter_params.end () )
533  {
534  cout << name << " is not a valid iteration parameter name" << endl;
535  return EXIT_FAILURE;
536  }
537  else
538  {
539  *m_iter_params[name] = value;
540  return EXIT_SUCCESS;
541  }
542 
543  return EXIT_FAILURE;
544 }
void fillFreeParameters(std::vector< double > &) const
Fills the vector with the free parameters values.
Definition: Fitter.cxx:142
unsigned int i
BFGSFitter(const char *name)
The constructor taking name of fitter as argument.
Definition: BFGSFitter.cxx:60
int setInitIter(const std::vector< double > &xinit)
Sets the initial value of the iterate, assuming it is given as a vector.
Definition: BFGSFitter.cxx:458
Collection of linear algebra functions.
virtual bool calcBestFit()
Main driver routine for BFGS algorithm which has been used in computing the bets fit for the function...
Definition: BFGSFitter.cxx:89
Fitter * clone() const
Makes a copy of the receiving object.
Definition: BFGSFitter.cxx:80
const std::vector< double > & initIter() const
Returns the initial value of the iterate.
Definition: BFGSFitter.cxx:453
double m_alpha_max
Maximum step length to try, suggested value by Nocedal and Wright is alpha_max = 4.
Definition: BFGSFitter.h:81
double m_alpha1
First step length to try and this must be less than Alpha_max.
Definition: BFGSFitter.h:85
double innerProduct(const std::vector< double > &a, const std::vector< double > &b)
Computes the dot or the inner product of two vectors(i.e.
Definition: NumLinAlg.cxx:239
int m_max_iterations
The maximum number of iterations allowed in attempting the fit.
Definition: Fitter.h:62
int cholFactor(std::vector< std::vector< double > > &A)
The subroutine which does cholesky factorization of a given Symmetric positive definite matrix (say) ...
Definition: NumLinAlg.cxx:292
double function(const std::vector< double > &x) const
The objective function.
Definition: BFGSFitter.cxx:319
The base class for fitters.
Definition: Fitter.h:33
double gradp(const std::vector< double > &u, const std::vector< double > &p) const
Efficient computation of gradient of the objective function with a vector p.
Definition: BFGSFitter.cxx:404
virtual int calcCovariance(std::vector< std::vector< double > > &cov)
Calculates the covariance matrix.
Definition: BFGSFitter.cxx:472
double wolfeStep(const std::vector< double > &x0, const std::vector< double > &p) const
Computes a step satisfying the Wolfe conditions.
Definition: BFGSFitter.cxx:171
double m_proj_cutoff
The projection cut-off parameter.
Definition: BFGSFitter.h:59
double interpolate(const std::vector< double > &x0, const std::vector< double > &p, double Alphaim, double Alphai) const
A cubic interpolation routine.
Definition: BFGSFitter.cxx:287
vector< vector< double > > outerProduct(const std::vector< double > &a, const std::vector< double > &b)
Computes the outer product of two vectors (i.e.
Definition: NumLinAlg.cxx:252
int eye(std::vector< std::vector< double > > &I, int n)
Creates an n x n identity matrix for M.
Definition: NumLinAlg.cxx:405
BFGSFitter class interface.
double zoom(const std::vector< double > &x0, const std::vector< double > &p, double phi0, double dphi0, double Alphalo, double Alphahi) const
A function which helps out Wolfe in deciding the step length.
Definition: BFGSFitter.cxx:234
double iterParam(std::string name)
Given a string, this function returns the value of the associated iteration parameter.
Definition: BFGSFitter.cxx:494
std::vector< std::vector< double > > m_M
The inverse of the quasi-Hessian.
Definition: BFGSFitter.h:42
double norm(const std::vector< double > &a)
Computes the two norm of the vector.
Definition: NumLinAlg.cxx:285
const std::string & name() const
Returns the name of the fitter.
Definition: Fitter.cxx:57
double m_grad_cutoff
The gradient cut-off parameter.
Definition: BFGSFitter.h:50
double m_step_cutoff
The step cut-off parameter.
Definition: BFGSFitter.h:55
virtual double objectiveValue() const
Calculates the value of the objective function at the current set of parameters.
Definition: Fitter.cxx:222
std::map< std::string, double * > m_iter_params
Map of the various iteration parameters to their name.
Definition: BFGSFitter.h:88
std::vector< double > m_xinit
The initial value to start the iteration from.
Definition: BFGSFitter.h:45
hippodraw::StatedFCN class interface
std::vector< double > gradient(const std::vector< double > &x) const
The gradient of the objective function.
Definition: BFGSFitter.cxx:351
int setIterParam(std::string name, double value)
Given a string and a double, this function sets the value of the associated iteration parameter...
Definition: BFGSFitter.cxx:514
StatedFCN * m_fcn
The objective function.
Definition: Fitter.h:59
list< QAction * >::iterator it
double m_c1
c1,c2 - constants such that 0 &lt; c1 &lt; c2 &lt; 1 and they ensure that strong Wolfe conditions hold true...
Definition: BFGSFitter.h:77

Generated for HippoDraw Class Library by doxygen