LBFGS++
LBFGSB.h
1 // Copyright (C) 2020 Yixuan Qiu <yixuan.qiu@cos.name>
2 // Under MIT license
3 
4 #ifndef LBFGSB_H
5 #define LBFGSB_H
6 
7 #include <stdexcept> // std::invalid_argument
8 #include <vector>
9 #include <Eigen/Core>
10 #include "LBFGSpp/Param.h"
11 #include "LBFGSpp/BFGSMat.h"
12 #include "LBFGSpp/Cauchy.h"
13 #include "LBFGSpp/SubspaceMin.h"
14 #include "LBFGSpp/LineSearchMoreThuente.h"
15 
16 
17 namespace LBFGSpp {
18 
19 
23 template < typename Scalar,
24  template<class> class LineSearch = LineSearchMoreThuente >
26 {
27 private:
28  typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
29  typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;
30  typedef Eigen::Map<Vector> MapVec;
31  typedef std::vector<int> IndexSet;
32 
33  const LBFGSBParam<Scalar>& m_param; // Parameters to control the LBFGS algorithm
34  BFGSMat<Scalar, true> m_bfgs; // Approximation to the Hessian matrix
35  Vector m_fx; // History of the objective function values
36  Vector m_xp; // Old x
37  Vector m_grad; // New gradient
38  Vector m_gradp; // Old gradient
39  Vector m_drt; // Moving direction
40 
41  // Reset internal variables
42  // n: dimension of the vector to be optimized
43  inline void reset(int n)
44  {
45  const int m = m_param.m;
46  m_bfgs.reset(n, m);
47  m_xp.resize(n);
48  m_grad.resize(n);
49  m_gradp.resize(n);
50  m_drt.resize(n);
51  if(m_param.past > 0)
52  m_fx.resize(m_param.past);
53  }
54 
55  // Check whether the vector is within the bounds
56  static bool in_bounds(const Vector& x, const Vector& lb, const Vector& ub)
57  {
58  const int n = x.size();
59  for(int i = 0; i < n; i++)
60  {
61  if(x[i] < lb[i] || x[i] > ub[i])
62  return false;
63  }
64  return true;
65  }
66 
67  // Project the vector x to the bound constraint set
68  static void force_bounds(Vector& x, const Vector& lb, const Vector& ub)
69  {
70  x.noalias() = x.cwiseMax(lb).cwiseMin(ub);
71  }
72 
73  // Norm of the projected gradient
74  // ||P(x-g, l, u) - x||_inf
75  static Scalar proj_grad_norm(const Vector& x, const Vector& g, const Vector& lb, const Vector& ub)
76  {
77  const int n = x.size();
78  Scalar res = Scalar(0);
79  for(int i = 0; i < n; i++)
80  {
81  Scalar proj = std::max(lb[i], x[i] - g[i]);
82  proj = std::min(ub[i], proj);
83  res = std::max(res, std::abs(proj - x[i]));
84  }
85  return res;
86  }
87 
88  // The maximum step size alpha such that x0 + alpha * d stays within the bounds
89  static Scalar max_step_size(const Vector& x0, const Vector& drt, const Vector& lb, const Vector& ub)
90  {
91  const int n = x0.size();
92  Scalar step = std::numeric_limits<Scalar>::infinity();
93 
94  for(int i = 0; i < n; i++)
95  {
96  if(drt[i] > Scalar(0))
97  {
98  step = std::min(step, (ub[i] - x0[i]) / drt[i]);
99  } else if(drt[i] < Scalar(0)) {
100  step = std::min(step, (lb[i] - x0[i]) / drt[i]);
101  }
102  }
103 
104  return step;
105  }
106 
107 public:
115  m_param(param)
116  {
117  m_param.check_param();
118  }
119 
135  template <typename Foo>
136  inline int minimize(Foo& f, Vector& x, Scalar& fx, const Vector& lb, const Vector& ub)
137  {
138  using std::abs;
139 
140  // Dimension of the vector
141  const int n = x.size();
142  if(lb.size() != n || ub.size() != n)
143  throw std::invalid_argument("'lb' and 'ub' must have the same size as 'x'");
144 
145  // Check whether the initial vector is within the bounds
146  if(!in_bounds(x, lb, ub))
147  throw std::invalid_argument("initial 'x' is out of the bounds");
148 
149  // Initialization
150  reset(n);
151 
152  // The length of lag for objective function value to test convergence
153  const int fpast = m_param.past;
154 
155  // Evaluate function and compute gradient
156  fx = f(x, m_grad);
157  Scalar xnorm = x.norm();
158  Scalar projgnorm = proj_grad_norm(x, m_grad, lb, ub);
159  if(fpast > 0)
160  m_fx[0] = fx;
161 
162  // std::cout << "x0 = " << x.transpose() << std::endl;
163  // std::cout << "f(x0) = " << fx << ", ||proj_grad|| = " << projgnorm << std::endl << std::endl;
164 
165  // Early exit if the initial x is already a minimizer
166  if(projgnorm <= m_param.epsilon * std::max(xnorm, Scalar(1)))
167  {
168  return 1;
169  }
170 
171  // Compute generalized Cauchy point
172  Vector xcp(n), vecc;
173  IndexSet newact_set, fv_set;
174  Cauchy<Scalar>::get_cauchy_point(m_bfgs, x, m_grad, lb, ub, xcp, vecc, newact_set, fv_set);
175 
176  /* Vector gcp(n);
177  Scalar fcp = f(xcp, gcp);
178  Scalar projgcpnorm = proj_grad_norm(xcp, gcp, lb, ub);
179  std::cout << "xcp = " << xcp.transpose() << std::endl;
180  std::cout << "f(xcp) = " << fcp << ", ||proj_grad|| = " << projgcpnorm << std::endl << std::endl; */
181 
182  // Initial direction
183  m_drt.noalias() = xcp - x;
184  m_drt.normalize();
185  // Tolerance for s'y >= eps * (y'y)
186  const Scalar eps = std::numeric_limits<Scalar>::epsilon();
187  // s and y vectors
188  Vector vecs(n), vecy(n);
189  // Number of iterations used
190  int k = 1;
191  for( ; ; )
192  {
193  // Save the curent x and gradient
194  m_xp.noalias() = x;
195  m_gradp.noalias() = m_grad;
196 
197  // Line search to update x, fx and gradient
198  Scalar step_max = max_step_size(x, m_drt, lb, ub);
199  step_max = std::min(m_param.max_step, step_max);
200  Scalar step = Scalar(1);
201  step = std::min(step, step_max);
202  LineSearch<Scalar>::LineSearch(f, fx, x, m_grad, step, step_max, m_drt, m_xp, m_param);
203 
204  // New x norm and projected gradient norm
205  xnorm = x.norm();
206  projgnorm = proj_grad_norm(x, m_grad, lb, ub);
207 
208  /* std::cout << "** Iteration " << k << std::endl;
209  std::cout << " x = " << x.transpose() << std::endl;
210  std::cout << " f(x) = " << fx << ", ||proj_grad|| = " << projgnorm << std::endl << std::endl; */
211 
212  // Convergence test -- gradient
213  if(projgnorm <= m_param.epsilon * std::max(xnorm, Scalar(1)))
214  {
215  return k;
216  }
217  // Convergence test -- objective function value
218  if(fpast > 0)
219  {
220  const Scalar fxd = m_fx[k % fpast];
221  if(k >= fpast && abs(fxd - fx) <= m_param.delta * std::max(std::max(abs(fx), abs(fxd)), Scalar(1)))
222  return k;
223 
224  m_fx[k % fpast] = fx;
225  }
226  // Maximum number of iterations
227  if(m_param.max_iterations != 0 && k >= m_param.max_iterations)
228  {
229  return k;
230  }
231 
232  // Update s and y
233  // s_{k+1} = x_{k+1} - x_k
234  // y_{k+1} = g_{k+1} - g_k
235  vecs.noalias() = x - m_xp;
236  vecy.noalias() = m_grad - m_gradp;
237  if(vecs.dot(vecy) > eps * vecy.squaredNorm())
238  m_bfgs.add_correction(vecs, vecy);
239 
240  force_bounds(x, lb, ub);
241  Cauchy<Scalar>::get_cauchy_point(m_bfgs, x, m_grad, lb, ub, xcp, vecc, newact_set, fv_set);
242 
243  /*Vector gcp(n);
244  Scalar fcp = f(xcp, gcp);
245  Scalar projgcpnorm = proj_grad_norm(xcp, gcp, lb, ub);
246  std::cout << "xcp = " << xcp.transpose() << std::endl;
247  std::cout << "f(xcp) = " << fcp << ", ||proj_grad|| = " << projgcpnorm << std::endl << std::endl;*/
248 
249  SubspaceMin<Scalar>::subspace_minimize(m_bfgs, x, xcp, m_grad, lb, ub,
250  vecc, newact_set, fv_set, m_param.max_submin, m_drt);
251 
252  /*Vector gsm(n);
253  Scalar fsm = f(x + m_drt, gsm);
254  Scalar projgsmnorm = proj_grad_norm(x + m_drt, gsm, lb, ub);
255  std::cout << "xsm = " << (x + m_drt).transpose() << std::endl;
256  std::cout << "f(xsm) = " << fsm << ", ||proj_grad|| = " << projgsmnorm << std::endl << std::endl;*/
257 
258  k++;
259  }
260 
261  return k;
262  }
263 };
264 
265 
266 } // namespace LBFGSpp
267 
268 #endif // LBFGSB_H
LBFGSpp::LBFGSBSolver
Definition: LBFGSB.h:25
LBFGSpp::LBFGSBParam::max_submin
int max_submin
Definition: Param.h:268
LBFGSpp::LBFGSBParam
Definition: Param.h:215
LBFGSpp::LBFGSBParam::check_param
void check_param() const
Definition: Param.h:327
LBFGSpp::LBFGSBParam::epsilon
Scalar epsilon
Definition: Param.h:236
LBFGSpp::LBFGSBParam::past
int past
Definition: Param.h:245
LBFGSpp::LBFGSBParam::m
int m
Definition: Param.h:226
LBFGSpp::LBFGSBParam::delta
Scalar delta
Definition: Param.h:254
LBFGSpp::LBFGSBSolver::LBFGSBSolver
LBFGSBSolver(const LBFGSBParam< Scalar > &param)
Definition: LBFGSB.h:114
LBFGSpp::LBFGSBParam::max_step
Scalar max_step
Definition: Param.h:286
LBFGSpp::LBFGSBSolver::minimize
int minimize(Foo &f, Vector &x, Scalar &fx, const Vector &lb, const Vector &ub)
Definition: LBFGSB.h:136
LBFGSpp::LBFGSBParam::max_iterations
int max_iterations
Definition: Param.h:262