LBFGS++
LBFGSB.h
// Copyright (C) 2020-2023 Yixuan Qiu <yixuan.qiu@cos.name>
// Under MIT license

#ifndef LBFGSPP_LBFGSB_H
#define LBFGSPP_LBFGSB_H

#include <stdexcept>  // std::invalid_argument
#include <vector>
#include <algorithm>  // std::min, std::max
#include <limits>     // std::numeric_limits
#include <Eigen/Core>
#include "LBFGSpp/Param.h"
#include "LBFGSpp/BFGSMat.h"
#include "LBFGSpp/Cauchy.h"
#include "LBFGSpp/SubspaceMin.h"
#include "LBFGSpp/LineSearchMoreThuente.h"

namespace LBFGSpp {

///
/// L-BFGS-B solver for box-constrained numerical optimization
///
template <typename Scalar,
          template <class> class LineSearch = LineSearchMoreThuente>
class LBFGSBSolver
{
private:
    using Vector = Eigen::Matrix<Scalar, Eigen::Dynamic, 1>;
    using Matrix = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
    using MapVec = Eigen::Map<Vector>;
    using IndexSet = std::vector<int>;

    const LBFGSBParam<Scalar>& m_param;  // Parameters to control the LBFGS algorithm
    BFGSMat<Scalar, true> m_bfgs;        // Approximation to the Hessian matrix
    Vector m_fx;                         // History of the objective function values
    Vector m_xp;                         // Old x
    Vector m_grad;                       // New gradient
    Scalar m_projgnorm;                  // Projected gradient norm
    Vector m_gradp;                      // Old gradient
    Vector m_drt;                        // Moving direction

    // Reset internal variables
    // n: dimension of the vector to be optimized
    inline void reset(int n)
    {
        const int m = m_param.m;
        m_bfgs.reset(n, m);
        m_xp.resize(n);
        m_grad.resize(n);
        m_gradp.resize(n);
        m_drt.resize(n);
        if (m_param.past > 0)
            m_fx.resize(m_param.past);
    }

    // Project the vector x to the bound constraint set
    static void force_bounds(Vector& x, const Vector& lb, const Vector& ub)
    {
        x.noalias() = x.cwiseMax(lb).cwiseMin(ub);
    }

    // Norm of the projected gradient
    // ||P(x-g, l, u) - x||_inf
    static Scalar proj_grad_norm(const Vector& x, const Vector& g, const Vector& lb, const Vector& ub)
    {
        return ((x - g).cwiseMax(lb).cwiseMin(ub) - x).cwiseAbs().maxCoeff();
    }
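    // Worked example: with x = (0, 1), g = (-2, 0.5), lb = (0, 0), ub = (1, 1),
    // x - g = (2, 0.5) projects onto the box as (1, 0.5), so the norm is
    // max(|1 - 0|, |0.5 - 1|) = 1. This quantity vanishes exactly at a
    // first-order stationary point of the box-constrained problem, which is
    // why it drives the convergence tests in minimize() below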

    // The maximum step size alpha such that x0 + alpha * d stays within the bounds
    static Scalar max_step_size(const Vector& x0, const Vector& drt, const Vector& lb, const Vector& ub)
    {
        const int n = x0.size();
        Scalar step = std::numeric_limits<Scalar>::infinity();

        for (int i = 0; i < n; i++)
        {
            if (drt[i] > Scalar(0))
            {
                step = std::min(step, (ub[i] - x0[i]) / drt[i]);
            }
            else if (drt[i] < Scalar(0))
            {
                step = std::min(step, (lb[i] - x0[i]) / drt[i]);
            }
        }

        return step;
    }
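    // Worked example: for x0 = (0.5, 0.5), drt = (1, -1), lb = (0, 0),
    // ub = (2, 2), the first component allows (2 - 0.5) / 1 = 1.5 and the
    // second allows (0 - 0.5) / (-1) = 0.5, so the maximum feasible step is
    // 0.5. Components with drt[i] == 0 impose no limit on the step size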

public:
    ///
    /// Constructor for the L-BFGS-B solver.
    ///
    /// \param param An object of \ref LBFGSBParam to store parameters for the algorithm
    ///
    LBFGSBSolver(const LBFGSBParam<Scalar>& param) :
        m_param(param)
    {
        m_param.check_param();
    }

    ///
    /// Minimizing a multivariate function subject to box constraints, using the L-BFGS-B algorithm.
    /// Exceptions will be thrown if an error occurs.
    ///
    /// \param f  A function object such that `f(x, grad)` returns the
    ///           objective function value at `x`, and overwrites `grad` with
    ///           the gradient.
    /// \param x  In: An initial guess of the optimal point. Out: The best point found.
    /// \param fx Out: The objective function value at `x`.
    /// \param lb Lower bounds for `x`.
    /// \param ub Upper bounds for `x`.
    ///
    /// \return Number of iterations used.
    ///
    template <typename Foo>
    inline int minimize(Foo& f, Vector& x, Scalar& fx, const Vector& lb, const Vector& ub)
    {
        using std::abs;

        // Dimension of the vector
        const int n = x.size();
        if (lb.size() != n || ub.size() != n)
            throw std::invalid_argument("'lb' and 'ub' must have the same size as 'x'");

        // Check whether the initial vector is within the bounds
        // If not, project to the feasible set
        force_bounds(x, lb, ub);

        // Initialization
        reset(n);

        // The lag length for the objective function values used to test convergence
        const int fpast = m_param.past;

        // Evaluate function and compute gradient
        fx = f(x, m_grad);
        m_projgnorm = proj_grad_norm(x, m_grad, lb, ub);
        if (fpast > 0)
            m_fx[0] = fx;

        // std::cout << "x0 = " << x.transpose() << std::endl;
        // std::cout << "f(x0) = " << fx << ", ||proj_grad|| = " << m_projgnorm << std::endl << std::endl;

        // Early exit if the initial x is already a minimizer
        if (m_projgnorm <= m_param.epsilon || m_projgnorm <= m_param.epsilon_rel * x.norm())
        {
            return 1;
        }

        // Compute the generalized Cauchy point
        Vector xcp(n), vecc;
        IndexSet newact_set, fv_set;
        Cauchy<Scalar>::get_cauchy_point(m_bfgs, x, m_grad, lb, ub, xcp, vecc, newact_set, fv_set);
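        // The generalized Cauchy point is the first local minimizer of the
        // quadratic model along the projected steepest descent path;
        // newact_set and fv_set collect the indices of newly active and
        // free variables found during this search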

        /* Vector gcp(n);
        Scalar fcp = f(xcp, gcp);
        Scalar projgcpnorm = proj_grad_norm(xcp, gcp, lb, ub);
        std::cout << "xcp = " << xcp.transpose() << std::endl;
        std::cout << "f(xcp) = " << fcp << ", ||proj_grad|| = " << projgcpnorm << std::endl << std::endl; */

        // Initial direction
        m_drt.noalias() = xcp - x;
        m_drt.normalize();
        // Tolerance for s'y >= eps * (y'y)
        constexpr Scalar eps = std::numeric_limits<Scalar>::epsilon();
        // s and y vectors
        Vector vecs(n), vecy(n);
        // Number of iterations used
        int k = 1;
        for (;;)
        {
            // Save the current x and gradient
            m_xp.noalias() = x;
            m_gradp.noalias() = m_grad;
            Scalar dg = m_grad.dot(m_drt);

            // Maximum step size to make x feasible
            Scalar step_max = max_step_size(x, m_drt, lb, ub);

            // In some cases, the direction returned by the subspace minimization procedure
            // in the previous iteration is pathological, leading to issues such as
            // step_max ~= 0 and dg >= 0. If this happens, we use xcp - x as the search
            // direction, and reset the BFGS matrix. This is because xsm (the subspace
            // minimizer) heavily depends on the BFGS matrix. If xsm is corrupted, then
            // we may suspect there is something wrong in the BFGS matrix, and it is
            // safer to reset the matrix. In contrast, xcp is obtained from a line
            // search, which tends to be more robust.
            if (dg >= Scalar(0) || step_max <= m_param.min_step)
            {
                // Reset search direction
                m_drt.noalias() = xcp - x;
                // Reset BFGS matrix
                m_bfgs.reset(n, m_param.m);
                // Recompute dg and step_max
                dg = m_grad.dot(m_drt);
                step_max = max_step_size(x, m_drt, lb, ub);
            }

            // Line search to update x, fx and gradient
            step_max = std::min(m_param.max_step, step_max);
            Scalar step = Scalar(1);
            step = std::min(step, step_max);
            LineSearch<Scalar>::LineSearch(f, m_param, m_xp, m_drt, step_max, step, fx, m_grad, dg, x);

            // New projected gradient norm
            m_projgnorm = proj_grad_norm(x, m_grad, lb, ub);

            /* std::cout << "** Iteration " << k << std::endl;
            std::cout << "   x = " << x.transpose() << std::endl;
            std::cout << "   f(x) = " << fx << ", ||proj_grad|| = " << m_projgnorm << std::endl << std::endl; */

            // Convergence test -- gradient
            if (m_projgnorm <= m_param.epsilon || m_projgnorm <= m_param.epsilon_rel * x.norm())
            {
                return k;
            }
            // Convergence test -- objective function value
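            // m_fx acts as a circular buffer: m_fx[k % fpast] still holds the
            // objective value from fpast iterations ago, so the test below
            // measures the relative decrease of f over the last fpast iterations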
            if (fpast > 0)
            {
                const Scalar fxd = m_fx[k % fpast];
                if (k >= fpast && abs(fxd - fx) <= m_param.delta * std::max(std::max(abs(fx), abs(fxd)), Scalar(1)))
                    return k;

                m_fx[k % fpast] = fx;
            }
            // Maximum number of iterations
            if (m_param.max_iterations != 0 && k >= m_param.max_iterations)
            {
                return k;
            }

            // Update s and y
            // s_{k+1} = x_{k+1} - x_k
            // y_{k+1} = g_{k+1} - g_k
            vecs.noalias() = x - m_xp;
            vecy.noalias() = m_grad - m_gradp;
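            // Skip the correction pair when the curvature s'y is too small,
            // since a near-zero curvature pair would make the quasi-Newton
            // approximation ill-conditioned or destroy its positive definiteness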
            if (vecs.dot(vecy) > eps * vecy.squaredNorm())
                m_bfgs.add_correction(vecs, vecy);

            force_bounds(x, lb, ub);
            Cauchy<Scalar>::get_cauchy_point(m_bfgs, x, m_grad, lb, ub, xcp, vecc, newact_set, fv_set);

            /* Vector gcp(n);
            Scalar fcp = f(xcp, gcp);
            Scalar projgcpnorm = proj_grad_norm(xcp, gcp, lb, ub);
            std::cout << "xcp = " << xcp.transpose() << std::endl;
            std::cout << "f(xcp) = " << fcp << ", ||proj_grad|| = " << projgcpnorm << std::endl << std::endl; */

            SubspaceMin<Scalar>::subspace_minimize(m_bfgs, x, xcp, m_grad, lb, ub,
                                                   vecc, newact_set, fv_set, m_param.max_submin, m_drt);

            /* Vector gsm(n);
            Scalar fsm = f(x + m_drt, gsm);
            Scalar projgsmnorm = proj_grad_norm(x + m_drt, gsm, lb, ub);
            std::cout << "xsm = " << (x + m_drt).transpose() << std::endl;
            std::cout << "f(xsm) = " << fsm << ", ||proj_grad|| = " << projgsmnorm << std::endl << std::endl; */

            k++;
        }

        return k;
    }

    ///
    /// Returning the gradient vector on the last iterate.
    /// Should only be called after the `minimize()` function.
    ///
    /// \return A const reference to the gradient vector.
    ///
    const Vector& final_grad() const { return m_grad; }

    ///
    /// Returning the infinity norm of the final projected gradient,
    /// i.e. ||P(x - g, lb, ub) - x||_inf, where P() denotes projection
    /// onto the box [lb, ub].
    ///
    Scalar final_grad_norm() const { return m_projgnorm; }
};

}  // namespace LBFGSpp

#endif  // LBFGSPP_LBFGSB_H
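
Usage sketch (illustrative, not part of the header): the snippet below mirrors the
calling pattern documented for LBFGS++; the quadratic objective `quad` and the
bound values are made-up illustration choices. Any real objective just needs to
return f(x) and write the gradient into its second argument.

#include <iostream>
#include <Eigen/Core>
#include <LBFGSpp/LBFGSB.h>

using namespace LBFGSpp;

// A simple convex objective: f(x) = sum_i (x[i] - 1)^2,
// with gradient grad[i] = 2 * (x[i] - 1)
double quad(const Eigen::VectorXd& x, Eigen::VectorXd& grad)
{
    grad = 2.0 * (x.array() - 1.0).matrix();
    return (x.array() - 1.0).square().sum();
}

int main()
{
    const int n = 10;
    LBFGSBParam<double> param;
    param.epsilon = 1e-6;
    param.max_iterations = 100;
    LBFGSBSolver<double> solver(param);

    // Box [0, 0.5]^n: the unconstrained minimizer (all ones) is infeasible,
    // so the solver should converge to the upper bound
    Eigen::VectorXd lb = Eigen::VectorXd::Constant(n, 0.0);
    Eigen::VectorXd ub = Eigen::VectorXd::Constant(n, 0.5);
    Eigen::VectorXd x = Eigen::VectorXd::Constant(n, 0.25);

    double fx;
    int niter = solver.minimize(quad, x, fx, lb, ub);

    std::cout << niter << " iterations" << std::endl;
    std::cout << "f(x) = " << fx
              << ", ||proj_grad|| = " << solver.final_grad_norm() << std::endl;
    return 0;
}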