LBFGS++
Loading...
Searching...
No Matches
SubspaceMin.h
1// Copyright (C) 2020-2023 Yixuan Qiu <yixuan.qiu@cos.name>
2// Under MIT license
3
4#ifndef LBFGSPP_SUBSPACE_MIN_H
5#define LBFGSPP_SUBSPACE_MIN_H
6
7#include <stdexcept>
8#include <vector>
9#include <Eigen/Core>
10#include "BFGSMat.h"
11
13
14namespace LBFGSpp {
15
16//
17// Subspace minimization procedure of the L-BFGS-B algorithm,
18// mainly for internal use.
19//
20// The target of subspace minimization is to minimize the quadratic function m(x)
21// over the free variables, subject to the bound condition.
22// Free variables stand for coordinates that are not at the boundary in xcp,
23// the generalized Cauchy point.
24//
25// In the classical implementation of L-BFGS-B [1], the minimization is done by first
26// ignoring the box constraints, followed by a line search. Our implementation is
27// an exact minimization subject to the bounds, based on the BOXCQP algorithm [2].
28//
29// Reference:
30// [1] R. H. Byrd, P. Lu, and J. Nocedal (1995). A limited memory algorithm for bound constrained optimization.
31// [2] C. Voglis and I. E. Lagaris (2004). BOXCQP: An algorithm for bound constrained convex quadratic problems.
32//
33template <typename Scalar>
34class SubspaceMin
35{
36private:
37 using Vector = Eigen::Matrix<Scalar, Eigen::Dynamic, 1>;
38 using Matrix = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
39 using IndexSet = std::vector<int>;
40
41 // v[ind]
42 static Vector subvec(const Vector& v, const IndexSet& ind)
43 {
44 const int nsub = ind.size();
45 Vector res(nsub);
46 for (int i = 0; i < nsub; i++)
47 res[i] = v[ind[i]];
48 return res;
49 }
50
51 // v[ind] = rhs
52 static void subvec_assign(Vector& v, const IndexSet& ind, const Vector& rhs)
53 {
54 const int nsub = ind.size();
55 for (int i = 0; i < nsub; i++)
56 v[ind[i]] = rhs[i];
57 }
58
59 // Check whether the vector is within the bounds
60 static bool in_bounds(const Vector& x, const Vector& lb, const Vector& ub)
61 {
62 const int n = x.size();
63 for (int i = 0; i < n; i++)
64 {
65 if (x[i] < lb[i] || x[i] > ub[i])
66 return false;
67 }
68 return true;
69 }
70
71 // Test convergence of P set
72 static bool P_converged(const IndexSet& yP_set, const Vector& vecy, const Vector& vecl, const Vector& vecu)
73 {
74 const int nP = yP_set.size();
75 for (int i = 0; i < nP; i++)
76 {
77 const int coord = yP_set[i];
78 if (vecy[coord] < vecl[coord] || vecy[coord] > vecu[coord])
79 return false;
80 }
81 return true;
82 }
83
84 // Test convergence of L set
85 static bool L_converged(const IndexSet& yL_set, const Vector& lambda)
86 {
87 const int nL = yL_set.size();
88 for (int i = 0; i < nL; i++)
89 {
90 const int coord = yL_set[i];
91 if (lambda[coord] < Scalar(0))
92 return false;
93 }
94 return true;
95 }
96
97 // Test convergence of L set
98 static bool U_converged(const IndexSet& yU_set, const Vector& mu)
99 {
100 const int nU = yU_set.size();
101 for (int i = 0; i < nU; i++)
102 {
103 const int coord = yU_set[i];
104 if (mu[coord] < Scalar(0))
105 return false;
106 }
107 return true;
108 }
109
110public:
111 // bfgs: An object that represents the BFGS approximation matrix.
112 // x0: Current parameter vector.
113 // xcp: Computed generalized Cauchy point.
114 // g: Gradient at x0.
115 // lb: Lower bounds for x.
116 // ub: Upper bounds for x.
117 // Wd: W'(xcp - x0)
118 // newact_set: Coordinates that newly become active during the GCP procedure.
119 // fv_set: Free variable set.
120 // maxit: Maximum number of iterations.
121 // drt: The output direction vector, drt = xsm - x0.
122 static void subspace_minimize(
123 const BFGSMat<Scalar, true>& bfgs, const Vector& x0, const Vector& xcp, const Vector& g,
124 const Vector& lb, const Vector& ub, const Vector& Wd, const IndexSet& newact_set, const IndexSet& fv_set, int maxit,
125 Vector& drt)
126 {
127 // std::cout << "========================= Entering subspace minimization =========================\n\n";
128
129 // d = xcp - x0
130 drt.noalias() = xcp - x0;
131 // Size of free variables
132 const int nfree = fv_set.size();
133 // If there is no free variable, simply return drt
134 if (nfree < 1)
135 {
136 // std::cout << "========================= (Early) leaving subspace minimization =========================\n\n";
137 return;
138 }
139
140 // std::cout << "New active set = [ "; for(std::size_t i = 0; i < newact_set.size(); i++) std::cout << newact_set[i] << " "; std::cout << "]\n";
141 // std::cout << "Free variable set = [ "; for(std::size_t i = 0; i < fv_set.size(); i++) std::cout << fv_set[i] << " "; std::cout << "]\n\n";
142
143 // Extract the rows of W in the free variable set
144 Matrix WF = bfgs.Wb(fv_set);
145 // Compute F'BAb = -F'WMW'AA'd
146 Vector vecc(nfree);
147 bfgs.compute_FtBAb(WF, fv_set, newact_set, Wd, drt, vecc);
148 // Set the vector c=F'BAb+F'g for linear term, and vectors l and u for the new bounds
149 Vector vecl(nfree), vecu(nfree);
150 for (int i = 0; i < nfree; i++)
151 {
152 const int coord = fv_set[i];
153 vecl[i] = lb[coord] - x0[coord];
154 vecu[i] = ub[coord] - x0[coord];
155 vecc[i] += g[coord];
156 }
157 // Solve y = -inv(B[F, F]) * c
158 Vector vecy(nfree);
159 bfgs.solve_PtBP(WF, -vecc, vecy);
160 // Test feasibility
161 // If yes, then the solution has been found
162 if (in_bounds(vecy, vecl, vecu))
163 {
164 subvec_assign(drt, fv_set, vecy);
165 return;
166 }
167 // Otherwise, enter the iterations
168
169 // Make a copy of y as a fallback solution
170 Vector yfallback = vecy;
171 // Dual variables
172 Vector lambda = Vector::Zero(nfree), mu = Vector::Zero(nfree);
173
174 // Iterations
175 IndexSet L_set, U_set, P_set, yL_set, yU_set, yP_set;
176 L_set.reserve(nfree / 3);
177 yL_set.reserve(nfree / 3);
178 U_set.reserve(nfree / 3);
179 yU_set.reserve(nfree / 3);
180 P_set.reserve(nfree);
181 yP_set.reserve(nfree);
182 int k;
183 for (k = 0; k < maxit; k++)
184 {
185 // Construct the L, U, and P sets, and then update values
186 // Indices in original drt vector
187 L_set.clear();
188 U_set.clear();
189 P_set.clear();
190 // Indices in y
191 yL_set.clear();
192 yU_set.clear();
193 yP_set.clear();
194 for (int i = 0; i < nfree; i++)
195 {
196 const int coord = fv_set[i];
197 const Scalar li = vecl[i], ui = vecu[i];
198 if ((vecy[i] < li) || (vecy[i] == li && lambda[i] >= Scalar(0)))
199 {
200 L_set.push_back(coord);
201 yL_set.push_back(i);
202 vecy[i] = li;
203 mu[i] = Scalar(0);
204 }
205 else if ((vecy[i] > ui) || (vecy[i] == ui && mu[i] >= Scalar(0)))
206 {
207 U_set.push_back(coord);
208 yU_set.push_back(i);
209 vecy[i] = ui;
210 lambda[i] = Scalar(0);
211 }
212 else
213 {
214 P_set.push_back(coord);
215 yP_set.push_back(i);
216 lambda[i] = Scalar(0);
217 mu[i] = Scalar(0);
218 }
219 }
220
221 /* std::cout << "** Iter " << k << " **\n";
222 std::cout << " L = [ "; for(std::size_t i = 0; i < L_set.size(); i++) std::cout << L_set[i] << " "; std::cout << "]\n";
223 std::cout << " U = [ "; for(std::size_t i = 0; i < U_set.size(); i++) std::cout << U_set[i] << " "; std::cout << "]\n";
224 std::cout << " P = [ "; for(std::size_t i = 0; i < P_set.size(); i++) std::cout << P_set[i] << " "; std::cout << "]\n\n"; */
225
226 // Extract the rows of W in the P set
227 Matrix WP = bfgs.Wb(P_set);
228 // Solve y[P] = -inv(B[P, P]) * (B[P, L] * l[L] + B[P, U] * u[U] + c[P])
229 const int nP = P_set.size();
230 if (nP > 0)
231 {
232 Vector rhs = subvec(vecc, yP_set);
233 Vector lL = subvec(vecl, yL_set);
234 Vector uU = subvec(vecu, yU_set);
235 Vector tmp(nP);
236 bool nonzero = bfgs.apply_PtBQv(WP, L_set, lL, tmp, true);
237 if (nonzero)
238 rhs.noalias() += tmp;
239 nonzero = bfgs.apply_PtBQv(WP, U_set, uU, tmp, true);
240 if (nonzero)
241 rhs.noalias() += tmp;
242
243 bfgs.solve_PtBP(WP, -rhs, tmp);
244 subvec_assign(vecy, yP_set, tmp);
245 }
246
247 // Solve lambda[L] = B[L, F] * y + c[L]
248 const int nL = L_set.size();
249 const int nU = U_set.size();
250 Vector Fy;
251 if (nL > 0 || nU > 0)
252 bfgs.apply_WtPv(fv_set, vecy, Fy);
253 if (nL > 0)
254 {
255 Vector res;
256 bfgs.apply_PtWMv(L_set, Fy, res, Scalar(-1));
257 res.noalias() += subvec(vecc, yL_set) + bfgs.theta() * subvec(vecy, yL_set);
258 subvec_assign(lambda, yL_set, res);
259 }
260
261 // Solve mu[U] = -B[U, F] * y - c[U]
262 if (nU > 0)
263 {
264 Vector negRes;
265 bfgs.apply_PtWMv(U_set, Fy, negRes, Scalar(-1));
266 negRes.noalias() += subvec(vecc, yU_set) + bfgs.theta() * subvec(vecy, yU_set);
267 subvec_assign(mu, yU_set, -negRes);
268 }
269
270 // Test convergence
271 if (L_converged(yL_set, lambda) && U_converged(yU_set, mu) && P_converged(yP_set, vecy, vecl, vecu))
272 break;
273 }
274
275 // If the iterations do not converge, try the projection
276 if (k >= maxit)
277 {
278 vecy.noalias() = vecy.cwiseMax(vecl).cwiseMin(vecu);
279 subvec_assign(drt, fv_set, vecy);
280 // Test whether drt is a descent direction
281 Scalar dg = drt.dot(g);
282 // If yes, return the result
283 if (dg <= -std::numeric_limits<Scalar>::epsilon())
284 return;
285
286 // If not, fall back to the projected unconstrained solution
287 vecy.noalias() = yfallback.cwiseMax(vecl).cwiseMin(vecu);
288 subvec_assign(drt, fv_set, vecy);
289 dg = drt.dot(g);
290 if (dg <= -std::numeric_limits<Scalar>::epsilon())
291 return;
292
293 // If still not, fall back to the unconstrained solution
294 subvec_assign(drt, fv_set, yfallback);
295 return;
296 }
297
298 // std::cout << "** Minimization finished in " << k + 1 << " iteration(s) **\n\n";
299 // std::cout << "========================= Leaving subspace minimization =========================\n\n";
300
301 subvec_assign(drt, fv_set, vecy);
302 }
303};
304
305} // namespace LBFGSpp
306
308
309#endif // LBFGSPP_SUBSPACE_MIN_H