LBFGS++
Loading...
Searching...
No Matches
Cauchy.h
1// Copyright (C) 2020-2023 Yixuan Qiu <yixuan.qiu@cos.name>
2// Under MIT license
3
4#ifndef LBFGSPP_CAUCHY_H
5#define LBFGSPP_CAUCHY_H
6
7#include <vector>
8#include <Eigen/Core>
9#include "BFGSMat.h"
10
12
13namespace LBFGSpp {
14
15//
16// Class to compute the generalized Cauchy point (GCP) for the L-BFGS-B algorithm,
17// mainly for internal use.
18//
19// The target of the GCP procedure is to find a step size t such that
20// x(t) = x0 - t * g is a local minimum of the quadratic function m(x),
21// where m(x) is a local approximation to the objective function.
22//
23// First determine a sequence of break points t0=0, t1, t2, ..., tn.
24// On each interval [t[i-1], t[i]], x is changing linearly.
25// After passing a break point, one or more coordinates of x will be fixed at the bounds.
26// We search the first local minimum of m(x) by examining the intervals [t[i-1], t[i]] sequentially.
27//
28// Reference:
29// [1] R. H. Byrd, P. Lu, and J. Nocedal (1995). A limited memory algorithm for bound constrained optimization.
30//
31template <typename Scalar>
32class ArgSort
33{
34private:
35 using Vector = Eigen::Matrix<Scalar, Eigen::Dynamic, 1>;
36 using IndexSet = std::vector<int>;
37
38 const Scalar* values;
39
40public:
41 ArgSort(const Vector& value_vec) :
42 values(value_vec.data())
43 {}
44
45 inline bool operator()(int key1, int key2) { return values[key1] < values[key2]; }
46 inline void sort_key(IndexSet& key_vec) const
47 {
48 std::sort(key_vec.begin(), key_vec.end(), *this);
49 }
50};
51
52template <typename Scalar>
53class Cauchy
54{
55private:
56 typedef Eigen::Matrix<Scalar, Eigen::Dynamic, 1> Vector;
57 typedef Eigen::Matrix<int, Eigen::Dynamic, 1> IntVector;
58 typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> Matrix;
59 typedef std::vector<int> IndexSet;
60
61 // Find the smallest index i such that brk[ord[i]] > t, assuming brk[ord] is already sorted.
62 // If the return value equals n, then all values are <= t.
63 static int search_greater(const Vector& brk, const IndexSet& ord, const Scalar& t, int start = 0)
64 {
65 const int nord = ord.size();
66 int i;
67 for (i = start; i < nord; i++)
68 {
69 if (brk[ord[i]] > t)
70 break;
71 }
72
73 return i;
74 }
75
76public:
77 // bfgs: An object that represents the BFGS approximation matrix.
78 // x0: Current parameter vector.
79 // g: Gradient at x0.
80 // lb: Lower bounds for x.
81 // ub: Upper bounds for x.
82 // xcp: The output generalized Cauchy point.
83 // vecc: c = W'(xcp - x0), used in the subspace minimization routine.
84 // newact_set: Coordinates that newly become active during the GCP procedure.
85 // fv_set: Free variable set.
86 static void get_cauchy_point(
87 const BFGSMat<Scalar, true>& bfgs, const Vector& x0, const Vector& g, const Vector& lb, const Vector& ub,
88 Vector& xcp, Vector& vecc, IndexSet& newact_set, IndexSet& fv_set)
89 {
90 // std::cout << "========================= Entering GCP search =========================\n\n";
91
92 // Initialization
93 const int n = x0.size();
94 xcp.resize(n);
95 xcp.noalias() = x0;
96 vecc.resize(2 * bfgs.num_corrections());
97 vecc.setZero();
98 newact_set.clear();
99 newact_set.reserve(n);
100 fv_set.clear();
101 fv_set.reserve(n);
102
103 // Construct break points
104 Vector brk(n), vecd(n);
105 // If brk[i] == 0, i belongs to active set
106 // If brk[i] == Inf, i belongs to free variable set
107 // Others are currently undecided
108 IndexSet ord;
109 ord.reserve(n);
110 const Scalar inf = std::numeric_limits<Scalar>::infinity();
111 for (int i = 0; i < n; i++)
112 {
113 if (lb[i] == ub[i])
114 brk[i] = Scalar(0);
115 else if (g[i] < Scalar(0))
116 brk[i] = (x0[i] - ub[i]) / g[i];
117 else if (g[i] > Scalar(0))
118 brk[i] = (x0[i] - lb[i]) / g[i];
119 else
120 brk[i] = inf;
121
122 const bool iszero = (brk[i] == Scalar(0));
123 vecd[i] = iszero ? Scalar(0) : -g[i];
124
125 if (brk[i] == inf)
126 fv_set.push_back(i);
127 else if (!iszero)
128 ord.push_back(i);
129 }
130
131 // Sort indices of break points
132 ArgSort<Scalar> sorting(brk);
133 sorting.sort_key(ord);
134
135 // Break points `brko := brk[ord]` are in increasing order
136 // `ord` contains the coordinates that define the corresponding break points
137 // brk[i] == 0 <=> The i-th coordinate is on the boundary
138 const int nord = ord.size();
139 const int nfree = fv_set.size();
140 if ((nfree < 1) && (nord < 1))
141 {
142 /* std::cout << "** All coordinates at boundary **\n";
143 std::cout << "\n========================= Leaving GCP search =========================\n\n"; */
144 return;
145 }
146
147 // First interval: [il=0, iu=brk[ord[0]]]
148 // In case ord is empty, we take iu=Inf
149
150 // p = W'd, c = 0
151 Vector vecp;
152 bfgs.apply_Wtv(vecd, vecp);
153 // f' = -d'd
154 Scalar fp = -vecd.squaredNorm();
155 // f'' = -theta * f' - p'Mp
156 Vector cache;
157 bfgs.apply_Mv(vecp, cache); // cache = Mp
158 Scalar fpp = -bfgs.theta() * fp - vecp.dot(cache);
159
160 // Theoretical step size to move
161 Scalar deltatmin = -fp / fpp;
162
163 // Limit on the current interval
164 Scalar il = Scalar(0);
165 // We have excluded the case that max(brk) <= 0
166 int b = 0;
167 Scalar iu = (nord < 1) ? inf : brk[ord[b]];
168 Scalar deltat = iu - il;
169
170 /* int iter = 0;
171 std::cout << "** Iter " << iter << " **\n";
172 std::cout << " fp = " << fp << ", fpp = " << fpp << ", deltatmin = " << deltatmin << std::endl;
173 std::cout << " il = " << il << ", iu = " << iu << ", deltat = " << deltat << std::endl; */
174
175 // If deltatmin >= deltat, we need to do the following things:
176 // 1. Update vecc
177 // 2. Since we are going to cross iu, the coordinates that define iu become active
178 // 3. Update some quantities on these new active coordinates (xcp, vecd, vecp)
179 // 4. Move to the next interval and compute the new deltatmin
180 bool crossed_all = false;
181 const int ncorr = bfgs.num_corrections();
182 Vector wact(2 * ncorr);
183 while (deltatmin >= deltat)
184 {
185 // Step 1
186 vecc.noalias() += deltat * vecp;
187
188 // Step 2
189 // First check how many coordinates will be active when we cross the previous iu
190 // b is the smallest number such that brko[b] == iu
191 // Let bp be the largest number such that brko[bp] == iu
192 // Then coordinates ord[b] to ord[bp] will be active
193 const int act_begin = b;
194 const int act_end = search_greater(brk, ord, iu, b) - 1;
195
196 // If nfree == 0 and act_end == nord-1, then we have crossed all coordinates
197 // We only need to update xcp from ord[b] to ord[bp], and then exit
198 if ((nfree == 0) && (act_end == nord - 1))
199 {
200 // std::cout << "** [ ";
201 for (int i = act_begin; i <= act_end; i++)
202 {
203 const int act = ord[i];
204 xcp[act] = (vecd[act] > Scalar(0)) ? ub[act] : lb[act];
205 newact_set.push_back(act);
206 // std::cout << act + 1 << " ";
207 }
208 // std::cout << "] become active **\n\n";
209 // std::cout << "** All break points visited **\n\n";
210
211 crossed_all = true;
212 break;
213 }
214
215 // Step 3
216 // Update xcp and d on active coordinates
217 // std::cout << "** [ ";
218 fp += deltat * fpp;
219 for (int i = act_begin; i <= act_end; i++)
220 {
221 const int act = ord[i];
222 xcp[act] = (vecd[act] > Scalar(0)) ? ub[act] : lb[act];
223 // z = xcp - x0
224 const Scalar zact = xcp[act] - x0[act];
225 const Scalar gact = g[act];
226 const Scalar ggact = gact * gact;
227 wact.noalias() = bfgs.Wb(act);
228 bfgs.apply_Mv(wact, cache); // cache = Mw
229 fp += ggact + bfgs.theta() * gact * zact - gact * cache.dot(vecc);
230 fpp -= (bfgs.theta() * ggact + 2 * gact * cache.dot(vecp) + ggact * cache.dot(wact));
231 vecp.noalias() += gact * wact;
232 vecd[act] = Scalar(0);
233 newact_set.push_back(act);
234 // std::cout << act + 1 << " ";
235 }
236 // std::cout << "] become active **\n\n";
237
238 // Step 4
239 // Theoretical step size to move
240 deltatmin = -fp / fpp;
241 // Update interval bound
242 il = iu;
243 b = act_end + 1;
244 // If we have visited all finite-valued break points, and have not exited earlier,
245 // then the next iu will be infinity. Simply exit the loop now
246 if (b >= nord)
247 break;
248 iu = brk[ord[b]];
249 // Width of the current interval
250 deltat = iu - il;
251
252 /* iter++;
253 std::cout << "** Iter " << iter << " **\n";
254 std::cout << " fp = " << fp << ", fpp = " << fpp << ", deltatmin = " << deltatmin << std::endl;
255 std::cout << " il = " << il << ", iu = " << iu << ", deltat = " << deltat << std::endl; */
256 }
257
258 // In some rare cases fpp is numerically zero, making deltatmin equal to Inf
259 // If this happens, force fpp to be the machine precision
260 const Scalar eps = std::numeric_limits<Scalar>::epsilon();
261 if (fpp < eps)
262 deltatmin = -fp / eps;
263
264 // Last step
265 if (!crossed_all)
266 {
267 deltatmin = std::max(deltatmin, Scalar(0));
268 vecc.noalias() += deltatmin * vecp;
269 const Scalar tfinal = il + deltatmin;
270 // Update xcp on free variable coordinates
271 for (int i = 0; i < nfree; i++)
272 {
273 const int coord = fv_set[i];
274 xcp[coord] = x0[coord] + tfinal * vecd[coord];
275 }
276 for (int i = b; i < nord; i++)
277 {
278 const int coord = ord[i];
279 xcp[coord] = x0[coord] + tfinal * vecd[coord];
280 fv_set.push_back(coord);
281 }
282 }
283 // std::cout << "\n========================= Leaving GCP search =========================\n\n";
284 }
285};
286
287} // namespace LBFGSpp
288
290
291#endif // LBFGSPP_CAUCHY_H