LBFGS++
Loading...
Searching...
No Matches
LineSearchNocedalWright.h
1// Copyright (C) 2016-2023 Yixuan Qiu <yixuan.qiu@cos.name>
2// Copyright (C) 2016-2023 Dirk Toewe <DirkToewe@GoogleMail.com>
3// Under MIT license
4
5#ifndef LBFGSPP_LINE_SEARCH_NOCEDAL_WRIGHT_H
6#define LBFGSPP_LINE_SEARCH_NOCEDAL_WRIGHT_H
7
8#include <Eigen/Core>
9#include <stdexcept>
10
11namespace LBFGSpp {
12
20template <typename Scalar>
22{
23private:
24 using Vector = Eigen::Matrix<Scalar, Eigen::Dynamic, 1>;
25
26 // Use {fx_lo, fx_hi, dg_lo} to make a quadratic interpolation of
27 // the function, and the fitted quadratic function is used to
28 // estimate the minimum
29 static Scalar quad_interp(const Scalar& step_lo, const Scalar& step_hi,
30 const Scalar& fx_lo, const Scalar& fx_hi, const Scalar& dg_lo)
31 {
32 using std::abs;
33
34 // polynomial: p (x) = c0*(x - step)² + c1
35 // conditions: p (step_hi) = fx_hi
36 // p (step_lo) = fx_lo
37 // p'(step_lo) = dg_lo
38
39 // We allow fx_hi to be Inf, so first compute a candidate for step size,
40 // and test whether NaN occurs
41 const Scalar fdiff = fx_hi - fx_lo;
42 const Scalar sdiff = step_hi - step_lo;
43 const Scalar smid = (step_hi + step_lo) / Scalar(2);
44 Scalar step_candid = fdiff * step_lo - smid * sdiff * dg_lo;
45 step_candid = step_candid / (fdiff - sdiff * dg_lo);
46
47 // In some cases the interpolation is not a good choice
48 // This includes (a) NaN values; (b) too close to the end points; (c) outside the interval
49 // In such cases, a bisection search is used
50 const bool candid_nan = !(std::isfinite(step_candid));
51 const Scalar end_dist = std::min(abs(step_candid - step_lo), abs(step_candid - step_hi));
52 const bool near_end = end_dist < Scalar(0.01) * abs(sdiff);
53 const bool bisect = candid_nan ||
54 (step_candid <= std::min(step_lo, step_hi)) ||
55 (step_candid >= std::max(step_lo, step_hi)) ||
56 near_end;
57 const Scalar step = bisect ? smid : step_candid;
58 return step;
59 }
60
61public:
83 template <typename Foo>
84 static void LineSearch(Foo& f, const LBFGSParam<Scalar>& param,
85 const Vector& xp, const Vector& drt, const Scalar& step_max,
86 Scalar& step, Scalar& fx, Vector& grad, Scalar& dg, Vector& x)
87 {
88 // Check the value of step
89 if (step <= Scalar(0))
90 throw std::invalid_argument("'step' must be positive");
91
93 throw std::invalid_argument("'param.linesearch' must be 'LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE' for LineSearchNocedalWright");
94
95 // To make this implementation more similar to the other line search
96 // methods in LBFGSpp, the symbol names from the literature
97 // ("Numerical Optimizations") have been changed.
98 //
99 // Literature | LBFGSpp
100 // -----------|--------
101 // alpha | step
102 // phi | fx
103 // phi' | dg
104
105 // The expansion rate of the step size
106 const Scalar expansion = Scalar(2);
107
108 // Save the function value at the current x
109 const Scalar fx_init = fx;
110 // Projection of gradient on the search direction
111 const Scalar dg_init = dg;
112 // Make sure d points to a descent direction
113 if (dg_init > Scalar(0))
114 throw std::logic_error("the moving direction increases the objective function value");
115
116 const Scalar test_decr = param.ftol * dg_init, // Sufficient decrease
117 test_curv = -param.wolfe * dg_init; // Curvature
118
119 // Ends of the line search range (step_lo > step_hi is allowed)
120 // We can also define dg_hi, but it will never be used
121 Scalar step_hi, fx_hi;
122 Scalar step_lo = Scalar(0), fx_lo = fx_init, dg_lo = dg_init;
123 // We also need to save x and grad for step=step_lo, since we want to return the best
124 // step size along the path when strong Wolfe condition is not met
125 Vector x_lo = xp, grad_lo = grad;
126
127 // STEP 1: Bracketing Phase
128 // Find a range guaranteed to contain a step satisfying strong Wolfe.
129 // The bracketing phase exits if one of the following conditions is satisfied:
130 // (1) Current step violates the sufficient decrease condition
131 // (2) Current fx >= previous fx
132 // (3) Current dg >= 0
133 // (4) Strong Wolfe condition is met
134 //
135 // (4) terminates the whole line search, and (1)-(3) go to the zoom phase
136 //
137 // See also:
138 // "Numerical Optimization", "Algorithm 3.5 (Line Search Algorithm)".
139 int iter = 0;
140 for (;;)
141 {
142 // Evaluate the current step size
143 x.noalias() = xp + step * drt;
144 fx = f(x, grad);
145 dg = grad.dot(drt);
146
147 // Test the sufficient decrease condition
148 if (fx - fx_init > step * test_decr || (Scalar(0) < step_lo && fx >= fx_lo))
149 {
150 // Case (1) and (2)
151 step_hi = step;
152 fx_hi = fx;
153 // dg_hi = dg;
154 break;
155 }
156 // If reaching here, then the sufficient decrease condition is satisfied
157
158 // Test the curvature condition
159 if (std::abs(dg) <= test_curv)
160 return; // Case (4)
161
162 step_hi = step_lo;
163 fx_hi = fx_lo;
164 // dg_hi = dg_lo;
165 step_lo = step;
166 fx_lo = fx;
167 dg_lo = dg;
168 // Move x and grad to x_lo and grad_lo, respectively
169 x_lo.swap(x);
170 grad_lo.swap(grad);
171
172 if (dg >= Scalar(0))
173 break; // Case (3)
174
175 iter++;
176 // If we have used up all line search iterations in the bracketing phase,
177 // it means every new step decreases the objective function. Of course,
178 // the strong Wolfe condition is not met, but we choose not to raise an
179 // exception; instead, we return the best step size so far. This means that
180 // we exit the line search with the most recent step size, which has the
181 // smallest objective function value during the line search
182 if (iter >= param.max_linesearch)
183 {
184 // throw std::runtime_error("the line search routine reached the maximum number of iterations");
185
186 // At this point we can guarantee that {step, fx, dg}=={step, fx, dg}_lo
187 // But we need to move {x, grad}_lo back before returning
188 x.swap(x_lo);
189 grad.swap(grad_lo);
190 return;
191 }
192
193 // If we still stay in the loop, it means we can expand the current step
194 step *= expansion;
195 }
196
197 // STEP 2: Zoom Phase
198 // Given a range (step_lo,step_hi) that is guaranteed to
199 // contain a valid strong Wolfe step value, this method
200 // finds such a value.
201 //
202 // If step_lo > 0, then step_lo is, among all step sizes generated so far and
203 // satisfying the sufficient decrease condition, the one giving the smallest
204 // objective function value.
205 //
206 // See also:
207 // "Numerical Optimization", "Algorithm 3.6 (Zoom)".
208 for (;;)
209 {
210 // Use {fx_lo, fx_hi, dg_lo} to make a quadratic interpolation of
211 // the function, and the fitted quadratic function is used to
212 // estimate the minimum
213 step = quad_interp(step_lo, step_hi, fx_lo, fx_hi, dg_lo);
214
215 // Evaluate the current step size
216 x.noalias() = xp + step * drt;
217 fx = f(x, grad);
218 dg = grad.dot(drt);
219
220 // Test the sufficient decrease condition
221 if (fx - fx_init > step * test_decr || fx >= fx_lo)
222 {
223 if (step == step_hi)
224 throw std::runtime_error("the line search routine failed, possibly due to insufficient numeric precision");
225
226 step_hi = step;
227 fx_hi = fx;
228 // dg_hi = dg;
229 }
230 else
231 {
232 // Test the curvature condition
233 if (std::abs(dg) <= test_curv)
234 return;
235
236 if (dg * (step_hi - step_lo) >= Scalar(0))
237 {
238 step_hi = step_lo;
239 fx_hi = fx_lo;
240 // dg_hi = dg_lo;
241 }
242
243 if (step == step_lo)
244 throw std::runtime_error("the line search routine failed, possibly due to insufficient numeric precision");
245
246 // If reaching here, then the current step satisfies sufficient decrease condition
247 step_lo = step;
248 fx_lo = fx;
249 dg_lo = dg;
250 // Move x and grad to x_lo and grad_lo, respectively
251 x_lo.swap(x);
252 grad_lo.swap(grad);
253 }
254
255 iter++;
256 // If we have used up all line search iterations in the zoom phase,
257 // then the strong Wolfe condition is not met. We choose not to raise an
258 // exception (unless no step satisfying sufficient decrease is found),
259 // but to return the best step size so far, i.e., step_lo
260 if (iter >= param.max_linesearch)
261 {
262 // throw std::runtime_error("the line search routine reached the maximum number of iterations");
263 if (step_lo <= Scalar(0))
264 throw std::runtime_error("the line search routine failed, unable to sufficiently decrease the function value");
265
266 // Return everything with _lo
267 step = step_lo;
268 fx = fx_lo;
269 dg = dg_lo;
270 // Move {x, grad}_lo back
271 x.swap(x_lo);
272 grad.swap(grad_lo);
273 return;
274 }
275 }
276 }
277};
278
279} // namespace LBFGSpp
280
281#endif // LBFGSPP_LINE_SEARCH_NOCEDAL_WRIGHT_H
static void LineSearch(Foo &f, const LBFGSParam< Scalar > &param, const Vector &xp, const Vector &drt, const Scalar &step_max, Scalar &step, Scalar &fx, Vector &grad, Scalar &dg, Vector &x)
@ LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE
Definition: Param.h:61