LBFGS++ 0.4.0
A header-only C++ library for L-BFGS and L-BFGS-B algorithms.
LineSearchNocedalWright.h
// Copyright (C) 2016-2026 Yixuan Qiu <yixuan.qiu@cos.name>
// Copyright (C) 2016-2026 Dirk Toewe <DirkToewe@GoogleMail.com>
// Under MIT license

#ifndef LBFGSPP_LINE_SEARCH_NOCEDAL_WRIGHT_H
#define LBFGSPP_LINE_SEARCH_NOCEDAL_WRIGHT_H

#include <Eigen/Core>
#include <algorithm>  // std::min, std::max
#include <cmath>      // std::abs, std::isfinite
#include <stdexcept>
#include "Param.h"

namespace LBFGSpp {

///
/// A line search algorithm that finds a step size satisfying the strong
/// Wolfe conditions, following Nocedal & Wright, "Numerical Optimization"
/// (Algorithms 3.5 and 3.6).
///
template <typename Scalar>
class LineSearchNocedalWright
{
private:
    using Vector = Eigen::Matrix<Scalar, Eigen::Dynamic, 1>;

    // Use {fx_lo, fx_hi, dg_lo} to make a quadratic interpolation of
    // the function, and the fitted quadratic function is used to
    // estimate the minimum
    static Scalar quad_interp(const Scalar& step_lo, const Scalar& step_hi,
                              const Scalar& fx_lo, const Scalar& fx_hi, const Scalar& dg_lo)
    {
        using std::abs;

        // polynomial: p (x) = c0*(x - step)² + c1
        // conditions: p (step_hi) = fx_hi
        //             p (step_lo) = fx_lo
        //             p'(step_lo) = dg_lo
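        //
        // A short derivation (added for clarity): the slope condition gives
        //     dg_lo = 2*c0*(step_lo - step),
        // and subtracting the two value conditions gives
        //     fx_hi - fx_lo = c0*(step_hi - step_lo)*(step_hi + step_lo - 2*step).
        // Dividing the second equation by the first eliminates c0, and with
        // fdiff = fx_hi - fx_lo, sdiff = step_hi - step_lo and
        // smid = (step_hi + step_lo)/2, solving for the vertex location yields
        //     step = (fdiff*step_lo - smid*sdiff*dg_lo) / (fdiff - sdiff*dg_lo),
        // which is exactly step_candid computed below. Sanity check:
        // step_lo = 0, step_hi = 1, fx_lo = fx_hi = 0, dg_lo = -1 gives
        // step = 0.5, the vertex of the symmetric parabola.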

        // We allow fx_hi to be Inf, so first compute a candidate for the step size,
        // and then test whether NaN occurs
        const Scalar fdiff = fx_hi - fx_lo;
        const Scalar sdiff = step_hi - step_lo;
        const Scalar smid = (step_hi + step_lo) / Scalar(2);
        Scalar step_candid = fdiff * step_lo - smid * sdiff * dg_lo;
        step_candid = step_candid / (fdiff - sdiff * dg_lo);

        // In some cases the interpolation is not a good choice
        // This includes (a) NaN values; (b) too close to the end points; (c) outside the interval
        // In such cases, a bisection search is used
        const bool candid_nan = !(std::isfinite(step_candid));
        const Scalar end_dist = std::min(abs(step_candid - step_lo), abs(step_candid - step_hi));
        const bool near_end = end_dist < Scalar(0.01) * abs(sdiff);
        const bool bisect = candid_nan ||
            (step_candid <= std::min(step_lo, step_hi)) ||
            (step_candid >= std::max(step_lo, step_hi)) ||
            near_end;
        const Scalar step = bisect ? smid : step_candid;
        return step;
    }

public:
    ///
    /// Line search by Nocedal and Wright (2006).
    ///
    /// \param f        A function object such that `f(x, grad)` returns the
    ///                 objective function value at `x`, and overwrites `grad`
    ///                 with the gradient.
    /// \param param    Parameters for the L-BFGS algorithm.
    /// \param xp       The current point.
    /// \param drt      The current moving direction.
    /// \param step_max The upper bound for the step size; not used by this
    ///                 line search algorithm.
    /// \param step     In: The initial step size.
    ///                 Out: The calculated step size.
    /// \param fx       In: The objective function value at the current point.
    ///                 Out: The function value at the new point.
    /// \param grad     In: The current gradient vector.
    ///                 Out: The gradient at the new point.
    /// \param dg       In: The inner product between drt and grad.
    ///                 Out: The inner product between drt and the new gradient.
    /// \param x        Out: The new point moved to.
    ///
    template <typename Foo>
    static void LineSearch(Foo& f, const LBFGSParam<Scalar>& param,
                           const Vector& xp, const Vector& drt, const Scalar& step_max,
                           Scalar& step, Scalar& fx, Vector& grad, Scalar& dg, Vector& x)
    {
        using std::abs;

        // Check the value of step
        if (step <= Scalar(0))
            throw std::invalid_argument("'step' must be positive");

        if (param.linesearch != LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE)
            throw std::invalid_argument("'param.linesearch' must be 'LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE' for LineSearchNocedalWright");

        // To make this implementation more similar to the other line search
        // methods in LBFGSpp, the symbol names from the literature
        // ("Numerical Optimization") have been changed.
        //
        // Literature | LBFGSpp
        // -----------|--------
        // alpha      | step
        // phi        | fx
        // phi'       | dg

        // The expansion rate of the step size
        const Scalar expansion = Scalar(2);

        // Save the function value at the current x
        const Scalar fx_init = fx;
        // Projection of the gradient on the search direction
        const Scalar dg_init = dg;
        // Make sure drt points to a descent direction
        if (dg_init > Scalar(0))
            throw std::logic_error("the moving direction increases the objective function value");

        const Scalar test_decr = param.ftol * dg_init,  // Sufficient decrease
                     test_curv = -param.wolfe * dg_init; // Curvature
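
        // In this notation, the strong Wolfe conditions for a step size `step` read
        //     fx <= fx_init + step * test_decr    (sufficient decrease)
        //     abs(dg) <= test_curv                (curvature)
        // where fx and dg are evaluated at x = xp + step * drt; these are the
        // two tests performed inside the loops below.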

        // Ends of the line search range (step_lo > step_hi is allowed)
        // We can also define dg_hi, but it will never be used
        Scalar step_hi, fx_hi;
        Scalar step_lo = Scalar(0), fx_lo = fx_init, dg_lo = dg_init;
        // We also need to save x and grad for step=step_lo, since we want to return the best
        // step size along the path when the strong Wolfe condition is not met
        Vector x_lo = xp, grad_lo = grad;

        // STEP 1: Bracketing Phase
        // Find a range guaranteed to contain a step satisfying the strong Wolfe conditions.
        // The bracketing phase exits if one of the following conditions is satisfied:
        // (1) Current step violates the sufficient decrease condition
        // (2) Current fx >= previous fx
        // (3) Current dg >= 0
        // (4) Strong Wolfe condition is met
        //
        // (4) terminates the whole line search, and (1)-(3) go to the zoom phase
        //
        // See also:
        //   "Numerical Optimization", "Algorithm 3.5 (Line Search Algorithm)".
        int iter = 0;
        for (;;)
        {
            // Evaluate the current step size
            x.noalias() = xp + step * drt;
            fx = f(x, grad);
            dg = grad.dot(drt);

            // Test the sufficient decrease condition
            if (fx - fx_init > step * test_decr || (Scalar(0) < step_lo && fx >= fx_lo))
            {
                // Case (1) and (2)
                step_hi = step;
                fx_hi = fx;
                // dg_hi = dg;
                break;
            }
            // If reaching here, then the sufficient decrease condition is satisfied

            // Test the curvature condition
            if (abs(dg) <= test_curv)
                return; // Case (4)

            step_hi = step_lo;
            fx_hi = fx_lo;
            // dg_hi = dg_lo;
            step_lo = step;
            fx_lo = fx;
            dg_lo = dg;
            // Move x and grad to x_lo and grad_lo, respectively
            x_lo.swap(x);
            grad_lo.swap(grad);

            if (dg >= Scalar(0))
                break; // Case (3)

            iter++;
            // If we have used up all line search iterations in the bracketing phase,
            // it means every new step decreases the objective function. Of course,
            // the strong Wolfe condition is not met, but we choose not to raise an
            // exception; instead, we return the best step size so far. This means that
            // we exit the line search with the most recent step size, which has the
            // smallest objective function value during the line search
            if (iter >= param.max_linesearch)
            {
                // throw std::runtime_error("the line search routine reached the maximum number of iterations");

                // At this point we can guarantee that {step, fx, dg} == {step, fx, dg}_lo
                // But we need to move {x, grad}_lo back before returning
                x.swap(x_lo);
                grad.swap(grad_lo);
                return;
            }

            // If we are still in the loop, it means we can expand the current step
            step *= expansion;
        }

        // STEP 2: Zoom Phase
        // Given a range (step_lo, step_hi) that is guaranteed to
        // contain a valid strong Wolfe step value, this method
        // finds such a value.
        //
        // If step_lo > 0, then step_lo is, among all step sizes generated so far and
        // satisfying the sufficient decrease condition, the one giving the smallest
        // objective function value.
        //
        // See also:
        //   "Numerical Optimization", "Algorithm 3.6 (Zoom)".
        for (;;)
        {
            // Use {fx_lo, fx_hi, dg_lo} to make a quadratic interpolation of
            // the function, and the fitted quadratic function is used to
            // estimate the minimum
            step = quad_interp(step_lo, step_hi, fx_lo, fx_hi, dg_lo);

            // Evaluate the current step size
            x.noalias() = xp + step * drt;
            fx = f(x, grad);
            dg = grad.dot(drt);

            // Test the sufficient decrease condition
            if (fx - fx_init > step * test_decr || fx >= fx_lo)
            {
                if (step == step_hi)
                    throw std::runtime_error("the line search routine failed, possibly due to insufficient numeric precision");

                step_hi = step;
                fx_hi = fx;
                // dg_hi = dg;
            }
            else
            {
                // Test the curvature condition
                if (abs(dg) <= test_curv)
                    return;

                if (dg * (step_hi - step_lo) >= Scalar(0))
                {
                    step_hi = step_lo;
                    fx_hi = fx_lo;
                    // dg_hi = dg_lo;
                }

                if (step == step_lo)
                    throw std::runtime_error("the line search routine failed, possibly due to insufficient numeric precision");

                // If reaching here, then the current step satisfies the sufficient decrease condition
                step_lo = step;
                fx_lo = fx;
                dg_lo = dg;
                // Move x and grad to x_lo and grad_lo, respectively
                x_lo.swap(x);
                grad_lo.swap(grad);
            }

            iter++;
            // If we have used up all line search iterations in the zoom phase,
            // then the strong Wolfe condition is not met. We choose not to raise an
            // exception (unless no step satisfying sufficient decrease is found),
            // but to return the best step size so far, i.e., step_lo
            if (iter >= param.max_linesearch)
            {
                // throw std::runtime_error("the line search routine reached the maximum number of iterations");
                if (step_lo <= Scalar(0))
                    throw std::runtime_error("the line search routine failed, unable to sufficiently decrease the function value");

                // Return everything with _lo
                step = step_lo;
                fx = fx_lo;
                dg = dg_lo;
                // Move {x, grad}_lo back
                x.swap(x_lo);
                grad.swap(grad_lo);
                return;
            }
        }
    }
};

} // namespace LBFGSpp

#endif // LBFGSPP_LINE_SEARCH_NOCEDAL_WRIGHT_H
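
Below is a minimal usage sketch, not part of the header itself: it calls LineSearch() directly on the quadratic objective f(x) = x'x/2, whose gradient is x. The include paths, the example file name, and the parameter choices are illustrative assumptions; in normal use, the solver classes of LBFGSpp drive a line search internally.

// example.cpp -- illustrative only; file and include paths are assumptions
#include <iostream>
#include <limits>
#include <Eigen/Core>
#include <LBFGSpp/Param.h>
#include <LBFGSpp/LineSearchNocedalWright.h>

int main()
{
    using Vector = Eigen::VectorXd;

    // Objective f(x) = 0.5 * x'x with gradient g(x) = x
    auto f = [](const Vector& x, Vector& grad) {
        grad = x;
        return 0.5 * x.squaredNorm();
    };

    LBFGSpp::LBFGSParam<double> param;
    // Required by the check at the top of LineSearch()
    param.linesearch = LBFGSpp::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE;

    Vector xp = Vector::Constant(2, 1.0);  // current point
    Vector grad(2), x(2);
    double fx = f(xp, grad);               // value and gradient at xp
    Vector drt = -grad;                    // steepest descent direction
    double dg = grad.dot(drt);             // dg < 0, so drt is a descent direction
    double step = 1.0;                     // initial trial step size
    // step_max is unused by this line search; pass +Inf for clarity
    const double step_max = std::numeric_limits<double>::infinity();

    LBFGSpp::LineSearchNocedalWright<double>::LineSearch(
        f, param, xp, drt, step_max, step, fx, grad, dg, x);

    // For this quadratic, step = 1 lands exactly on the minimizer x = 0
    std::cout << "step = " << step << ", fx = " << fx << std::endl;
    return 0;
}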