4#ifndef LBFGSPP_BFGS_MAT_H
5#define LBFGSPP_BFGS_MAT_H
26template <
typename Scalar,
bool LBFGSB = false>
30 using Vector = Eigen::Matrix<Scalar, Eigen::Dynamic, 1>;
31 using Matrix = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
32 using RefConstVec = Eigen::Ref<const Vector>;
33 using IndexSet = std::vector<int>;
52 BKLDLT<Scalar> m_permMsolver;
61 inline void reset(
int n,
int m)
74 m_permMinv.resize(2 * m, 2 * m);
76 m_permMinv.diagonal().setOnes();
81 inline void add_correction(
const RefConstVec& s,
const RefConstVec& y)
83 const int loc = m_ptr % m_m;
85 m_s.col(loc).noalias() = s;
86 m_y.col(loc).noalias() = y;
89 const Scalar ys = m_s.col(loc).dot(m_y.col(loc));
92 m_theta = m_y.col(loc).squaredNorm() / ys;
107 m_permMinv(loc, loc) = -ys;
111 Vector Ss = m_s.leftCols(m_ncorr).transpose() * m_s.col(loc);
112 m_permMinv.block(m_m + loc, m_m, 1, m_ncorr).noalias() = Ss.transpose();
113 m_permMinv.block(m_m, m_m + loc, m_ncorr, 1).noalias() = Ss;
127 const int len = m_ncorr - 1;
130 m_permMinv.block(m_m, loc, m_m, 1).setZero();
135 int yloc = (loc + m_m - 1) % m_m;
136 for (
int i = 0; i < len; i++)
138 m_permMinv(m_m + loc, yloc) = m_s.col(loc).dot(m_y.col(yloc));
139 yloc = (yloc + m_m - 1) % m_m;
143 m_permMinv.block(m_m, m_m, m_m, m_m) *= m_theta;
144 m_permMsolver.compute(m_permMinv);
145 m_permMinv.block(m_m, m_m, m_m, m_m) /= m_theta;
150 inline Matrix get_Bmat()
const
153 const int n = m_s.rows();
154 Matrix B = m_theta * Matrix::Identity(n, n);
163 Matrix W(n, 2 * m_ncorr);
166 int j = m_ptr % m_ncorr;
167 for (
int i = 0; i < m_ncorr; i++)
169 W.col(i).noalias() = m_y.col(j);
170 W.col(m_ncorr + i).noalias() = m_s.col(j);
179 Matrix Minv(2 * m_ncorr, 2 * m_ncorr);
180 Minv.topLeftCorner(m_ncorr, m_ncorr).setZero();
181 Vector ys = W.leftCols(m_ncorr).cwiseProduct(W.rightCols(m_ncorr)).colwise().sum().transpose();
182 Minv.diagonal().head(m_ncorr).noalias() = -ys;
188 Minv.bottomLeftCorner(m_ncorr, m_ncorr).setZero();
189 for (
int i = 0; i < m_ncorr - 1; i++)
192 const int nterm = m_ncorr - i - 1;
194 Minv.col(i).tail(nterm).noalias() = W.rightCols(nterm).transpose() * W.col(i);
197 Minv.topRightCorner(m_ncorr, m_ncorr).noalias() = Minv.bottomLeftCorner(m_ncorr, m_ncorr).transpose();
199 Minv.bottomRightCorner(m_ncorr, m_ncorr).noalias() = m_theta * W.rightCols(m_ncorr).transpose() * W.rightCols(m_ncorr);
202 W.rightCols(m_ncorr).array() *= m_theta;
205 Eigen::PartialPivLU<Matrix> M_solver(Minv);
206 B.noalias() -= W * M_solver.solve(W.transpose());
211 inline Matrix get_Hmat()
const
214 const int n = m_s.rows();
215 Matrix H = (Scalar(1) / m_theta) * Matrix::Identity(n, n);
224 Matrix W(n, 2 * m_ncorr);
227 int j = m_ptr % m_ncorr;
228 for (
int i = 0; i < m_ncorr; i++)
230 W.col(i).noalias() = m_y.col(j);
231 W.col(m_ncorr + i).noalias() = m_s.col(j);
239 Matrix M(2 * m_ncorr, 2 * m_ncorr);
245 for (
int i = 0; i < m_ncorr; i++)
247 M.col(i).head(i + 1).noalias() = W.middleCols(m_ncorr, i + 1).transpose() * W.col(i);
250 Matrix Rinv = M.topLeftCorner(m_ncorr, m_ncorr).template triangularView<Eigen::Upper>().solve(Matrix::Identity(m_ncorr, m_ncorr));
252 M.topLeftCorner(m_ncorr, m_ncorr).setZero();
254 M.topRightCorner(m_ncorr, m_ncorr).noalias() = -Rinv;
256 M.bottomLeftCorner(m_ncorr, m_ncorr).noalias() = -Rinv.transpose();
258 Matrix block = (Scalar(1) / m_theta) * W.leftCols(m_ncorr).transpose() * W.leftCols(m_ncorr);
260 Vector ys = W.leftCols(m_ncorr).cwiseProduct(W.rightCols(m_ncorr)).colwise().sum().transpose();
261 block.diagonal().array() += ys.array();
263 M.bottomRightCorner(m_ncorr, m_ncorr).noalias() = Rinv.transpose() * block * Rinv;
266 W.leftCols(m_ncorr).array() *= (Scalar(1) / m_theta);
269 H.noalias() += W * M * W.transpose();
276 inline void apply_Hv(
const Vector& v,
const Scalar& a, Vector& res)
278 res.resize(v.size());
283 res.noalias() = a * v;
285 for (
int i = 0; i < m_ncorr; i++)
287 j = (j + m_m - 1) % m_m;
288 m_alpha[j] = m_s.col(j).dot(res) / m_ys[j];
289 res.noalias() -= m_alpha[j] * m_y.col(j);
296 for (
int i = 0; i < m_ncorr; i++)
298 const Scalar beta = m_y.col(j).dot(res) / m_ys[j];
299 res.noalias() += (m_alpha[j] - beta) * m_s.col(j);
307 inline Scalar theta()
const {
return m_theta; }
310 inline int num_corrections()
const {
return m_ncorr; }
315 inline void apply_Wtv(
const Vector& v, Vector& res)
const
317 res.resize(2 * m_ncorr);
318 res.head(m_ncorr).noalias() = m_y.leftCols(m_ncorr).transpose() * v;
319 res.tail(m_ncorr).noalias() = m_theta * m_s.leftCols(m_ncorr).transpose() * v;
325 inline Vector Wb(
int b)
const
327 Vector res(2 * m_ncorr);
328 for (
int j = 0; j < m_ncorr; j++)
331 res[m_ncorr + j] = m_s(b, j);
333 res.tail(m_ncorr) *= m_theta;
338 inline Matrix Wb(
const IndexSet& b)
const
340 const int nb = b.size();
341 const int* bptr = b.data();
342 Matrix res(nb, 2 * m_ncorr);
344 for (
int j = 0; j < m_ncorr; j++)
346 const Scalar* Yptr = &m_y(0, j);
347 const Scalar* Sptr = &m_s(0, j);
348 Scalar* resYptr = res.data() + j * nb;
349 Scalar* resSptr = resYptr + m_ncorr * nb;
350 for (
int i = 0; i < nb; i++)
352 const int row = bptr[i];
353 resYptr[i] = Yptr[row];
354 resSptr[i] = Sptr[row];
361 inline void apply_Mv(
const Vector& v, Vector& res)
const
363 res.resize(2 * m_ncorr);
367 Vector vpadding = Vector::Zero(2 * m_m);
368 vpadding.head(m_ncorr).noalias() = v.head(m_ncorr);
369 vpadding.segment(m_m, m_ncorr).noalias() = v.tail(m_ncorr);
372 m_permMsolver.solve_inplace(vpadding);
374 res.head(m_ncorr).noalias() = vpadding.head(m_ncorr);
375 res.tail(m_ncorr).noalias() = vpadding.segment(m_m, m_ncorr);
382 inline bool apply_WtPv(
const IndexSet& P_set,
const Vector& v, Vector& res,
bool test_zero =
false)
const
384 const int* Pptr = P_set.data();
385 const Scalar* vptr = v.data();
386 int nP = P_set.size();
390 std::vector<Scalar> v_reduced;
393 P_reduced.reserve(nP);
394 for (
int i = 0; i < nP; i++)
396 if (vptr[i] != Scalar(0))
398 P_reduced.push_back(Pptr[i]);
399 v_reduced.push_back(vptr[i]);
402 Pptr = P_reduced.data();
403 vptr = v_reduced.data();
404 nP = P_reduced.size();
407 res.resize(2 * m_ncorr);
408 if (m_ncorr < 1 || nP < 1)
414 for (
int j = 0; j < m_ncorr; j++)
416 Scalar resy = Scalar(0), ress = Scalar(0);
417 const Scalar* yptr = &m_y(0, j);
418 const Scalar* sptr = &m_s(0, j);
419 for (
int i = 0; i < nP; i++)
421 const int row = Pptr[i];
422 resy += yptr[row] * vptr[i];
423 ress += sptr[row] * vptr[i];
426 res[m_ncorr + j] = ress;
428 res.tail(m_ncorr) *= m_theta;
435 inline bool apply_PtWMv(
const IndexSet& P_set,
const Vector& v, Vector& res,
const Scalar& scale)
const
437 const int nP = P_set.size();
440 if (m_ncorr < 1 || nP < 1)
446 Mv.tail(m_ncorr) *= m_theta;
447 for (
int j = 0; j < m_ncorr; j++)
449 const Scalar* yptr = &m_y(0, j);
450 const Scalar* sptr = &m_s(0, j);
451 const Scalar Mvy = Mv[j], Mvs = Mv[m_ncorr + j];
452 for (
int i = 0; i < nP; i++)
454 const int row = P_set[i];
455 res[i] += Mvy * yptr[row] + Mvs * sptr[row];
462 inline bool apply_PtWMv(
const Matrix& WP,
const Vector& v, Vector& res,
const Scalar& scale)
const
464 const int nP = WP.rows();
466 if (m_ncorr < 1 || nP < 1)
475 Mv.tail(m_ncorr) *= m_theta;
476 res.noalias() = scale * (WP * Mv);
486 inline void compute_FtBAb(
487 const Matrix& WF,
const IndexSet& fv_set,
const IndexSet& newact_set,
const Vector& Wd,
const Vector& drt,
490 const int nact = newact_set.size();
491 const int nfree = WF.rows();
493 if (m_ncorr < 1 || nact < 1 || nfree < 1)
500 Vector rhs(2 * m_ncorr);
505 for (
int i = 0; i < nact; i++)
506 Ad[i] = drt[newact_set[i]];
507 apply_WtPv(newact_set, Ad, rhs);
513 for (
int i = 0; i < nfree; i++)
514 Fd[i] = drt[fv_set[i]];
516 rhs.noalias() = WF.transpose() * Fd;
517 rhs.tail(m_ncorr) *= m_theta;
518 rhs.noalias() = Wd - rhs;
521 apply_PtWMv(WF, rhs, res, Scalar(-1));
529 inline void solve_PtBP(
const Matrix& WP,
const Vector& v, Vector& res)
const
531 const int nP = WP.rows();
533 if (m_ncorr < 1 || nP < 1)
535 res.noalias() = v / m_theta;
541 Matrix mid(2 * m_ncorr, 2 * m_ncorr);
543 for (
int j = 0; j < m_ncorr; j++)
545 mid.col(j).segment(j, m_ncorr - j).noalias() = m_permMinv.col(j).segment(j, m_ncorr - j) -
546 WP.block(0, j, nP, m_ncorr - j).transpose() * WP.col(j) / m_theta;
549 mid.block(m_ncorr, 0, m_ncorr, m_ncorr).noalias() = m_permMinv.block(m_m, 0, m_ncorr, m_ncorr) -
550 WP.rightCols(m_ncorr).transpose() * WP.leftCols(m_ncorr);
552 for (
int j = 0; j < m_ncorr; j++)
554 mid.col(m_ncorr + j).segment(m_ncorr + j, m_ncorr - j).noalias() = m_theta *
555 (m_permMinv.col(m_m + j).segment(m_m + j, m_ncorr - j) - WP.rightCols(m_ncorr - j).transpose() * WP.col(m_ncorr + j));
558 BKLDLT<Scalar> midsolver(mid);
560 Vector WPv = WP.transpose() * v;
561 WPv.tail(m_ncorr) *= m_theta;
562 midsolver.solve_inplace(WPv);
563 WPv.tail(m_ncorr) *= m_theta;
564 res.noalias() = v / m_theta + (WP * WPv) / (m_theta * m_theta);
570 inline bool apply_PtBQv(
const Matrix& WP,
const IndexSet& Q_set,
const Vector& v, Vector& res,
bool test_zero =
false)
const
572 const int nP = WP.rows();
573 const int nQ = Q_set.size();
575 if (m_ncorr < 1 || nP < 1 || nQ < 1)
582 bool nonzero = apply_WtPv(Q_set, v, WQtv, test_zero);
590 apply_Mv(WQtv, MWQtv);
591 MWQtv.tail(m_ncorr) *= m_theta;
592 res.noalias() = -WP * MWQtv;
596 inline bool apply_PtBQv(
const Matrix& WP,
const Matrix& WQ,
const Vector& v, Vector& res)
const
598 const int nP = WP.rows();
599 const int nQ = WQ.rows();
601 if (m_ncorr < 1 || nP < 1 || nQ < 1)
608 Vector WQtv = WQ.transpose() * v;
609 WQtv.tail(m_ncorr) *= m_theta;
611 apply_Mv(WQtv, MWQtv);
612 MWQtv.tail(m_ncorr) *= m_theta;
613 res.noalias() = -WP * MWQtv;