doc/BKLDLT_8h_source.html

// Copyright (C) 2020-2023 Yixuan Qiu <yixuan.qiu@cos.name>

// Under MIT license


#ifndef LBFGSPP_BK_LDLT_H

#define LBFGSPP_BK_LDLT_H


#include <vector>

#include <stdexcept>

#include <Eigen/Core>


namespace LBFGSpp {


enum COMPUTATION_INFO

{

    SUCCESSFUL = 0,

    NOT_COMPUTED,

    NUMERICAL_ISSUE

};


// Bunch-Kaufman LDLT decomposition

// References:

// 1. Bunch, J. R., & Kaufman, L. (1977). Some stable methods for calculating inertia and solving symmetric linear systems.

//    Mathematics of computation, 31(137), 163-179.

// 2. Golub, G. H., & Van Loan, C. F. (2012). Matrix computations (Vol. 3). JHU press. Section 4.4.

// 3. Bunch-Parlett diagonal pivoting <http://oz.nthu.edu.tw/~d947207/Chap13_GE3.ppt>

// 4. Ashcraft, C., Grimes, R. G., & Lewis, J. G. (1998). Accurate symmetric indefinite linear equation solvers.

//    SIAM Journal on Matrix Analysis and Applications, 20(2), 513-561.

template <typename Scalar = double>

class BKLDLT

{

private:

    using Index = Eigen::Index;

    using Matrix = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;

    using Vector = Eigen::Matrix<Scalar, Eigen::Dynamic, 1>;

    using MapVec = Eigen::Map<Vector>;

    using MapConstVec = Eigen::Map<const Vector>;


    using IntVector = Eigen::Matrix<Index, Eigen::Dynamic, 1>;

    using GenericVector = Eigen::Ref<Vector>;

    using GenericMatrix = Eigen::Ref<Matrix>;

    using ConstGenericMatrix = const Eigen::Ref<const Matrix>;

    using ConstGenericVector = const Eigen::Ref<const Vector>;


    Index m_n;

    Vector m_data;                                  // storage for a lower-triangular matrix

    std::vector<Scalar*> m_colptr;                  // pointers to columns

    IntVector m_perm;                               // [-2, -1, 3, 1, 4, 5]: 0 <-> 2, 1 <-> 1, 2 <-> 3, 3 <-> 1, 4 <-> 4, 5 <-> 5

    std::vector<std::pair<Index, Index> > m_permc;  // compressed version of m_perm: [(0, 2), (2, 3), (3, 1)]


    bool m_computed;

    int m_info;


    // Access to elements

    // Pointer to the k-th column

    Scalar* col_pointer(Index k) { return m_colptr[k]; }

    // A[i, j] -> m_colptr[j][i - j], i >= j

    Scalar& coeff(Index i, Index j) { return m_colptr[j][i - j]; }

    const Scalar& coeff(Index i, Index j) const { return m_colptr[j][i - j]; }

    // A[i, i] -> m_colptr[i][0]

    Scalar& diag_coeff(Index i) { return m_colptr[i][0]; }

    const Scalar& diag_coeff(Index i) const { return m_colptr[i][0]; }


    // Compute column pointers

    void compute_pointer()

    {

        m_colptr.clear();

        m_colptr.reserve(m_n);

        Scalar* head = m_data.data();


        for (Index i = 0; i < m_n; i++)

        {

            m_colptr.push_back(head);

            head += (m_n - i);

        }

    }


    // Copy mat - shift * I to m_data

    void copy_data(ConstGenericMatrix& mat, int uplo, const Scalar& shift)

    {

        if (uplo == Eigen::Lower)

        {

            for (Index j = 0; j < m_n; j++)

            {

                const Scalar* begin = &mat.coeffRef(j, j);

                const Index len = m_n - j;

                std::copy(begin, begin + len, col_pointer(j));

                diag_coeff(j) -= shift;

            }

        }

        else

        {

            Scalar* dest = m_data.data();

            for (Index i = 0; i < m_n; i++)

            {

                for (Index j = i; j < m_n; j++, dest++)

                {

                    *dest = mat.coeff(i, j);

                }

                diag_coeff(i) -= shift;

            }

        }

    }


    // Compute compressed permutations

    void compress_permutation()

    {

        for (Index i = 0; i < m_n; i++)

        {

            // Recover the permutation action

            const Index perm = (m_perm[i] >= 0) ? (m_perm[i]) : (-m_perm[i] - 1);

            if (perm != i)

                m_permc.push_back(std::make_pair(i, perm));

        }

    }


    // Working on the A[k:end, k:end] submatrix

    // Exchange k <-> r

    // Assume r >= k

    void pivoting_1x1(Index k, Index r)

    {

        // No permutation

        if (k == r)

        {

            m_perm[k] = r;

            return;

        }


        // A[k, k] <-> A[r, r]

        std::swap(diag_coeff(k), diag_coeff(r));


        // A[(r+1):end, k] <-> A[(r+1):end, r]

        std::swap_ranges(&coeff(r + 1, k), col_pointer(k + 1), &coeff(r + 1, r));


        // A[(k+1):(r-1), k] <-> A[r, (k+1):(r-1)]

        Scalar* src = &coeff(k + 1, k);

        for (Index j = k + 1; j < r; j++, src++)

        {

            std::swap(*src, coeff(r, j));

        }


        m_perm[k] = r;

    }


    // Working on the A[k:end, k:end] submatrix

    // Exchange [k+1, k] <-> [r, p]

    // Assume p >= k, r >= k+1

    void pivoting_2x2(Index k, Index r, Index p)

    {

        pivoting_1x1(k, p);

        pivoting_1x1(k + 1, r);


        // A[k+1, k] <-> A[r, k]

        std::swap(coeff(k + 1, k), coeff(r, k));


        // Use negative signs to indicate a 2x2 block

        // Also minus one to distinguish a negative zero from a positive zero

        m_perm[k] = -m_perm[k] - 1;

        m_perm[k + 1] = -m_perm[k + 1] - 1;

    }


    // A[r1, c1:c2] <-> A[r2, c1:c2]

    // Assume r2 >= r1 > c2 >= c1

    void interchange_rows(Index r1, Index r2, Index c1, Index c2)

    {

        if (r1 == r2)

            return;


        for (Index j = c1; j <= c2; j++)

        {

            std::swap(coeff(r1, j), coeff(r2, j));

        }

    }


    // lambda = |A[r, k]| = max{|A[k+1, k]|, ..., |A[end, k]|}

    // Largest (in magnitude) off-diagonal element in the first column of the current reduced matrix

    // r is the row index

    // Assume k < end

    Scalar find_lambda(Index k, Index& r)

    {

        using std::abs;


        const Scalar* head = col_pointer(k);  // => A[k, k]

        const Scalar* end = col_pointer(k + 1);

        // Start with r=k+1, lambda=A[k+1, k]

        r = k + 1;

        Scalar lambda = abs(head[1]);

        // Scan remaining elements

        for (const Scalar* ptr = head + 2; ptr < end; ptr++)

        {

            const Scalar abs_elem = abs(*ptr);

            if (lambda < abs_elem)

            {

                lambda = abs_elem;

                r = k + (ptr - head);

            }

        }


        return lambda;

    }


    // sigma = |A[p, r]| = max {|A[k, r]|, ..., |A[end, r]|} \ {A[r, r]}

    // Largest (in magnitude) off-diagonal element in the r-th column of the current reduced matrix

    // p is the row index

    // Assume k < r < end

    Scalar find_sigma(Index k, Index r, Index& p)

    {

        using std::abs;


        // First search A[r+1, r], ...,  A[end, r], which has the same task as find_lambda()

        // If r == end, we skip this search

        Scalar sigma = Scalar(-1);

        if (r < m_n - 1)

            sigma = find_lambda(r, p);


        // Then search A[k, r], ..., A[r-1, r], which maps to A[r, k], ..., A[r, r-1]

        for (Index j = k; j < r; j++)

        {

            const Scalar abs_elem = abs(coeff(r, j));

            if (sigma < abs_elem)

            {

                sigma = abs_elem;

                p = j;

            }

        }


        return sigma;

    }


    // Generate permutations and apply to A

    // Return true if the resulting pivoting is 1x1, and false if 2x2

    bool permutate_mat(Index k, const Scalar& alpha)

    {

        using std::abs;


        Index r = k, p = k;

        const Scalar lambda = find_lambda(k, r);


        // If lambda=0, no need to interchange

        if (lambda > Scalar(0))

        {

            const Scalar abs_akk = abs(diag_coeff(k));

            // If |A[k, k]| >= alpha * lambda, no need to interchange

            if (abs_akk < alpha * lambda)

            {

                const Scalar sigma = find_sigma(k, r, p);


                // If sigma * |A[k, k]| >= alpha * lambda^2, no need to interchange

                if (sigma * abs_akk < alpha * lambda * lambda)

                {

                    if (abs_akk >= alpha * sigma)

                    {

                        // Permutation on A

                        pivoting_1x1(k, r);


                        // Permutation on L

                        interchange_rows(k, r, 0, k - 1);

                        return true;

                    }

                    else

                    {

                        // There are two versions of permutation here

                        // 1. A[k+1, k] <-> A[r, k]

                        // 2. A[k+1, k] <-> A[r, p], where p >= k and r >= k+1

                        //

                        // Version 1 and 2 are used by Ref[1] and Ref[2], respectively


                        // Version 1 implementation

                        p = k;


                        // Version 2 implementation

                        // [r, p] and [p, r] are symmetric, but we need to make sure

                        // p >= k and r >= k+1, so it is safe to always make r > p

                        // One exception is when min{r,p} == k+1, in which case we make

                        // r = k+1, so that only one permutation needs to be performed

                        /* const Index rp_min = std::min(r, p);

                        const Index rp_max = std::max(r, p);

                        if(rp_min == k + 1)

                        {

                            r = rp_min; p = rp_max;

                        } else {

                            r = rp_max; p = rp_min;

                        } */


                        // Right now we use Version 1 since it reduces the overhead of interchange


                        // Permutation on A

                        pivoting_2x2(k, r, p);

                        // Permutation on L

                        interchange_rows(k, p, 0, k - 1);

                        interchange_rows(k + 1, r, 0, k - 1);

                        return false;

                    }

                }

            }

        }


        return true;

    }


    // E = [e11, e12]

    //     [e21, e22]

    // Overwrite E with inv(E)

    void inverse_inplace_2x2(Scalar& e11, Scalar& e21, Scalar& e22) const

    {

        // inv(E) = [d11, d12], d11 = e22/delta, d21 = -e21/delta, d22 = e11/delta

        //          [d21, d22]

        const Scalar delta = e11 * e22 - e21 * e21;

        std::swap(e11, e22);

        e11 /= delta;

        e22 /= delta;

        e21 = -e21 / delta;

    }


    // Return value is the status, SUCCESSFUL/NUMERICAL_ISSUE

    int gaussian_elimination_1x1(Index k)

    {

        // D = 1 / A[k, k]

        const Scalar akk = diag_coeff(k);

        // Return NUMERICAL_ISSUE if not invertible

        if (akk == Scalar(0))

            return NUMERICAL_ISSUE;


        diag_coeff(k) = Scalar(1) / akk;


        // B -= l * l' / A[k, k], B := A[(k+1):end, (k+1):end], l := L[(k+1):end, k]

        Scalar* lptr = col_pointer(k) + 1;

        const Index ldim = m_n - k - 1;

        MapVec l(lptr, ldim);

        for (Index j = 0; j < ldim; j++)

        {

            MapVec(col_pointer(j + k + 1), ldim - j).noalias() -= (lptr[j] / akk) * l.tail(ldim - j);

        }


        // l /= A[k, k]

        l /= akk;


        return SUCCESSFUL;

    }


    // Return value is the status, SUCCESSFUL/NUMERICAL_ISSUE

    int gaussian_elimination_2x2(Index k)

    {

        // D = inv(E)

        Scalar& e11 = diag_coeff(k);

        Scalar& e21 = coeff(k + 1, k);

        Scalar& e22 = diag_coeff(k + 1);

        // Return NUMERICAL_ISSUE if not invertible

        if (e11 * e22 - e21 * e21 == Scalar(0))

            return NUMERICAL_ISSUE;


        inverse_inplace_2x2(e11, e21, e22);


        // X = l * inv(E), l := L[(k+2):end, k:(k+1)]

        Scalar* l1ptr = &coeff(k + 2, k);

        Scalar* l2ptr = &coeff(k + 2, k + 1);

        const Index ldim = m_n - k - 2;

        MapVec l1(l1ptr, ldim), l2(l2ptr, ldim);


        Eigen::Matrix<Scalar, Eigen::Dynamic, 2> X(ldim, 2);

        X.col(0).noalias() = l1 * e11 + l2 * e21;

        X.col(1).noalias() = l1 * e21 + l2 * e22;


        // B -= l * inv(E) * l' = X * l', B = A[(k+2):end, (k+2):end]

        for (Index j = 0; j < ldim; j++)

        {

            MapVec(col_pointer(j + k + 2), ldim - j).noalias() -= (X.col(0).tail(ldim - j) * l1ptr[j] + X.col(1).tail(ldim - j) * l2ptr[j]);

        }


        // l = X

        l1.noalias() = X.col(0);

        l2.noalias() = X.col(1);


        return SUCCESSFUL;

    }


public:

    BKLDLT() :

        m_n(0), m_computed(false), m_info(NOT_COMPUTED)

    {}


    // Factorize mat - shift * I

    BKLDLT(ConstGenericMatrix& mat, int uplo = Eigen::Lower, const Scalar& shift = Scalar(0)) :

        m_n(mat.rows()), m_computed(false), m_info(NOT_COMPUTED)

    {

        compute(mat, uplo, shift);

    }


    void compute(ConstGenericMatrix& mat, int uplo = Eigen::Lower, const Scalar& shift = Scalar(0))

    {

        using std::abs;


        m_n = mat.rows();

        if (m_n != mat.cols())

            throw std::invalid_argument("BKLDLT: matrix must be square");


        m_perm.setLinSpaced(m_n, 0, m_n - 1);

        m_permc.clear();


        // Copy data

        m_data.resize((m_n * (m_n + 1)) / 2);

        compute_pointer();

        copy_data(mat, uplo, shift);


        const Scalar alpha = (1.0 + std::sqrt(17.0)) / 8.0;

        Index k = 0;

        for (k = 0; k < m_n - 1; k++)

        {

            // 1. Interchange rows and columns of A, and save the result to m_perm

            bool is_1x1 = permutate_mat(k, alpha);


            // 2. Gaussian elimination

            if (is_1x1)

            {

                m_info = gaussian_elimination_1x1(k);

            }

            else

            {

                m_info = gaussian_elimination_2x2(k);

                k++;

            }


            // 3. Check status

            if (m_info != SUCCESSFUL)

                break;

        }

        // Invert the last 1x1 block if it exists

        if (k == m_n - 1)

        {

            const Scalar akk = diag_coeff(k);

            if (akk == Scalar(0))

                m_info = NUMERICAL_ISSUE;


            diag_coeff(k) = Scalar(1) / diag_coeff(k);

        }


        compress_permutation();


        m_computed = true;

    }


    // Solve Ax=b

    void solve_inplace(GenericVector b) const

    {

        if (!m_computed)

            throw std::logic_error("BKLDLT: need to call compute() first");


        // PAP' = LDL'

        // 1. b -> Pb

        Scalar* x = b.data();

        MapVec res(x, m_n);

        Index npermc = m_permc.size();

        for (Index i = 0; i < npermc; i++)

        {

            std::swap(x[m_permc[i].first], x[m_permc[i].second]);

        }


        // 2. Lz = Pb

        // If m_perm[end] < 0, then end with m_n - 3, otherwise end with m_n - 2

        const Index end = (m_perm[m_n - 1] < 0) ? (m_n - 3) : (m_n - 2);

        for (Index i = 0; i <= end; i++)

        {

            const Index b1size = m_n - i - 1;

            const Index b2size = b1size - 1;

            if (m_perm[i] >= 0)

            {

                MapConstVec l(&coeff(i + 1, i), b1size);

                res.segment(i + 1, b1size).noalias() -= l * x[i];

            }

            else

            {

                MapConstVec l1(&coeff(i + 2, i), b2size);

                MapConstVec l2(&coeff(i + 2, i + 1), b2size);

                res.segment(i + 2, b2size).noalias() -= (l1 * x[i] + l2 * x[i + 1]);

                i++;

            }

        }


        // 3. Dw = z

        for (Index i = 0; i < m_n; i++)

        {

            const Scalar e11 = diag_coeff(i);

            if (m_perm[i] >= 0)

            {

                x[i] *= e11;

            }

            else

            {

                const Scalar e21 = coeff(i + 1, i), e22 = diag_coeff(i + 1);

                const Scalar wi = x[i] * e11 + x[i + 1] * e21;

                x[i + 1] = x[i] * e21 + x[i + 1] * e22;

                x[i] = wi;

                i++;

            }

        }


        // 4. L'y = w

        // If m_perm[end] < 0, then start with m_n - 3, otherwise start with m_n - 2

        Index i = (m_perm[m_n - 1] < 0) ? (m_n - 3) : (m_n - 2);

        for (; i >= 0; i--)

        {

            const Index ldim = m_n - i - 1;

            MapConstVec l(&coeff(i + 1, i), ldim);

            x[i] -= res.segment(i + 1, ldim).dot(l);


            if (m_perm[i] < 0)

            {

                MapConstVec l2(&coeff(i + 1, i - 1), ldim);

                x[i - 1] -= res.segment(i + 1, ldim).dot(l2);

                i--;

            }

        }


        // 5. x = P'y

        for (i = npermc - 1; i >= 0; i--)

        {

            std::swap(x[m_permc[i].first], x[m_permc[i].second]);

        }

    }


    Vector solve(ConstGenericVector& b) const

    {

        Vector res = b;

        solve_inplace(res);

        return res;

    }


    int info() const { return m_info; }

};


}  // namespace LBFGSpp


#endif  // LBFGSPP_BK_LDLT_H