186 lines
5.3 KiB
C

/*******************************************************************
*
* M4RI: Linear Algebra over GF(2)
*
* Copyright (C) 2008 Jean-Guillaume.Dumas@imag.fr
*
* Distributed under the terms of the GNU General Public License (GPL)
* version 2 or higher.
*
* This code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* The full text of the GPL is available at:
*
* http://www.gnu.org/licenses/
*
********************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "solve.h"
#include "strassen.h"
#include "ple.h"
#include "triangular.h"
#include "mzp.h"
int mzd_solve_left(mzd_t *A, mzd_t *B, int const cutoff, int const inconsistency_check) {
if(A->ncols > B->nrows)
m4ri_die("mzd_solve_left: A ncols (%d) must be smaller than B nrows (%d).\n", A->ncols, B->nrows);
return _mzd_solve_left(A, B, cutoff, inconsistency_check);
}
int mzd_pluq_solve_left (mzd_t const *A, rci_t rank,
mzp_t const *P, mzp_t const *Q,
mzd_t *B, int const cutoff, int const inconsistency_check) {
if(A->ncols > B->nrows)
m4ri_die("mzd_pluq_solve_left: A ncols (%d) need to be lower than B nrows (%d).\n", A->ncols, B->nrows);
if(P->length != A->nrows)
m4ri_die("mzd_pluq_solve_left: A nrows (%d) need to match P size (%d).\n", A->nrows, P->length);
if(Q->length != A->ncols)
m4ri_die("mzd_pluq_solve_left: A ncols (%d) need to match Q size (%d).\n", A->ncols, P->length);
return _mzd_pluq_solve_left (A, rank, P, Q, B, cutoff, inconsistency_check);
}
int _mzd_pluq_solve_left(mzd_t const *A, rci_t rank,
mzp_t const *P, mzp_t const *Q,
mzd_t *B, int const cutoff, int const inconsistency_check) {
/** A is supposed to store L lower triangular and U upper triangular
* B is modified in place
* (Bi's in the comments are just modified versions of B)
* PLUQ = A
* 1) P B2 = B1
* 2) L B3 = B2
* 3) U B4 = B3
* 4) Q B5 = B4
*/
int retval = 0;
/* P B2 = B1 or B2 = P^T B1 */
mzd_apply_p_left(B, P);
/* L B3 = B2 */
/* view on the upper part of L */
mzd_t const *LU = mzd_init_window_const(A, 0, 0, rank, rank);
mzd_t *Y1 = mzd_init_window(B, 0, 0, rank, B->ncols);
mzd_trsm_lower_left(LU, Y1, cutoff);
if (inconsistency_check) { /* Check for inconsistency */
/** FASTER without this check; update with the lower part of L
*/
mzd_t const *H = mzd_init_window_const(A, rank, 0, A->nrows, rank);
mzd_t *Y2 = mzd_init_window(B, rank, 0, A->nrows, B->ncols);
if(A->nrows < B->nrows) {
mzd_t *Y3 = mzd_init_window(B, A->nrows, 0, B->nrows, B->ncols);
mzd_set_ui(Y3, 0);
mzd_free_window(Y3);
}
mzd_addmul(Y2, H, Y1, cutoff);
/*
* test whether Y2 is the zero matrix
*/
if(!mzd_is_zero(Y2)) {
retval = -1;
}
mzd_free_window((mzd_t*)H);
mzd_free_window(Y2);
}
/* U B4 = B3 */
mzd_trsm_upper_left(LU, Y1, cutoff);
mzd_free_window((mzd_t*)LU);
mzd_free_window(Y1);
if (!inconsistency_check) {
/** Default is to set the undefined bits to zero if inconsistency
* has been checked then Y2 bits are already all zeroes thus this
* clearing is not needed
*/
for(rci_t i = rank; i < B->nrows; ++i) {
for(rci_t j = 0; j < B->ncols; j += m4ri_radix) {
mzd_clear_bits(B, i, j, MIN(m4ri_radix, B->ncols - j));
}
}
}
/* Q B5 = B4 or B5 = Q^T B4 */
mzd_apply_p_left_trans(B, Q);
/* P L U Q B5 = B1 */
__M4RI_DD_MZD(B);
__M4RI_DD_INT(retval);
return retval;
}
int _mzd_solve_left(mzd_t *A, mzd_t *B, int const cutoff, int const inconsistency_check) {
/**
* B is modified in place
* (Bi's in the comments are just modified versions of B)
* 1) PLUQ = A
* 2) P B2 = B1
* 3) L B3 = B2
* 4) U B4 = B3
* 5) Q B5 = B4
*/
mzp_t *P = mzp_init(A->nrows);
mzp_t *Q = mzp_init(A->ncols);
/* PLUQ = A */
rci_t rank = _mzd_pluq(A, P, Q, cutoff);
/* 2, 3, 4, 5 */
int retval = mzd_pluq_solve_left(A, rank, P, Q, B, cutoff, inconsistency_check);
mzp_free(P);
mzp_free(Q);
__M4RI_DD_MZD(A);
__M4RI_DD_MZD(B);
return retval;
}
mzd_t *mzd_kernel_left_pluq(mzd_t *A, int const cutoff) {
mzp_t *P = mzp_init(A->nrows);
mzp_t *Q = mzp_init(A->ncols);
rci_t r = mzd_pluq(A, P, Q, cutoff);
if (r == A->ncols) {
mzp_free(P);
mzp_free(Q);
__M4RI_DD_MZD(A);
return NULL;
}
mzd_t *U = mzd_init_window(A, 0, 0, r, r);
mzd_t *R = mzd_init(A->ncols, A->ncols - r);
mzd_t *RU = mzd_init_window(R, 0, 0, r, R->ncols);
for(rci_t i=0; i< r ; i++) {
for(rci_t j=0; j< RU->ncols; j+=m4ri_radix) {
const int workload = MIN(m4ri_radix, RU->ncols - j);
mzd_xor_bits(RU, i, j, workload, mzd_read_bits(A, i, r + j, workload));
}
}
mzd_trsm_upper_left(U, RU, cutoff);
for(rci_t i = 0; i < R->ncols; ++i) {
mzd_write_bit(R, r + i, i, 1);
}
mzd_apply_p_left_trans(R, Q);
mzp_free(P);
mzp_free(Q);
mzd_free_window(RU);
mzd_free_window(U);
__M4RI_DD_MZD(A);
__M4RI_DD_MZD(R);
return R;
}