1314 lines
34 KiB
C

/**
* \file mzd.h
* \brief Dense matrices over GF(2) represented as a bit field.
*
* \author Gregory Bard <bard@fordham.edu>
* \author Martin Albrecht <martinralbrecht+m4ri@googlemail.com>
* \author Carlo Wood <carlo@alinoe.com>
*/
#ifndef M4RI_MZD
#define M4RI_MZD
/*******************************************************************
*
* M4RI: Linear Algebra over GF(2)
*
* Copyright (C) 2007, 2008 Gregory Bard <bard@fordham.edu>
* Copyright (C) 2008-2013 Martin Albrecht <M.R.Albrecht@rhul.ac.uk>
* Copyright (C) 2011 Carlo Wood <carlo@alinoe.com>
*
* Distributed under the terms of the GNU General Public License (GPL)
* version 2 or higher.
*
* This code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* The full text of the GPL is available at:
*
* http://www.gnu.org/licenses/
*
********************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <m4ri/m4ri_config.h>
#include <assert.h>
#include <math.h>
#include <stdio.h>
#if __M4RI_HAVE_SSE2
#include <emmintrin.h>
#endif
#include <m4ri/misc.h>
#include <m4ri/debug_dump.h>
/**
* Maximum number of words allocated for one mzd_t block.
*
* \note This value must fit in an int, even though it's type is size_t.
*/
#define __M4RI_MAX_MZD_BLOCKSIZE (((size_t)1) << 27)
/**
* \brief Matrix multiplication block-ing dimension.
*
* Defines the number of rows of the matrix A that are
* processed as one block during the execution of a multiplication
* algorithm.
*/
#define __M4RI_MUL_BLOCKSIZE MIN(((int)sqrt((double)(4 * __M4RI_CPU_L3_CACHE))) / 2, 2048)
/**
* \brief Data containers containing the values packed into words
*/
typedef struct {
size_t size; /*!< number of words */
word* begin; /*!< first word */
word* end; /*!< last word */
} mzd_block_t;
/**
* \brief Dense matrices over GF(2).
*
* The most fundamental data type in this library.
*/
typedef struct mzd_t {
rci_t nrows; /*!< Number of rows. */
rci_t ncols; /*!< Number of columns. */
wi_t width; /*!< Number of words with valid bits: width = ceil(ncols / m4ri_radix) */
/**
* Offset in words between rows.
*
* rowstride = (width < mzd_paddingwidth || (width & 1) == 0) ? width : width + 1;
* where width is the width of the underlying non-windowed matrix.
*/
wi_t rowstride;
/**
* Offset in words from start of block to first word.
*
* rows[0] = blocks[0].begin + offset_vector;
* This, together with rowstride, makes the rows array obsolete.
*/
wi_t offset_vector;
wi_t row_offset; /*!< Number of rows to the first row counting from the start of the first block. */
/**
* Booleans to speed up things.
*
* The bits have the following meaning:
*
* 1: Has non-zero excess.
* 2: Is windowed, but has zero offset.
* 3: Is windowed, but has zero excess.
* 4: Is windowed, but owns the blocks allocations.
* 5: Spans more than 1 block.
*/
uint8_t flags;
/**
* blockrows_log = log2(blockrows);
* where blockrows is the number of rows in one block, which is a power of 2.
*/
uint8_t blockrows_log;
word high_bitmask; /*!< Mask for valid bits in the word with the highest index (width - 1). */
mzd_block_t *blocks; /*!< Pointers to the actual blocks of memory containing the values packed into words. */
word **rows; /*!< Address of first word in each row, so the first word of row i is is m->rows[i] */
uint64_t dummy; /*!< ensures sizeof(mzd_t) == 64 */
} mzd_t;
/**
* \brief The minimum width where padding occurs.
*/
static wi_t const mzd_paddingwidth = 1;
/**
* \brief flag when ncols%64 == 0
*/
static uint8_t const mzd_flag_nonzero_excess = 0x2;
/**
* \brief flag for windowed matrix
*/
static uint8_t const mzd_flag_windowed_zerooffset = 0x4;
/**
* \brief flag for windowed matrix where ncols%64 == 0
*/
static uint8_t const mzd_flag_windowed_zeroexcess = 0x8;
/**
* \brief flag for windowed matrix wich owns its memory
*/
static uint8_t const mzd_flag_windowed_ownsblocks = 0x10;
/**
* \brief flag for multiply blocks
*/
static uint8_t const mzd_flag_multiple_blocks = 0x20;
/**
* \brief Test if a matrix is windowed.
*
* \param M Matrix
*
* \return a non-zero value if the matrix is windowed, otherwise return zero.
*/
static inline int mzd_is_windowed(mzd_t const *M) {
return M->flags & (mzd_flag_windowed_zerooffset);
}
/**
* \brief Test if this mzd_t should free blocks.
*
* \param M Matrix
*
* \return TRUE iff blocks is non-zero and should be freed upon a call to mzd_free.
*/
static inline int mzd_owns_blocks(mzd_t const *M) {
return M->blocks && (!mzd_is_windowed(M) || ((M->flags & mzd_flag_windowed_ownsblocks)));
}
/**
* \brief Get a pointer the first word.
*
* \param M Matrix
*
* \return a pointer to the first word of the first row.
*/
static inline word* mzd_first_row(mzd_t const *M) {
word* result = M->blocks[0].begin + M->offset_vector;
assert(M->nrows == 0 || result == M->rows[0]);
return result;
}
/**
* \brief Get a pointer to the first word in block n.
*
* Use mzd_first_row for block number 0.
*
* \param M Matrix
* \param n The block number. Must be larger than 0.
*
* \return a pointer to the first word of the first row in block n.
*/
static inline word* mzd_first_row_next_block(mzd_t const* M, int n) {
assert(n > 0);
return M->blocks[n].begin + M->offset_vector - M->row_offset * M->rowstride;
}
/**
* \brief Convert row to blocks index.
*
* \param M Matrix.
* \param row The row to convert.
*
* \return the block number that contains this row.
*/
static inline int mzd_row_to_block(mzd_t const* M, rci_t row) {
return (M->row_offset + row) >> M->blockrows_log;
}
/**
* \brief Total number of rows in this block.
*
* Should be called with a constant n=0, or with
* n > 0 when n is a variable, for optimization
* reasons.
*
* \param M Matrix
* \param n The block number.
*
* \return the total number of rows in this block.
*/
static inline wi_t mzd_rows_in_block(mzd_t const* M, int n) {
if (__M4RI_UNLIKELY(M->flags & mzd_flag_multiple_blocks)) {
if (__M4RI_UNLIKELY(n == 0)) {
return (1 << M->blockrows_log) - M->row_offset;
} else {
int const last_block = mzd_row_to_block(M, M->nrows - 1);
if (n < last_block)
return (1 << M->blockrows_log);
return M->nrows + M->row_offset - (n << M->blockrows_log);
}
}
return n ? 0 : M->nrows;
}
/**
* \brief Number of rows in this block including r
*
* \param M Matrix
* \param r row
*
* \return the number of rows with index >= r in this block
*/
static inline wi_t mzd_remaining_rows_in_block(mzd_t const* M, rci_t r) {
const int n = mzd_row_to_block(M, r);
r = (r - (n << M->blockrows_log));
if (__M4RI_UNLIKELY(M->flags & mzd_flag_multiple_blocks)) {
if (__M4RI_UNLIKELY(n == 0)) {
return (1 << M->blockrows_log) - M->row_offset - r;
} else {
int const last_block = mzd_row_to_block(M, M->nrows - 1);
if (n < last_block)
return (1 << M->blockrows_log) - r;
return M->nrows + M->row_offset - (n << M->blockrows_log) - r;
}
}
return n ? 0 : M->nrows - r;
}
/**
* \brief Get pointer to first word of row.
*
* \param M Matrix
* \param row The row index.
*
* \return pointer to first word of the row.
*/
static inline word* mzd_row(mzd_t const* M, rci_t row) {
wi_t big_vector = M->offset_vector + row * M->rowstride;
word* result = M->blocks[0].begin + big_vector;
if (__M4RI_UNLIKELY(M->flags & mzd_flag_multiple_blocks)) {
int const n = (M->row_offset + row) >> M->blockrows_log;
result = M->blocks[n].begin + big_vector - n * (M->blocks[0].size / sizeof(word));
}
assert(result == M->rows[row]);
return result;
}
/**
* \brief Create a new matrix of dimension r x c.
*
* Use mzd_free to kill it.
*
* \param r Number of rows
* \param c Number of columns
*
*/
mzd_t *mzd_init(rci_t const r, rci_t const c);
/**
* \brief Free a matrix created with mzd_init.
*
* \param A Matrix
*/
void mzd_free(mzd_t *A);
/**
* \brief Create a window/view into the matrix M.
*
* A matrix window for M is a meta structure on the matrix M. It is
* setup to point into the matrix so M \em must \em not be freed while the
* matrix window is used.
*
* This function puts the restriction on the provided parameters that
* all parameters must be within range for M which is not enforced
* currently .
*
* Use mzd_free_window to free the window.
*
* \param M Matrix
* \param lowr Starting row (inclusive)
* \param lowc Starting column (inclusive, must be multiple of m4ri_radix)
* \param highr End row (exclusive)
* \param highc End column (exclusive)
*
*/
mzd_t *mzd_init_window(mzd_t *M, rci_t const lowr, rci_t const lowc, rci_t const highr, rci_t const highc);
/**
* \brief Create a const window/view into a const matrix M.
*
* See mzd_init_window, but for constant M.
*/
static inline mzd_t const *mzd_init_window_const(mzd_t const *M, rci_t const lowr, rci_t const lowc, rci_t const highr, rci_t const highc)
{
return mzd_init_window((mzd_t*)M, lowr, lowc, highr, highc);
}
/**
* \brief Free a matrix window created with mzd_init_window.
*
* \param A Matrix
*/
#define mzd_free_window mzd_free
/**
* \brief Swap the two rows rowa and rowb starting at startblock.
*
* \param M Matrix with a zero offset.
* \param rowa Row index.
* \param rowb Row index.
* \param startblock Start swapping only in this block.
*/
static inline void _mzd_row_swap(mzd_t *M, rci_t const rowa, rci_t const rowb, wi_t const startblock) {
if ((rowa == rowb) || (startblock >= M->width))
return;
wi_t width = M->width - startblock - 1;
word *a = M->rows[rowa] + startblock;
word *b = M->rows[rowb] + startblock;
word tmp;
word const mask_end = M->high_bitmask;
for(wi_t i = 0; i < width; ++i) {
tmp = a[i];
a[i] = b[i];
b[i] = tmp;
}
tmp = (a[width] ^ b[width]) & mask_end;
a[width] ^= tmp;
b[width] ^= tmp;
__M4RI_DD_ROW(M, rowa);
__M4RI_DD_ROW(M, rowb);
}
/**
* \brief Swap the two rows rowa and rowb.
*
* \param M Matrix
* \param rowa Row index.
* \param rowb Row index.
*/
static inline void mzd_row_swap(mzd_t *M, rci_t const rowa, rci_t const rowb) {
_mzd_row_swap(M, rowa, rowb, 0);
}
/**
* \brief copy row j from A to row i from B.
*
* The offsets of A and B must match and the number of columns of A
* must be less than or equal to the number of columns of B.
*
* \param B Target matrix.
* \param i Target row index.
* \param A Source matrix.
* \param j Source row index.
*/
void mzd_copy_row(mzd_t *B, rci_t i, mzd_t const *A, rci_t j);
/**
* \brief Swap the two columns cola and colb.
*
* \param M Matrix.
* \param cola Column index.
* \param colb Column index.
*/
void mzd_col_swap(mzd_t *M, rci_t const cola, rci_t const colb);
/**
* \brief Swap the two columns cola and colb but only between start_row and stop_row.
*
* \param M Matrix.
* \param cola Column index.
* \param colb Column index.
* \param start_row Row index.
* \param stop_row Row index (exclusive).
*/
static inline void mzd_col_swap_in_rows(mzd_t *M, rci_t const cola, rci_t const colb, rci_t const start_row, rci_t const stop_row) {
if (cola == colb)
return;
rci_t const _cola = cola;
rci_t const _colb = colb;
wi_t const a_word = _cola / m4ri_radix;
wi_t const b_word = _colb / m4ri_radix;
int const a_bit = _cola % m4ri_radix;
int const b_bit = _colb % m4ri_radix;
word* RESTRICT ptr = mzd_row(M, start_row);
int max_bit = MAX(a_bit, b_bit);
int count_remaining = stop_row - start_row;
int min_bit = a_bit + b_bit - max_bit;
int block = mzd_row_to_block(M, start_row);
int offset = max_bit - min_bit;
word mask = m4ri_one << min_bit;
int count = MIN(mzd_remaining_rows_in_block(M, start_row), count_remaining);
// Apparently we're calling with start_row == stop_row sometimes (seems a bug to me).
if (count <= 0)
return;
if (a_word == b_word) {
while(1) {
count_remaining -= count;
ptr += a_word;
int fast_count = count / 4;
int rest_count = count - 4 * fast_count;
word xor_v[4];
wi_t const rowstride = M->rowstride;
while (fast_count--) {
xor_v[0] = ptr[0];
xor_v[1] = ptr[rowstride];
xor_v[2] = ptr[2 * rowstride];
xor_v[3] = ptr[3 * rowstride];
xor_v[0] ^= xor_v[0] >> offset;
xor_v[1] ^= xor_v[1] >> offset;
xor_v[2] ^= xor_v[2] >> offset;
xor_v[3] ^= xor_v[3] >> offset;
xor_v[0] &= mask;
xor_v[1] &= mask;
xor_v[2] &= mask;
xor_v[3] &= mask;
xor_v[0] |= xor_v[0] << offset;
xor_v[1] |= xor_v[1] << offset;
xor_v[2] |= xor_v[2] << offset;
xor_v[3] |= xor_v[3] << offset;
ptr[0] ^= xor_v[0];
ptr[rowstride] ^= xor_v[1];
ptr[2 * rowstride] ^= xor_v[2];
ptr[3 * rowstride] ^= xor_v[3];
ptr += 4 * rowstride;
}
while (rest_count--) {
word xor_v = *ptr;
xor_v ^= xor_v >> offset;
xor_v &= mask;
*ptr ^= xor_v | (xor_v << offset);
ptr += rowstride;
}
block++;
if ((count = MIN(mzd_rows_in_block(M, block), count_remaining)) <= 0)
break;
ptr = mzd_first_row_next_block(M, block);
}
} else {
word* RESTRICT min_ptr;
wi_t max_offset;
if (min_bit == a_bit) {
min_ptr = ptr + a_word;
max_offset = b_word - a_word;
} else {
min_ptr = ptr + b_word;
max_offset = a_word - b_word;
}
while(1) {
count_remaining -= count;
wi_t const rowstride = M->rowstride;
while(count--) {
word xor_v = (min_ptr[0] ^ (min_ptr[max_offset] >> offset)) & mask;
min_ptr[0] ^= xor_v;
min_ptr[max_offset] ^= xor_v << offset;
min_ptr += rowstride;
}
block++;
if ((count = MIN(mzd_rows_in_block(M,+block), count_remaining)) <= 0)
break;
ptr = mzd_first_row_next_block(M, block);
if (min_bit == a_bit)
min_ptr = ptr + a_word;
else
min_ptr = ptr + b_word;
}
}
__M4RI_DD_MZD(M);
}
/**
* \brief Read the bit at position M[row,col].
*
* \param M Matrix
* \param row Row index
* \param col Column index
*
* \note No bounds checks whatsoever are performed.
*
*/
static inline BIT mzd_read_bit(mzd_t const *M, rci_t const row, rci_t const col ) {
return __M4RI_GET_BIT(M->rows[row][col/m4ri_radix], col%m4ri_radix);
}
/**
* \brief Write the bit value to position M[row,col]
*
* \param M Matrix
* \param row Row index
* \param col Column index
* \param value Either 0 or 1
*
* \note No bounds checks whatsoever are performed.
*
*/
static inline void mzd_write_bit(mzd_t *M, rci_t const row, rci_t const col, BIT const value) {
__M4RI_WRITE_BIT(M->rows[row][col/m4ri_radix], col%m4ri_radix, value);
}
/**
* \brief XOR n bits from values to M starting a position (x,y).
*
* \param M Source matrix.
* \param x Starting row.
* \param y Starting column.
* \param n Number of bits (<= m4ri_radix);
* \param values Word with values;
*/
static inline void mzd_xor_bits(mzd_t const *M, rci_t const x, rci_t const y, int const n, word values) {
int const spot = y % m4ri_radix;
wi_t const block = y / m4ri_radix;
M->rows[x][block] ^= values << spot;
int const space = m4ri_radix - spot;
if (n > space)
M->rows[x][block + 1] ^= values >> space;
}
/**
* \brief AND n bits from values to M starting a position (x,y).
*
* \param M Source matrix.
* \param x Starting row.
* \param y Starting column.
* \param n Number of bits (<= m4ri_radix);
* \param values Word with values;
*/
static inline void mzd_and_bits(mzd_t const *M, rci_t const x, rci_t const y, int const n, word values) {
/* This is the best way, since this will drop out once we inverse the bits in values: */
values >>= (m4ri_radix - n); /* Move the bits to the lowest columns */
int const spot = y % m4ri_radix;
wi_t const block = y / m4ri_radix;
M->rows[x][block] &= values << spot;
int const space = m4ri_radix - spot;
if (n > space)
M->rows[x][block + 1] &= values >> space;
}
/**
* \brief Clear n bits in M starting a position (x,y).
*
* \param M Source matrix.
* \param x Starting row.
* \param y Starting column.
* \param n Number of bits (0 < n <= m4ri_radix);
*/
static inline void mzd_clear_bits(mzd_t const *M, rci_t const x, rci_t const y, int const n) {
assert(n>0 && n <= m4ri_radix);
word values = m4ri_ffff >> (m4ri_radix - n);
int const spot = y % m4ri_radix;
wi_t const block = y / m4ri_radix;
M->rows[x][block] &= ~(values << spot);
int const space = m4ri_radix - spot;
if (n > space)
M->rows[x][block + 1] &= ~(values >> space);
}
/**
* \brief Add the rows sourcerow and destrow and stores the total in the row
* destrow, but only begins at the column coloffset.
*
* \param M Matrix
* \param dstrow Index of target row
* \param srcrow Index of source row
* \param coloffset Start column (0 <= coloffset < M->ncols)
*
* \warning This function expects that there is at least one word worth of work.
*/
static inline void mzd_row_add_offset(mzd_t *M, rci_t dstrow, rci_t srcrow, rci_t coloffset) {
assert(dstrow < M->nrows && srcrow < M->nrows && coloffset < M->ncols);
wi_t const startblock= coloffset/m4ri_radix;
wi_t wide = M->width - startblock;
word *src = M->rows[srcrow] + startblock;
word *dst = M->rows[dstrow] + startblock;
word const mask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - coloffset % m4ri_radix);
word const mask_end = M->high_bitmask;
*dst++ ^= *src++ & mask_begin;
--wide;
#if __M4RI_HAVE_SSE2
int not_aligned = __M4RI_ALIGNMENT(src,16) != 0; /* 0: Aligned, 1: Not aligned */
if (wide > not_aligned + 1) /* Speed up for small matrices */
{
if (not_aligned) {
*dst++ ^= *src++;
--wide;
}
/* Now wide > 1 */
__m128i* __src = (__m128i*)src;
__m128i* __dst = (__m128i*)dst;
__m128i* const eof = (__m128i*)((unsigned long)(src + wide) & ~0xFUL);
do
{
__m128i xmm1 = _mm_xor_si128(*__dst, *__src);
*__dst++ = xmm1;
}
while(++__src < eof);
src = (word*)__src;
dst = (word*)__dst;
wide = ((sizeof(word)*wide)%16)/sizeof(word);
}
#endif
wi_t i = -1;
while(++i < wide)
dst[i] ^= src[i];
/*
* Revert possibly non-zero excess bits.
* Note that i == wide here, and wide can be 0.
* But really, src[wide - 1] is M->rows[srcrow][M->width - 1] ;)
* We use i - 1 here to let the compiler know these are the same addresses
* that we last accessed, in the previous loop.
*/
dst[i - 1] ^= src[i - 1] & ~mask_end;
__M4RI_DD_ROW(M, dstrow);
}
/**
* \brief Add the rows sourcerow and destrow and stores the total in
* the row destrow.
*
* \param M Matrix
* \param sourcerow Index of source row
* \param destrow Index of target row
*
* \note this can be done much faster with mzd_combine.
*/
void mzd_row_add(mzd_t *M, rci_t const sourcerow, rci_t const destrow);
/**
* \brief Transpose a matrix.
*
* This function uses the fact that:
\verbatim
[ A B ]T [AT CT]
[ C D ] = [BT DT]
\endverbatim
* and thus rearranges the blocks recursively.
*
* \param DST Preallocated return matrix, may be NULL for automatic creation.
* \param A Matrix
*/
mzd_t *mzd_transpose(mzd_t *DST, mzd_t const *A);
/**
* \brief Naive cubic matrix multiplication.
*
* That is, compute C such that C == AB.
*
* \param C Preallocated product matrix, may be NULL for automatic creation.
* \param A Input matrix A.
* \param B Input matrix B.
*
* \note Normally, if you will multiply several times by b, it is
* smarter to calculate bT yourself, and keep it, and then use the
* function called _mzd_mul_naive
*
*/
mzd_t *mzd_mul_naive(mzd_t *C, mzd_t const *A, mzd_t const *B);
/**
* \brief Naive cubic matrix multiplication and addition
*
* That is, compute C such that C == C + AB.
*
* \param C Preallocated product matrix.
* \param A Input matrix A.
* \param B Input matrix B.
*
* \note Normally, if you will multiply several times by b, it is
* smarter to calculate bT yourself, and keep it, and then use the
* function called _mzd_mul_naive
*/
mzd_t *mzd_addmul_naive(mzd_t *C, mzd_t const *A, mzd_t const *B);
/**
* \brief Naive cubic matrix multiplication with the pre-transposed B.
*
* That is, compute C such that C == AB^t.
*
* \param C Preallocated product matrix.
* \param A Input matrix A.
* \param B Pre-transposed input matrix B.
* \param clear Whether to clear C before accumulating AB
*/
mzd_t *_mzd_mul_naive(mzd_t *C, mzd_t const *A, mzd_t const *B, int const clear);
/**
* \brief Matrix multiplication optimized for v*A where v is a vector.
*
* \param C Preallocated product matrix.
* \param v Input matrix v.
* \param A Input matrix A.
* \param clear If set clear C first, otherwise add result to C.
*
*/
mzd_t *_mzd_mul_va(mzd_t *C, mzd_t const *v, mzd_t const *A, int const clear);
/**
* \brief Fill matrix M with uniformly distributed bits.
*
* \param M Matrix
*
* \todo Allow the user to provide a RNG callback.
*/
void mzd_randomize(mzd_t *M);
/**
* \brief Set the matrix M to the value equivalent to the integer
* value provided.
*
* Specifically, this function does nothing if value%2 == 0 and
* returns the identity matrix if value%2 == 1.
*
* If the matrix is not square then the largest possible square
* submatrix is set to the identity matrix.
*
* \param M Matrix
* \param value Either 0 or 1
*/
void mzd_set_ui(mzd_t *M, unsigned int const value);
/**
* \brief Gaussian elimination.
*
* This will do Gaussian elimination on the matrix m but will start
* not at column 0 necc but at column startcol. If full=FALSE, then it
* will do triangular style elimination, and if full=TRUE, it will do
* Gauss-Jordan style, or full elimination.
*
* \param M Matrix
* \param startcol First column to consider for reduction.
* \param full Gauss-Jordan style or upper triangular form only.
*/
rci_t mzd_gauss_delayed(mzd_t *M, rci_t const startcol, int const full);
/**
* \brief Gaussian elimination.
*
* This will do Gaussian elimination on the matrix m. If full=FALSE,
* then it will do triangular style elimination, and if full=TRUE,
* it will do Gauss-Jordan style, or full elimination.
*
* \param M Matrix
* \param full Gauss-Jordan style or upper triangular form only.
*
* \sa mzd_echelonize_m4ri(), mzd_echelonize_pluq()
*/
rci_t mzd_echelonize_naive(mzd_t *M, int const full);
/**
* \brief Return TRUE if A == B.
*
* \param A Matrix
* \param B Matrix
*/
int mzd_equal(mzd_t const *A, mzd_t const *B);
/**
* \brief Return -1,0,1 if if A < B, A == B or A > B respectively.
*
* \param A Matrix.
* \param B Matrix.
*
* \note This comparison is not well defined mathematically and
* relatively arbitrary since elements of GF(2) don't have an
* ordering.
*/
int mzd_cmp(mzd_t const *A, mzd_t const *B);
/**
* \brief Copy matrix A to DST.
*
* \param DST May be NULL for automatic creation.
* \param A Source matrix.
*/
mzd_t *mzd_copy(mzd_t *DST, mzd_t const *A);
/**
* \brief Concatenate B to A and write the result to C.
*
* That is,
*
\verbatim
[ A ], [ B ] -> [ A B ] = C
\endverbatim
*
* The inputs are not modified but a new matrix is created.
*
* \param C Matrix, may be NULL for automatic creation
* \param A Matrix
* \param B Matrix
*
* \note This is sometimes called augment.
*/
mzd_t *mzd_concat(mzd_t *C, mzd_t const *A, mzd_t const *B);
/**
* \brief Stack A on top of B and write the result to C.
*
* That is,
*
\verbatim
[ A ], [ B ] -> [ A ] = C
[ B ]
\endverbatim
*
* The inputs are not modified but a new matrix is created.
*
* \param C Matrix, may be NULL for automatic creation
* \param A Matrix
* \param B Matrix
*/
mzd_t *mzd_stack(mzd_t *C, mzd_t const *A, mzd_t const *B);
/**
* \brief Copy a submatrix.
*
* Note that the upper bounds are not included.
*
* \param S Preallocated space for submatrix, may be NULL for automatic creation.
* \param M Matrix
* \param lowr start rows
* \param lowc start column
* \param highr stop row (this row is \em not included)
* \param highc stop column (this column is \em not included)
*/
mzd_t *mzd_submatrix(mzd_t *S, mzd_t const *M, rci_t const lowr, rci_t const lowc, rci_t const highr, rci_t const highc);
/**
* \brief Invert the matrix target using Gaussian elimination.
*
* To avoid recomputing the identity matrix over and over again, I may
* be passed in as identity parameter.
*
* \param INV Preallocated space for inversion matrix, may be NULL for automatic creation.
* \param A Matrix to be reduced.
* \param I Identity matrix.
*/
mzd_t *mzd_invert_naive(mzd_t *INV, mzd_t const *A, mzd_t const *I);
/**
* \brief Set C = A+B.
*
* C is also returned. If C is NULL then a new matrix is created which
* must be freed by mzd_free.
*
* \param C Preallocated sum matrix, may be NULL for automatic creation.
* \param A Matrix
* \param B Matrix
*/
mzd_t *mzd_add(mzd_t *C, mzd_t const *A, mzd_t const *B);
/**
* \brief Same as mzd_add but without any checks on the input.
*
* \param C Preallocated sum matrix, may be NULL for automatic creation.
* \param A Matrix
* \param B Matrix
*/
mzd_t *_mzd_add(mzd_t *C, mzd_t const *A, mzd_t const *B);
/**
* \brief Same as mzd_add.
*
* \param C Preallocated difference matrix, may be NULL for automatic creation.
* \param A Matrix
* \param B Matrix
*/
#define mzd_sub mzd_add
/**
* \brief Same as mzd_sub but without any checks on the input.
*
* \param C Preallocated difference matrix, may be NULL for automatic creation.
* \param A Matrix
* \param B Matrix
*/
#define _mzd_sub _mzd_add
/**
* Get n bits starting a position (x,y) from the matrix M.
*
* \param M Source matrix.
* \param x Starting row.
* \param y Starting column.
* \param n Number of bits (<= m4ri_radix);
*/
static inline word mzd_read_bits(mzd_t const *M, rci_t const x, rci_t const y, int const n) {
int const spot = y % m4ri_radix;
wi_t const block = y / m4ri_radix;
int const spill = spot + n - m4ri_radix;
word temp = (spill <= 0) ? M->rows[x][block] << -spill : (M->rows[x][block + 1] << (m4ri_radix - spill)) | (M->rows[x][block] >> spill);
return temp >> (m4ri_radix - n);
}
/**
* \brief a_row[a_startblock:] += b_row[b_startblock:] for offset 0
*
* Adds a_row of A, starting with a_startblock to the end, to
* b_row of B, starting with b_startblock to the end. This gets stored
* in A, in a_row, starting with a_startblock.
*
* \param A destination matrix
* \param a_row destination row for matrix C
* \param a_startblock starting block to work on in matrix C
* \param B source matrix
* \param b_row source row for matrix B
* \param b_startblock starting block to work on in matrix B
*
*/
static inline void mzd_combine_even_in_place(mzd_t *A, rci_t const a_row, wi_t const a_startblock,
mzd_t const *B, rci_t const b_row, wi_t const b_startblock) {
wi_t wide = A->width - a_startblock - 1;
word *a = A->rows[a_row] + a_startblock;
word *b = B->rows[b_row] + b_startblock;
#if __M4RI_HAVE_SSE2
if(wide > 2) {
/** check alignments **/
if (__M4RI_ALIGNMENT(a,16)) {
*a++ ^= *b++;
wide--;
}
if (__M4RI_ALIGNMENT(a, 16) == 0 && __M4RI_ALIGNMENT(b, 16) == 0) {
__m128i *a128 = (__m128i*)a;
__m128i *b128 = (__m128i*)b;
const __m128i *eof = (__m128i*)((unsigned long)(a + wide) & ~0xFUL);
do {
*a128 = _mm_xor_si128(*a128, *b128);
++b128;
++a128;
} while(a128 < eof);
a = (word*)a128;
b = (word*)b128;
wide = ((sizeof(word) * wide) % 16) / sizeof(word);
}
}
#endif // __M4RI_HAVE_SSE2
if (wide > 0) {
wi_t n = (wide + 7) / 8;
switch (wide % 8) {
case 0: do { *(a++) ^= *(b++);
case 7: *(a++) ^= *(b++);
case 6: *(a++) ^= *(b++);
case 5: *(a++) ^= *(b++);
case 4: *(a++) ^= *(b++);
case 3: *(a++) ^= *(b++);
case 2: *(a++) ^= *(b++);
case 1: *(a++) ^= *(b++);
} while (--n > 0);
}
}
*a ^= *b & A->high_bitmask;
__M4RI_DD_MZD(A);
}
/**
* \brief c_row[c_startblock:] = a_row[a_startblock:] + b_row[b_startblock:] for offset 0
*
* Adds a_row of A, starting with a_startblock to the end, to
* b_row of B, starting with b_startblock to the end. This gets stored
* in C, in c_row, starting with c_startblock.
*
* \param C destination matrix
* \param c_row destination row for matrix C
* \param c_startblock starting block to work on in matrix C
* \param A source matrix
* \param a_row source row for matrix A
* \param a_startblock starting block to work on in matrix A
* \param B source matrix
* \param b_row source row for matrix B
* \param b_startblock starting block to work on in matrix B
*
*/
static inline void mzd_combine_even(mzd_t *C, rci_t const c_row, wi_t const c_startblock,
mzd_t const *A, rci_t const a_row, wi_t const a_startblock,
mzd_t const *B, rci_t const b_row, wi_t const b_startblock) {
wi_t wide = A->width - a_startblock - 1;
word *a = A->rows[a_row] + a_startblock;
word *b = B->rows[b_row] + b_startblock;
word *c = C->rows[c_row] + c_startblock;
#if __M4RI_HAVE_SSE2
if(wide > 2) {
/** check alignments **/
if (__M4RI_ALIGNMENT(a,16)) {
*c++ = *b++ ^ *a++;
wide--;
}
if ( (__M4RI_ALIGNMENT(b, 16) | __M4RI_ALIGNMENT(c, 16)) == 0) {
__m128i *a128 = (__m128i*)a;
__m128i *b128 = (__m128i*)b;
__m128i *c128 = (__m128i*)c;
const __m128i *eof = (__m128i*)((unsigned long)(a + wide) & ~0xFUL);
do {
*c128 = _mm_xor_si128(*a128, *b128);
++c128;
++b128;
++a128;
} while(a128 < eof);
a = (word*)a128;
b = (word*)b128;
c = (word*)c128;
wide = ((sizeof(word) * wide) % 16) / sizeof(word);
}
}
#endif // __M4RI_HAVE_SSE2
if (wide > 0) {
wi_t n = (wide + 7) / 8;
switch (wide % 8) {
case 0: do { *(c++) = *(a++) ^ *(b++);
case 7: *(c++) = *(a++) ^ *(b++);
case 6: *(c++) = *(a++) ^ *(b++);
case 5: *(c++) = *(a++) ^ *(b++);
case 4: *(c++) = *(a++) ^ *(b++);
case 3: *(c++) = *(a++) ^ *(b++);
case 2: *(c++) = *(a++) ^ *(b++);
case 1: *(c++) = *(a++) ^ *(b++);
} while (--n > 0);
}
}
*c ^= ((*a ^ *b ^ *c) & C->high_bitmask);
__M4RI_DD_MZD(C);
}
/**
* \brief row3[col3:] = row1[col1:] + row2[col2:]
*
* Adds row1 of SC1, starting with startblock1 to the end, to
* row2 of SC2, starting with startblock2 to the end. This gets stored
* in DST, in row3, starting with startblock3.
*
* \param C destination matrix
* \param c_row destination row for matrix dst
* \param c_startblock starting block to work on in matrix dst
* \param A source matrix
* \param a_row source row for matrix sc1
* \param a_startblock starting block to work on in matrix sc1
* \param B source matrix
* \param b_row source row for matrix sc2
* \param b_startblock starting block to work on in matrix sc2
*
*/
static inline void mzd_combine(mzd_t *C, rci_t const c_row, wi_t const c_startblock,
mzd_t const *A, rci_t const a_row, wi_t const a_startblock,
mzd_t const *B, rci_t const b_row, wi_t const b_startblock) {
if( (C == A) & (a_row == c_row) & (a_startblock == c_startblock) )
mzd_combine_even_in_place(C, c_row, c_startblock, B, b_row, b_startblock);
else
mzd_combine_even(C, c_row, c_startblock, A, a_row, a_startblock, B, b_row, b_startblock);
return;
}
/**
* \brief Get n bits starting a position (x,y) from the matrix M.
*
* This function is in principle the same as mzd_read_bits,
* but it explicitely returns an 'int' and is used as
* index into an array (Gray code).
*/
static inline int mzd_read_bits_int(mzd_t const *M, rci_t const x, rci_t const y, int const n) {
return __M4RI_CONVERT_TO_INT(mzd_read_bits(M, x, y, n));
}
/**
* \brief Zero test for matrix.
*
* \param A Input matrix.
*
*/
int mzd_is_zero(mzd_t const *A);
/**
* \brief Clear the given row, but only begins at the column coloffset.
*
* \param M Matrix
* \param row Index of row
* \param coloffset Column offset
*/
void mzd_row_clear_offset(mzd_t *M, rci_t const row, rci_t const coloffset);
/**
* \brief Find the next nonzero entry in M starting at start_row and start_col.
*
* This function walks down rows in the inner loop and columns in the
* outer loop. If a nonzero entry is found this function returns 1 and
* zero otherwise.
*
* If and only if a nonzero entry is found r and c are updated.
*
* \param M Matrix
* \param start_row Index of row where to start search
* \param start_col Index of column where to start search
* \param r Row index updated if pivot is found
* \param c Column index updated if pivot is found
*/
int mzd_find_pivot(mzd_t const *M, rci_t start_row, rci_t start_col, rci_t *r, rci_t *c);
/**
* \brief Return the number of nonzero entries divided by nrows *
* ncols
*
* If res = 0 then 100 samples per row are made, if res > 0 the
* function takes res sized steps within each row (res = 1 uses every
* word).
*
* \param A Matrix
* \param res Resolution of sampling (in words)
*/
double mzd_density(mzd_t const *A, wi_t res);
/**
* \brief Return the number of nonzero entries divided by nrows *
* ncols considering only the submatrix starting at (r,c).
*
* If res = 0 then 100 samples per row are made, if res > 0 the
* function takes res sized steps within each row (res = 1 uses every
* word).
*
* \param A Matrix
* \param res Resolution of sampling (in words)
* \param r Row to start counting
* \param c Column to start counting
*/
double _mzd_density(mzd_t const *A, wi_t res, rci_t r, rci_t c);
/**
* \brief Return the first row with all zero entries.
*
* If no such row can be found returns nrows.
*
* \param A Matrix
*/
rci_t mzd_first_zero_row(mzd_t const *A);
/**
* \brief Return hash value for matrix.
*
* \param A Matrix
*/
static inline word mzd_hash(mzd_t const *A) {
word hash = 0;
for (rci_t r = 0; r < A->nrows; ++r)
hash ^= rotate_word(calculate_hash(A->rows[r], A->width), r % m4ri_radix);
return hash;
}
/**
* Return upper triangular submatrix of A
*
* \param U Output matrix, if NULL a new matrix will be returned
* \param A Source matrix
*
* \return U
*/
mzd_t *mzd_extract_u(mzd_t *U, mzd_t const *A);
/**
* Return lower triangular submatrix of A
*
* \param L Output matrix, if NULL a new matrix will be returned
* \param A Source matrix
*
* \return L
*/
mzd_t *mzd_extract_l(mzd_t *L, mzd_t const *A);
#endif // M4RI_MZD