1314 lines
34 KiB
C
1314 lines
34 KiB
C
|
/**
|
||
|
* \file mzd.h
|
||
|
* \brief Dense matrices over GF(2) represented as a bit field.
|
||
|
*
|
||
|
* \author Gregory Bard <bard@fordham.edu>
|
||
|
* \author Martin Albrecht <martinralbrecht+m4ri@googlemail.com>
|
||
|
* \author Carlo Wood <carlo@alinoe.com>
|
||
|
*/
|
||
|
|
||
|
#ifndef M4RI_MZD
|
||
|
#define M4RI_MZD
|
||
|
|
||
|
/*******************************************************************
|
||
|
*
|
||
|
* M4RI: Linear Algebra over GF(2)
|
||
|
*
|
||
|
* Copyright (C) 2007, 2008 Gregory Bard <bard@fordham.edu>
|
||
|
* Copyright (C) 2008-2013 Martin Albrecht <M.R.Albrecht@rhul.ac.uk>
|
||
|
* Copyright (C) 2011 Carlo Wood <carlo@alinoe.com>
|
||
|
*
|
||
|
* Distributed under the terms of the GNU General Public License (GPL)
|
||
|
* version 2 or higher.
|
||
|
*
|
||
|
* This code is distributed in the hope that it will be useful,
|
||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
* General Public License for more details.
|
||
|
*
|
||
|
* The full text of the GPL is available at:
|
||
|
*
|
||
|
* http://www.gnu.org/licenses/
|
||
|
*
|
||
|
********************************************************************/
|
||
|
|
||
|
#ifdef HAVE_CONFIG_H
|
||
|
#include "config.h"
|
||
|
#endif
|
||
|
|
||
|
#include <m4ri/m4ri_config.h>
|
||
|
|
||
|
#include <assert.h>
|
||
|
#include <math.h>
|
||
|
#include <stdio.h>
|
||
|
|
||
|
#if __M4RI_HAVE_SSE2
|
||
|
#include <emmintrin.h>
|
||
|
#endif
|
||
|
|
||
|
#include <m4ri/misc.h>
|
||
|
#include <m4ri/debug_dump.h>
|
||
|
|
||
|
/**
|
||
|
* Maximum number of words allocated for one mzd_t block.
|
||
|
*
|
||
|
* \note This value must fit in an int, even though it's type is size_t.
|
||
|
*/
|
||
|
|
||
|
#define __M4RI_MAX_MZD_BLOCKSIZE (((size_t)1) << 27)
|
||
|
|
||
|
/**
|
||
|
* \brief Matrix multiplication block-ing dimension.
|
||
|
*
|
||
|
* Defines the number of rows of the matrix A that are
|
||
|
* processed as one block during the execution of a multiplication
|
||
|
* algorithm.
|
||
|
*/
|
||
|
|
||
|
#define __M4RI_MUL_BLOCKSIZE MIN(((int)sqrt((double)(4 * __M4RI_CPU_L3_CACHE))) / 2, 2048)
|
||
|
|
||
|
/**
|
||
|
* \brief Data containers containing the values packed into words
|
||
|
*/
|
||
|
|
||
|
typedef struct {
|
||
|
size_t size; /*!< number of words */
|
||
|
word* begin; /*!< first word */
|
||
|
word* end; /*!< last word */
|
||
|
} mzd_block_t;
|
||
|
|
||
|
/**
|
||
|
* \brief Dense matrices over GF(2).
|
||
|
*
|
||
|
* The most fundamental data type in this library.
|
||
|
*/
|
||
|
|
||
|
typedef struct mzd_t {
|
||
|
|
||
|
rci_t nrows; /*!< Number of rows. */
|
||
|
rci_t ncols; /*!< Number of columns. */
|
||
|
wi_t width; /*!< Number of words with valid bits: width = ceil(ncols / m4ri_radix) */
|
||
|
|
||
|
/**
|
||
|
* Offset in words between rows.
|
||
|
*
|
||
|
* rowstride = (width < mzd_paddingwidth || (width & 1) == 0) ? width : width + 1;
|
||
|
* where width is the width of the underlying non-windowed matrix.
|
||
|
*/
|
||
|
|
||
|
wi_t rowstride;
|
||
|
|
||
|
/**
|
||
|
* Offset in words from start of block to first word.
|
||
|
*
|
||
|
* rows[0] = blocks[0].begin + offset_vector;
|
||
|
* This, together with rowstride, makes the rows array obsolete.
|
||
|
*/
|
||
|
|
||
|
wi_t offset_vector;
|
||
|
|
||
|
wi_t row_offset; /*!< Number of rows to the first row counting from the start of the first block. */
|
||
|
|
||
|
/**
|
||
|
* Booleans to speed up things.
|
||
|
*
|
||
|
* The bits have the following meaning:
|
||
|
*
|
||
|
* 1: Has non-zero excess.
|
||
|
* 2: Is windowed, but has zero offset.
|
||
|
* 3: Is windowed, but has zero excess.
|
||
|
* 4: Is windowed, but owns the blocks allocations.
|
||
|
* 5: Spans more than 1 block.
|
||
|
*/
|
||
|
|
||
|
uint8_t flags;
|
||
|
|
||
|
/**
|
||
|
* blockrows_log = log2(blockrows);
|
||
|
* where blockrows is the number of rows in one block, which is a power of 2.
|
||
|
*/
|
||
|
|
||
|
uint8_t blockrows_log;
|
||
|
|
||
|
word high_bitmask; /*!< Mask for valid bits in the word with the highest index (width - 1). */
|
||
|
mzd_block_t *blocks; /*!< Pointers to the actual blocks of memory containing the values packed into words. */
|
||
|
word **rows; /*!< Address of first word in each row, so the first word of row i is is m->rows[i] */
|
||
|
uint64_t dummy; /*!< ensures sizeof(mzd_t) == 64 */
|
||
|
|
||
|
} mzd_t;
|
||
|
|
||
|
/**
|
||
|
* \brief The minimum width where padding occurs.
|
||
|
*/
|
||
|
static wi_t const mzd_paddingwidth = 1;
|
||
|
|
||
|
/**
|
||
|
* \brief flag when ncols%64 == 0
|
||
|
*/
|
||
|
|
||
|
static uint8_t const mzd_flag_nonzero_excess = 0x2;
|
||
|
|
||
|
/**
|
||
|
* \brief flag for windowed matrix
|
||
|
*/
|
||
|
|
||
|
static uint8_t const mzd_flag_windowed_zerooffset = 0x4;
|
||
|
|
||
|
/**
|
||
|
* \brief flag for windowed matrix where ncols%64 == 0
|
||
|
*/
|
||
|
|
||
|
static uint8_t const mzd_flag_windowed_zeroexcess = 0x8;
|
||
|
|
||
|
/**
|
||
|
* \brief flag for windowed matrix wich owns its memory
|
||
|
*/
|
||
|
|
||
|
static uint8_t const mzd_flag_windowed_ownsblocks = 0x10;
|
||
|
|
||
|
/**
|
||
|
* \brief flag for multiply blocks
|
||
|
*/
|
||
|
|
||
|
static uint8_t const mzd_flag_multiple_blocks = 0x20;
|
||
|
|
||
|
/**
|
||
|
* \brief Test if a matrix is windowed.
|
||
|
*
|
||
|
* \param M Matrix
|
||
|
*
|
||
|
* \return a non-zero value if the matrix is windowed, otherwise return zero.
|
||
|
*/
|
||
|
static inline int mzd_is_windowed(mzd_t const *M) {
|
||
|
return M->flags & (mzd_flag_windowed_zerooffset);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* \brief Test if this mzd_t should free blocks.
|
||
|
*
|
||
|
* \param M Matrix
|
||
|
*
|
||
|
* \return TRUE iff blocks is non-zero and should be freed upon a call to mzd_free.
|
||
|
*/
|
||
|
static inline int mzd_owns_blocks(mzd_t const *M) {
|
||
|
return M->blocks && (!mzd_is_windowed(M) || ((M->flags & mzd_flag_windowed_ownsblocks)));
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* \brief Get a pointer the first word.
|
||
|
*
|
||
|
* \param M Matrix
|
||
|
*
|
||
|
* \return a pointer to the first word of the first row.
|
||
|
*/
|
||
|
|
||
|
static inline word* mzd_first_row(mzd_t const *M) {
|
||
|
word* result = M->blocks[0].begin + M->offset_vector;
|
||
|
assert(M->nrows == 0 || result == M->rows[0]);
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* \brief Get a pointer to the first word in block n.
|
||
|
*
|
||
|
* Use mzd_first_row for block number 0.
|
||
|
*
|
||
|
* \param M Matrix
|
||
|
* \param n The block number. Must be larger than 0.
|
||
|
*
|
||
|
* \return a pointer to the first word of the first row in block n.
|
||
|
*/
|
||
|
static inline word* mzd_first_row_next_block(mzd_t const* M, int n) {
|
||
|
assert(n > 0);
|
||
|
return M->blocks[n].begin + M->offset_vector - M->row_offset * M->rowstride;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* \brief Convert row to blocks index.
|
||
|
*
|
||
|
* \param M Matrix.
|
||
|
* \param row The row to convert.
|
||
|
*
|
||
|
* \return the block number that contains this row.
|
||
|
*/
|
||
|
|
||
|
static inline int mzd_row_to_block(mzd_t const* M, rci_t row) {
|
||
|
return (M->row_offset + row) >> M->blockrows_log;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* \brief Total number of rows in this block.
|
||
|
*
|
||
|
* Should be called with a constant n=0, or with
|
||
|
* n > 0 when n is a variable, for optimization
|
||
|
* reasons.
|
||
|
*
|
||
|
* \param M Matrix
|
||
|
* \param n The block number.
|
||
|
*
|
||
|
* \return the total number of rows in this block.
|
||
|
*/
|
||
|
|
||
|
static inline wi_t mzd_rows_in_block(mzd_t const* M, int n) {
|
||
|
if (__M4RI_UNLIKELY(M->flags & mzd_flag_multiple_blocks)) {
|
||
|
if (__M4RI_UNLIKELY(n == 0)) {
|
||
|
return (1 << M->blockrows_log) - M->row_offset;
|
||
|
} else {
|
||
|
int const last_block = mzd_row_to_block(M, M->nrows - 1);
|
||
|
if (n < last_block)
|
||
|
return (1 << M->blockrows_log);
|
||
|
return M->nrows + M->row_offset - (n << M->blockrows_log);
|
||
|
}
|
||
|
}
|
||
|
return n ? 0 : M->nrows;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* \brief Number of rows in this block including r
|
||
|
*
|
||
|
* \param M Matrix
|
||
|
* \param r row
|
||
|
*
|
||
|
* \return the number of rows with index >= r in this block
|
||
|
*/
|
||
|
|
||
|
static inline wi_t mzd_remaining_rows_in_block(mzd_t const* M, rci_t r) {
|
||
|
const int n = mzd_row_to_block(M, r);
|
||
|
r = (r - (n << M->blockrows_log));
|
||
|
if (__M4RI_UNLIKELY(M->flags & mzd_flag_multiple_blocks)) {
|
||
|
if (__M4RI_UNLIKELY(n == 0)) {
|
||
|
return (1 << M->blockrows_log) - M->row_offset - r;
|
||
|
} else {
|
||
|
int const last_block = mzd_row_to_block(M, M->nrows - 1);
|
||
|
if (n < last_block)
|
||
|
return (1 << M->blockrows_log) - r;
|
||
|
return M->nrows + M->row_offset - (n << M->blockrows_log) - r;
|
||
|
}
|
||
|
}
|
||
|
return n ? 0 : M->nrows - r;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* \brief Get pointer to first word of row.
|
||
|
*
|
||
|
* \param M Matrix
|
||
|
* \param row The row index.
|
||
|
*
|
||
|
* \return pointer to first word of the row.
|
||
|
*/
|
||
|
|
||
|
static inline word* mzd_row(mzd_t const* M, rci_t row) {
|
||
|
wi_t big_vector = M->offset_vector + row * M->rowstride;
|
||
|
word* result = M->blocks[0].begin + big_vector;
|
||
|
if (__M4RI_UNLIKELY(M->flags & mzd_flag_multiple_blocks)) {
|
||
|
int const n = (M->row_offset + row) >> M->blockrows_log;
|
||
|
result = M->blocks[n].begin + big_vector - n * (M->blocks[0].size / sizeof(word));
|
||
|
}
|
||
|
assert(result == M->rows[row]);
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* \brief Create a new matrix of dimension r x c.
|
||
|
*
|
||
|
* Use mzd_free to kill it.
|
||
|
*
|
||
|
* \param r Number of rows
|
||
|
* \param c Number of columns
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
mzd_t *mzd_init(rci_t const r, rci_t const c);
|
||
|
|
||
|
/**
|
||
|
* \brief Free a matrix created with mzd_init.
|
||
|
*
|
||
|
* \param A Matrix
|
||
|
*/
|
||
|
|
||
|
void mzd_free(mzd_t *A);
|
||
|
|
||
|
|
||
|
/**
|
||
|
* \brief Create a window/view into the matrix M.
|
||
|
*
|
||
|
* A matrix window for M is a meta structure on the matrix M. It is
|
||
|
* setup to point into the matrix so M \em must \em not be freed while the
|
||
|
* matrix window is used.
|
||
|
*
|
||
|
* This function puts the restriction on the provided parameters that
|
||
|
* all parameters must be within range for M which is not enforced
|
||
|
* currently .
|
||
|
*
|
||
|
* Use mzd_free_window to free the window.
|
||
|
*
|
||
|
* \param M Matrix
|
||
|
* \param lowr Starting row (inclusive)
|
||
|
* \param lowc Starting column (inclusive, must be multiple of m4ri_radix)
|
||
|
* \param highr End row (exclusive)
|
||
|
* \param highc End column (exclusive)
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
mzd_t *mzd_init_window(mzd_t *M, rci_t const lowr, rci_t const lowc, rci_t const highr, rci_t const highc);
|
||
|
|
||
|
/**
|
||
|
* \brief Create a const window/view into a const matrix M.
|
||
|
*
|
||
|
* See mzd_init_window, but for constant M.
|
||
|
*/
|
||
|
|
||
|
static inline mzd_t const *mzd_init_window_const(mzd_t const *M, rci_t const lowr, rci_t const lowc, rci_t const highr, rci_t const highc)
|
||
|
{
|
||
|
return mzd_init_window((mzd_t*)M, lowr, lowc, highr, highc);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* \brief Free a matrix window created with mzd_init_window.
|
||
|
*
|
||
|
* \param A Matrix
|
||
|
*/
|
||
|
|
||
|
#define mzd_free_window mzd_free
|
||
|
|
||
|
/**
|
||
|
* \brief Swap the two rows rowa and rowb starting at startblock.
|
||
|
*
|
||
|
* \param M Matrix with a zero offset.
|
||
|
* \param rowa Row index.
|
||
|
* \param rowb Row index.
|
||
|
* \param startblock Start swapping only in this block.
|
||
|
*/
|
||
|
|
||
|
static inline void _mzd_row_swap(mzd_t *M, rci_t const rowa, rci_t const rowb, wi_t const startblock) {
|
||
|
if ((rowa == rowb) || (startblock >= M->width))
|
||
|
return;
|
||
|
|
||
|
wi_t width = M->width - startblock - 1;
|
||
|
word *a = M->rows[rowa] + startblock;
|
||
|
word *b = M->rows[rowb] + startblock;
|
||
|
word tmp;
|
||
|
word const mask_end = M->high_bitmask;
|
||
|
|
||
|
for(wi_t i = 0; i < width; ++i) {
|
||
|
tmp = a[i];
|
||
|
a[i] = b[i];
|
||
|
b[i] = tmp;
|
||
|
}
|
||
|
tmp = (a[width] ^ b[width]) & mask_end;
|
||
|
a[width] ^= tmp;
|
||
|
b[width] ^= tmp;
|
||
|
|
||
|
__M4RI_DD_ROW(M, rowa);
|
||
|
__M4RI_DD_ROW(M, rowb);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* \brief Swap the two rows rowa and rowb.
|
||
|
*
|
||
|
* \param M Matrix
|
||
|
* \param rowa Row index.
|
||
|
* \param rowb Row index.
|
||
|
*/
|
||
|
|
||
|
static inline void mzd_row_swap(mzd_t *M, rci_t const rowa, rci_t const rowb) {
|
||
|
_mzd_row_swap(M, rowa, rowb, 0);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* \brief copy row j from A to row i from B.
|
||
|
*
|
||
|
* The offsets of A and B must match and the number of columns of A
|
||
|
* must be less than or equal to the number of columns of B.
|
||
|
*
|
||
|
* \param B Target matrix.
|
||
|
* \param i Target row index.
|
||
|
* \param A Source matrix.
|
||
|
* \param j Source row index.
|
||
|
*/
|
||
|
|
||
|
void mzd_copy_row(mzd_t *B, rci_t i, mzd_t const *A, rci_t j);
|
||
|
|
||
|
/**
|
||
|
* \brief Swap the two columns cola and colb.
|
||
|
*
|
||
|
* \param M Matrix.
|
||
|
* \param cola Column index.
|
||
|
* \param colb Column index.
|
||
|
*/
|
||
|
|
||
|
void mzd_col_swap(mzd_t *M, rci_t const cola, rci_t const colb);
|
||
|
|
||
|
/**
|
||
|
* \brief Swap the two columns cola and colb but only between start_row and stop_row.
|
||
|
*
|
||
|
* \param M Matrix.
|
||
|
* \param cola Column index.
|
||
|
* \param colb Column index.
|
||
|
* \param start_row Row index.
|
||
|
* \param stop_row Row index (exclusive).
|
||
|
*/
|
||
|
|
||
|
static inline void mzd_col_swap_in_rows(mzd_t *M, rci_t const cola, rci_t const colb, rci_t const start_row, rci_t const stop_row) {
|
||
|
if (cola == colb)
|
||
|
return;
|
||
|
|
||
|
rci_t const _cola = cola;
|
||
|
rci_t const _colb = colb;
|
||
|
|
||
|
wi_t const a_word = _cola / m4ri_radix;
|
||
|
wi_t const b_word = _colb / m4ri_radix;
|
||
|
|
||
|
int const a_bit = _cola % m4ri_radix;
|
||
|
int const b_bit = _colb % m4ri_radix;
|
||
|
|
||
|
word* RESTRICT ptr = mzd_row(M, start_row);
|
||
|
int max_bit = MAX(a_bit, b_bit);
|
||
|
int count_remaining = stop_row - start_row;
|
||
|
int min_bit = a_bit + b_bit - max_bit;
|
||
|
int block = mzd_row_to_block(M, start_row);
|
||
|
int offset = max_bit - min_bit;
|
||
|
word mask = m4ri_one << min_bit;
|
||
|
int count = MIN(mzd_remaining_rows_in_block(M, start_row), count_remaining);
|
||
|
|
||
|
// Apparently we're calling with start_row == stop_row sometimes (seems a bug to me).
|
||
|
if (count <= 0)
|
||
|
return;
|
||
|
|
||
|
if (a_word == b_word) {
|
||
|
while(1) {
|
||
|
count_remaining -= count;
|
||
|
ptr += a_word;
|
||
|
int fast_count = count / 4;
|
||
|
int rest_count = count - 4 * fast_count;
|
||
|
word xor_v[4];
|
||
|
wi_t const rowstride = M->rowstride;
|
||
|
while (fast_count--) {
|
||
|
xor_v[0] = ptr[0];
|
||
|
xor_v[1] = ptr[rowstride];
|
||
|
xor_v[2] = ptr[2 * rowstride];
|
||
|
xor_v[3] = ptr[3 * rowstride];
|
||
|
xor_v[0] ^= xor_v[0] >> offset;
|
||
|
xor_v[1] ^= xor_v[1] >> offset;
|
||
|
xor_v[2] ^= xor_v[2] >> offset;
|
||
|
xor_v[3] ^= xor_v[3] >> offset;
|
||
|
xor_v[0] &= mask;
|
||
|
xor_v[1] &= mask;
|
||
|
xor_v[2] &= mask;
|
||
|
xor_v[3] &= mask;
|
||
|
xor_v[0] |= xor_v[0] << offset;
|
||
|
xor_v[1] |= xor_v[1] << offset;
|
||
|
xor_v[2] |= xor_v[2] << offset;
|
||
|
xor_v[3] |= xor_v[3] << offset;
|
||
|
ptr[0] ^= xor_v[0];
|
||
|
ptr[rowstride] ^= xor_v[1];
|
||
|
ptr[2 * rowstride] ^= xor_v[2];
|
||
|
ptr[3 * rowstride] ^= xor_v[3];
|
||
|
ptr += 4 * rowstride;
|
||
|
}
|
||
|
while (rest_count--) {
|
||
|
word xor_v = *ptr;
|
||
|
xor_v ^= xor_v >> offset;
|
||
|
xor_v &= mask;
|
||
|
*ptr ^= xor_v | (xor_v << offset);
|
||
|
ptr += rowstride;
|
||
|
}
|
||
|
block++;
|
||
|
if ((count = MIN(mzd_rows_in_block(M, block), count_remaining)) <= 0)
|
||
|
break;
|
||
|
ptr = mzd_first_row_next_block(M, block);
|
||
|
}
|
||
|
} else {
|
||
|
word* RESTRICT min_ptr;
|
||
|
wi_t max_offset;
|
||
|
if (min_bit == a_bit) {
|
||
|
min_ptr = ptr + a_word;
|
||
|
max_offset = b_word - a_word;
|
||
|
} else {
|
||
|
min_ptr = ptr + b_word;
|
||
|
max_offset = a_word - b_word;
|
||
|
}
|
||
|
while(1) {
|
||
|
count_remaining -= count;
|
||
|
wi_t const rowstride = M->rowstride;
|
||
|
while(count--) {
|
||
|
word xor_v = (min_ptr[0] ^ (min_ptr[max_offset] >> offset)) & mask;
|
||
|
min_ptr[0] ^= xor_v;
|
||
|
min_ptr[max_offset] ^= xor_v << offset;
|
||
|
min_ptr += rowstride;
|
||
|
}
|
||
|
block++;
|
||
|
if ((count = MIN(mzd_rows_in_block(M,+block), count_remaining)) <= 0)
|
||
|
break;
|
||
|
ptr = mzd_first_row_next_block(M, block);
|
||
|
if (min_bit == a_bit)
|
||
|
min_ptr = ptr + a_word;
|
||
|
else
|
||
|
min_ptr = ptr + b_word;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
__M4RI_DD_MZD(M);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* \brief Read the bit at position M[row,col].
|
||
|
*
|
||
|
* \param M Matrix
|
||
|
* \param row Row index
|
||
|
* \param col Column index
|
||
|
*
|
||
|
* \note No bounds checks whatsoever are performed.
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
static inline BIT mzd_read_bit(mzd_t const *M, rci_t const row, rci_t const col ) {
|
||
|
return __M4RI_GET_BIT(M->rows[row][col/m4ri_radix], col%m4ri_radix);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* \brief Write the bit value to position M[row,col]
|
||
|
*
|
||
|
* \param M Matrix
|
||
|
* \param row Row index
|
||
|
* \param col Column index
|
||
|
* \param value Either 0 or 1
|
||
|
*
|
||
|
* \note No bounds checks whatsoever are performed.
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
static inline void mzd_write_bit(mzd_t *M, rci_t const row, rci_t const col, BIT const value) {
|
||
|
__M4RI_WRITE_BIT(M->rows[row][col/m4ri_radix], col%m4ri_radix, value);
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* \brief XOR n bits from values to M starting a position (x,y).
|
||
|
*
|
||
|
* \param M Source matrix.
|
||
|
* \param x Starting row.
|
||
|
* \param y Starting column.
|
||
|
* \param n Number of bits (<= m4ri_radix);
|
||
|
* \param values Word with values;
|
||
|
*/
|
||
|
|
||
|
static inline void mzd_xor_bits(mzd_t const *M, rci_t const x, rci_t const y, int const n, word values) {
|
||
|
int const spot = y % m4ri_radix;
|
||
|
wi_t const block = y / m4ri_radix;
|
||
|
M->rows[x][block] ^= values << spot;
|
||
|
int const space = m4ri_radix - spot;
|
||
|
if (n > space)
|
||
|
M->rows[x][block + 1] ^= values >> space;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* \brief AND n bits from values to M starting a position (x,y).
|
||
|
*
|
||
|
* \param M Source matrix.
|
||
|
* \param x Starting row.
|
||
|
* \param y Starting column.
|
||
|
* \param n Number of bits (<= m4ri_radix);
|
||
|
* \param values Word with values;
|
||
|
*/
|
||
|
|
||
|
static inline void mzd_and_bits(mzd_t const *M, rci_t const x, rci_t const y, int const n, word values) {
|
||
|
/* This is the best way, since this will drop out once we inverse the bits in values: */
|
||
|
values >>= (m4ri_radix - n); /* Move the bits to the lowest columns */
|
||
|
|
||
|
int const spot = y % m4ri_radix;
|
||
|
wi_t const block = y / m4ri_radix;
|
||
|
M->rows[x][block] &= values << spot;
|
||
|
int const space = m4ri_radix - spot;
|
||
|
if (n > space)
|
||
|
M->rows[x][block + 1] &= values >> space;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* \brief Clear n bits in M starting a position (x,y).
|
||
|
*
|
||
|
* \param M Source matrix.
|
||
|
* \param x Starting row.
|
||
|
* \param y Starting column.
|
||
|
* \param n Number of bits (0 < n <= m4ri_radix);
|
||
|
*/
|
||
|
|
||
|
static inline void mzd_clear_bits(mzd_t const *M, rci_t const x, rci_t const y, int const n) {
|
||
|
assert(n>0 && n <= m4ri_radix);
|
||
|
word values = m4ri_ffff >> (m4ri_radix - n);
|
||
|
int const spot = y % m4ri_radix;
|
||
|
wi_t const block = y / m4ri_radix;
|
||
|
M->rows[x][block] &= ~(values << spot);
|
||
|
int const space = m4ri_radix - spot;
|
||
|
if (n > space)
|
||
|
M->rows[x][block + 1] &= ~(values >> space);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* \brief Add the rows sourcerow and destrow and stores the total in the row
|
||
|
* destrow, but only begins at the column coloffset.
|
||
|
*
|
||
|
* \param M Matrix
|
||
|
* \param dstrow Index of target row
|
||
|
* \param srcrow Index of source row
|
||
|
* \param coloffset Start column (0 <= coloffset < M->ncols)
|
||
|
*
|
||
|
* \warning This function expects that there is at least one word worth of work.
|
||
|
*/
|
||
|
|
||
|
static inline void mzd_row_add_offset(mzd_t *M, rci_t dstrow, rci_t srcrow, rci_t coloffset) {
|
||
|
assert(dstrow < M->nrows && srcrow < M->nrows && coloffset < M->ncols);
|
||
|
wi_t const startblock= coloffset/m4ri_radix;
|
||
|
wi_t wide = M->width - startblock;
|
||
|
word *src = M->rows[srcrow] + startblock;
|
||
|
word *dst = M->rows[dstrow] + startblock;
|
||
|
word const mask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - coloffset % m4ri_radix);
|
||
|
word const mask_end = M->high_bitmask;
|
||
|
|
||
|
*dst++ ^= *src++ & mask_begin;
|
||
|
--wide;
|
||
|
|
||
|
#if __M4RI_HAVE_SSE2
|
||
|
int not_aligned = __M4RI_ALIGNMENT(src,16) != 0; /* 0: Aligned, 1: Not aligned */
|
||
|
if (wide > not_aligned + 1) /* Speed up for small matrices */
|
||
|
{
|
||
|
if (not_aligned) {
|
||
|
*dst++ ^= *src++;
|
||
|
--wide;
|
||
|
}
|
||
|
/* Now wide > 1 */
|
||
|
__m128i* __src = (__m128i*)src;
|
||
|
__m128i* __dst = (__m128i*)dst;
|
||
|
__m128i* const eof = (__m128i*)((unsigned long)(src + wide) & ~0xFUL);
|
||
|
do
|
||
|
{
|
||
|
__m128i xmm1 = _mm_xor_si128(*__dst, *__src);
|
||
|
*__dst++ = xmm1;
|
||
|
}
|
||
|
while(++__src < eof);
|
||
|
src = (word*)__src;
|
||
|
dst = (word*)__dst;
|
||
|
wide = ((sizeof(word)*wide)%16)/sizeof(word);
|
||
|
}
|
||
|
#endif
|
||
|
wi_t i = -1;
|
||
|
while(++i < wide)
|
||
|
dst[i] ^= src[i];
|
||
|
/*
|
||
|
* Revert possibly non-zero excess bits.
|
||
|
* Note that i == wide here, and wide can be 0.
|
||
|
* But really, src[wide - 1] is M->rows[srcrow][M->width - 1] ;)
|
||
|
* We use i - 1 here to let the compiler know these are the same addresses
|
||
|
* that we last accessed, in the previous loop.
|
||
|
*/
|
||
|
dst[i - 1] ^= src[i - 1] & ~mask_end;
|
||
|
|
||
|
__M4RI_DD_ROW(M, dstrow);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* \brief Add the rows sourcerow and destrow and stores the total in
|
||
|
* the row destrow.
|
||
|
*
|
||
|
* \param M Matrix
|
||
|
* \param sourcerow Index of source row
|
||
|
* \param destrow Index of target row
|
||
|
*
|
||
|
* \note this can be done much faster with mzd_combine.
|
||
|
*/
|
||
|
|
||
|
void mzd_row_add(mzd_t *M, rci_t const sourcerow, rci_t const destrow);
|
||
|
|
||
|
/**
|
||
|
* \brief Transpose a matrix.
|
||
|
*
|
||
|
* This function uses the fact that:
|
||
|
\verbatim
|
||
|
[ A B ]T [AT CT]
|
||
|
[ C D ] = [BT DT]
|
||
|
\endverbatim
|
||
|
* and thus rearranges the blocks recursively.
|
||
|
*
|
||
|
* \param DST Preallocated return matrix, may be NULL for automatic creation.
|
||
|
* \param A Matrix
|
||
|
*/
|
||
|
|
||
|
mzd_t *mzd_transpose(mzd_t *DST, mzd_t const *A);
|
||
|
|
||
|
/**
|
||
|
* \brief Naive cubic matrix multiplication.
|
||
|
*
|
||
|
* That is, compute C such that C == AB.
|
||
|
*
|
||
|
* \param C Preallocated product matrix, may be NULL for automatic creation.
|
||
|
* \param A Input matrix A.
|
||
|
* \param B Input matrix B.
|
||
|
*
|
||
|
* \note Normally, if you will multiply several times by b, it is
|
||
|
* smarter to calculate bT yourself, and keep it, and then use the
|
||
|
* function called _mzd_mul_naive
|
||
|
*
|
||
|
*/
|
||
|
mzd_t *mzd_mul_naive(mzd_t *C, mzd_t const *A, mzd_t const *B);
|
||
|
|
||
|
/**
|
||
|
* \brief Naive cubic matrix multiplication and addition
|
||
|
*
|
||
|
* That is, compute C such that C == C + AB.
|
||
|
*
|
||
|
* \param C Preallocated product matrix.
|
||
|
* \param A Input matrix A.
|
||
|
* \param B Input matrix B.
|
||
|
*
|
||
|
* \note Normally, if you will multiply several times by b, it is
|
||
|
* smarter to calculate bT yourself, and keep it, and then use the
|
||
|
* function called _mzd_mul_naive
|
||
|
*/
|
||
|
|
||
|
mzd_t *mzd_addmul_naive(mzd_t *C, mzd_t const *A, mzd_t const *B);
|
||
|
|
||
|
/**
|
||
|
* \brief Naive cubic matrix multiplication with the pre-transposed B.
|
||
|
*
|
||
|
* That is, compute C such that C == AB^t.
|
||
|
*
|
||
|
* \param C Preallocated product matrix.
|
||
|
* \param A Input matrix A.
|
||
|
* \param B Pre-transposed input matrix B.
|
||
|
* \param clear Whether to clear C before accumulating AB
|
||
|
*/
|
||
|
|
||
|
mzd_t *_mzd_mul_naive(mzd_t *C, mzd_t const *A, mzd_t const *B, int const clear);
|
||
|
|
||
|
/**
|
||
|
* \brief Matrix multiplication optimized for v*A where v is a vector.
|
||
|
*
|
||
|
* \param C Preallocated product matrix.
|
||
|
* \param v Input matrix v.
|
||
|
* \param A Input matrix A.
|
||
|
* \param clear If set clear C first, otherwise add result to C.
|
||
|
*
|
||
|
*/
|
||
|
mzd_t *_mzd_mul_va(mzd_t *C, mzd_t const *v, mzd_t const *A, int const clear);
|
||
|
|
||
|
/**
|
||
|
* \brief Fill matrix M with uniformly distributed bits.
|
||
|
*
|
||
|
* \param M Matrix
|
||
|
*
|
||
|
* \todo Allow the user to provide a RNG callback.
|
||
|
*/
|
||
|
|
||
|
void mzd_randomize(mzd_t *M);
|
||
|
|
||
|
/**
|
||
|
* \brief Set the matrix M to the value equivalent to the integer
|
||
|
* value provided.
|
||
|
*
|
||
|
* Specifically, this function does nothing if value%2 == 0 and
|
||
|
* returns the identity matrix if value%2 == 1.
|
||
|
*
|
||
|
* If the matrix is not square then the largest possible square
|
||
|
* submatrix is set to the identity matrix.
|
||
|
*
|
||
|
* \param M Matrix
|
||
|
* \param value Either 0 or 1
|
||
|
*/
|
||
|
|
||
|
void mzd_set_ui(mzd_t *M, unsigned int const value);
|
||
|
|
||
|
/**
|
||
|
* \brief Gaussian elimination.
|
||
|
*
|
||
|
* This will do Gaussian elimination on the matrix m but will start
|
||
|
* not at column 0 necc but at column startcol. If full=FALSE, then it
|
||
|
* will do triangular style elimination, and if full=TRUE, it will do
|
||
|
* Gauss-Jordan style, or full elimination.
|
||
|
*
|
||
|
* \param M Matrix
|
||
|
* \param startcol First column to consider for reduction.
|
||
|
* \param full Gauss-Jordan style or upper triangular form only.
|
||
|
*/
|
||
|
|
||
|
rci_t mzd_gauss_delayed(mzd_t *M, rci_t const startcol, int const full);
|
||
|
|
||
|
/**
|
||
|
* \brief Gaussian elimination.
|
||
|
*
|
||
|
* This will do Gaussian elimination on the matrix m. If full=FALSE,
|
||
|
* then it will do triangular style elimination, and if full=TRUE,
|
||
|
* it will do Gauss-Jordan style, or full elimination.
|
||
|
*
|
||
|
* \param M Matrix
|
||
|
* \param full Gauss-Jordan style or upper triangular form only.
|
||
|
*
|
||
|
* \sa mzd_echelonize_m4ri(), mzd_echelonize_pluq()
|
||
|
*/
|
||
|
|
||
|
rci_t mzd_echelonize_naive(mzd_t *M, int const full);
|
||
|
|
||
|
/**
|
||
|
* \brief Return TRUE if A == B.
|
||
|
*
|
||
|
* \param A Matrix
|
||
|
* \param B Matrix
|
||
|
*/
|
||
|
|
||
|
int mzd_equal(mzd_t const *A, mzd_t const *B);
|
||
|
|
||
|
/**
|
||
|
* \brief Return -1,0,1 if if A < B, A == B or A > B respectively.
|
||
|
*
|
||
|
* \param A Matrix.
|
||
|
* \param B Matrix.
|
||
|
*
|
||
|
* \note This comparison is not well defined mathematically and
|
||
|
* relatively arbitrary since elements of GF(2) don't have an
|
||
|
* ordering.
|
||
|
*/
|
||
|
|
||
|
int mzd_cmp(mzd_t const *A, mzd_t const *B);
|
||
|
|
||
|
/**
|
||
|
* \brief Copy matrix A to DST.
|
||
|
*
|
||
|
* \param DST May be NULL for automatic creation.
|
||
|
* \param A Source matrix.
|
||
|
*/
|
||
|
|
||
|
mzd_t *mzd_copy(mzd_t *DST, mzd_t const *A);
|
||
|
|
||
|
/**
|
||
|
* \brief Concatenate B to A and write the result to C.
|
||
|
*
|
||
|
* That is,
|
||
|
*
|
||
|
\verbatim
|
||
|
[ A ], [ B ] -> [ A B ] = C
|
||
|
\endverbatim
|
||
|
*
|
||
|
* The inputs are not modified but a new matrix is created.
|
||
|
*
|
||
|
* \param C Matrix, may be NULL for automatic creation
|
||
|
* \param A Matrix
|
||
|
* \param B Matrix
|
||
|
*
|
||
|
* \note This is sometimes called augment.
|
||
|
*/
|
||
|
|
||
|
mzd_t *mzd_concat(mzd_t *C, mzd_t const *A, mzd_t const *B);
|
||
|
|
||
|
/**
|
||
|
* \brief Stack A on top of B and write the result to C.
|
||
|
*
|
||
|
* That is,
|
||
|
*
|
||
|
\verbatim
|
||
|
[ A ], [ B ] -> [ A ] = C
|
||
|
[ B ]
|
||
|
\endverbatim
|
||
|
*
|
||
|
* The inputs are not modified but a new matrix is created.
|
||
|
*
|
||
|
* \param C Matrix, may be NULL for automatic creation
|
||
|
* \param A Matrix
|
||
|
* \param B Matrix
|
||
|
*/
|
||
|
|
||
|
mzd_t *mzd_stack(mzd_t *C, mzd_t const *A, mzd_t const *B);
|
||
|
|
||
|
/**
|
||
|
* \brief Copy a submatrix.
|
||
|
*
|
||
|
* Note that the upper bounds are not included.
|
||
|
*
|
||
|
* \param S Preallocated space for submatrix, may be NULL for automatic creation.
|
||
|
* \param M Matrix
|
||
|
* \param lowr start rows
|
||
|
* \param lowc start column
|
||
|
* \param highr stop row (this row is \em not included)
|
||
|
* \param highc stop column (this column is \em not included)
|
||
|
*/
|
||
|
mzd_t *mzd_submatrix(mzd_t *S, mzd_t const *M, rci_t const lowr, rci_t const lowc, rci_t const highr, rci_t const highc);
|
||
|
|
||
|
/**
|
||
|
* \brief Invert the matrix target using Gaussian elimination.
|
||
|
*
|
||
|
* To avoid recomputing the identity matrix over and over again, I may
|
||
|
* be passed in as identity parameter.
|
||
|
*
|
||
|
* \param INV Preallocated space for inversion matrix, may be NULL for automatic creation.
|
||
|
* \param A Matrix to be reduced.
|
||
|
* \param I Identity matrix.
|
||
|
*/
|
||
|
|
||
|
mzd_t *mzd_invert_naive(mzd_t *INV, mzd_t const *A, mzd_t const *I);
|
||
|
|
||
|
/**
|
||
|
* \brief Set C = A+B.
|
||
|
*
|
||
|
* C is also returned. If C is NULL then a new matrix is created which
|
||
|
* must be freed by mzd_free.
|
||
|
*
|
||
|
* \param C Preallocated sum matrix, may be NULL for automatic creation.
|
||
|
* \param A Matrix
|
||
|
* \param B Matrix
|
||
|
*/
|
||
|
|
||
|
mzd_t *mzd_add(mzd_t *C, mzd_t const *A, mzd_t const *B);
|
||
|
|
||
|
/**
|
||
|
* \brief Same as mzd_add but without any checks on the input.
|
||
|
*
|
||
|
* \param C Preallocated sum matrix, may be NULL for automatic creation.
|
||
|
* \param A Matrix
|
||
|
* \param B Matrix
|
||
|
*/
|
||
|
|
||
|
mzd_t *_mzd_add(mzd_t *C, mzd_t const *A, mzd_t const *B);
|
||
|
|
||
|
/**
|
||
|
* \brief Same as mzd_add.
|
||
|
*
|
||
|
* \param C Preallocated difference matrix, may be NULL for automatic creation.
|
||
|
* \param A Matrix
|
||
|
* \param B Matrix
|
||
|
*/
|
||
|
|
||
|
#define mzd_sub mzd_add
|
||
|
|
||
|
/**
|
||
|
* \brief Same as mzd_sub but without any checks on the input.
|
||
|
*
|
||
|
* \param C Preallocated difference matrix, may be NULL for automatic creation.
|
||
|
* \param A Matrix
|
||
|
* \param B Matrix
|
||
|
*/
|
||
|
|
||
|
#define _mzd_sub _mzd_add
|
||
|
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Get n bits starting a position (x,y) from the matrix M.
|
||
|
*
|
||
|
* \param M Source matrix.
|
||
|
* \param x Starting row.
|
||
|
* \param y Starting column.
|
||
|
* \param n Number of bits (<= m4ri_radix);
|
||
|
*/
|
||
|
|
||
|
static inline word mzd_read_bits(mzd_t const *M, rci_t const x, rci_t const y, int const n) {
|
||
|
int const spot = y % m4ri_radix;
|
||
|
wi_t const block = y / m4ri_radix;
|
||
|
int const spill = spot + n - m4ri_radix;
|
||
|
word temp = (spill <= 0) ? M->rows[x][block] << -spill : (M->rows[x][block + 1] << (m4ri_radix - spill)) | (M->rows[x][block] >> spill);
|
||
|
return temp >> (m4ri_radix - n);
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* \brief a_row[a_startblock:] += b_row[b_startblock:] for offset 0
|
||
|
*
|
||
|
* Adds a_row of A, starting with a_startblock to the end, to
|
||
|
* b_row of B, starting with b_startblock to the end. This gets stored
|
||
|
* in A, in a_row, starting with a_startblock.
|
||
|
*
|
||
|
* \param A destination matrix
|
||
|
* \param a_row destination row for matrix C
|
||
|
* \param a_startblock starting block to work on in matrix C
|
||
|
* \param B source matrix
|
||
|
* \param b_row source row for matrix B
|
||
|
* \param b_startblock starting block to work on in matrix B
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
static inline void mzd_combine_even_in_place(mzd_t *A, rci_t const a_row, wi_t const a_startblock,
|
||
|
mzd_t const *B, rci_t const b_row, wi_t const b_startblock) {
|
||
|
|
||
|
wi_t wide = A->width - a_startblock - 1;
|
||
|
|
||
|
word *a = A->rows[a_row] + a_startblock;
|
||
|
word *b = B->rows[b_row] + b_startblock;
|
||
|
|
||
|
#if __M4RI_HAVE_SSE2
|
||
|
if(wide > 2) {
|
||
|
/** check alignments **/
|
||
|
if (__M4RI_ALIGNMENT(a,16)) {
|
||
|
*a++ ^= *b++;
|
||
|
wide--;
|
||
|
}
|
||
|
|
||
|
if (__M4RI_ALIGNMENT(a, 16) == 0 && __M4RI_ALIGNMENT(b, 16) == 0) {
|
||
|
__m128i *a128 = (__m128i*)a;
|
||
|
__m128i *b128 = (__m128i*)b;
|
||
|
const __m128i *eof = (__m128i*)((unsigned long)(a + wide) & ~0xFUL);
|
||
|
|
||
|
do {
|
||
|
*a128 = _mm_xor_si128(*a128, *b128);
|
||
|
++b128;
|
||
|
++a128;
|
||
|
} while(a128 < eof);
|
||
|
|
||
|
a = (word*)a128;
|
||
|
b = (word*)b128;
|
||
|
wide = ((sizeof(word) * wide) % 16) / sizeof(word);
|
||
|
}
|
||
|
}
|
||
|
#endif // __M4RI_HAVE_SSE2
|
||
|
|
||
|
if (wide > 0) {
|
||
|
wi_t n = (wide + 7) / 8;
|
||
|
switch (wide % 8) {
|
||
|
case 0: do { *(a++) ^= *(b++);
|
||
|
case 7: *(a++) ^= *(b++);
|
||
|
case 6: *(a++) ^= *(b++);
|
||
|
case 5: *(a++) ^= *(b++);
|
||
|
case 4: *(a++) ^= *(b++);
|
||
|
case 3: *(a++) ^= *(b++);
|
||
|
case 2: *(a++) ^= *(b++);
|
||
|
case 1: *(a++) ^= *(b++);
|
||
|
} while (--n > 0);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
*a ^= *b & A->high_bitmask;
|
||
|
|
||
|
__M4RI_DD_MZD(A);
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* \brief c_row[c_startblock:] = a_row[a_startblock:] + b_row[b_startblock:] for offset 0
|
||
|
*
|
||
|
* Adds a_row of A, starting with a_startblock to the end, to
|
||
|
* b_row of B, starting with b_startblock to the end. This gets stored
|
||
|
* in C, in c_row, starting with c_startblock.
|
||
|
*
|
||
|
* \param C destination matrix
|
||
|
* \param c_row destination row for matrix C
|
||
|
* \param c_startblock starting block to work on in matrix C
|
||
|
* \param A source matrix
|
||
|
* \param a_row source row for matrix A
|
||
|
* \param a_startblock starting block to work on in matrix A
|
||
|
* \param B source matrix
|
||
|
* \param b_row source row for matrix B
|
||
|
* \param b_startblock starting block to work on in matrix B
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
static inline void mzd_combine_even(mzd_t *C, rci_t const c_row, wi_t const c_startblock,
|
||
|
mzd_t const *A, rci_t const a_row, wi_t const a_startblock,
|
||
|
mzd_t const *B, rci_t const b_row, wi_t const b_startblock) {
|
||
|
|
||
|
wi_t wide = A->width - a_startblock - 1;
|
||
|
word *a = A->rows[a_row] + a_startblock;
|
||
|
word *b = B->rows[b_row] + b_startblock;
|
||
|
word *c = C->rows[c_row] + c_startblock;
|
||
|
|
||
|
#if __M4RI_HAVE_SSE2
|
||
|
if(wide > 2) {
|
||
|
/** check alignments **/
|
||
|
if (__M4RI_ALIGNMENT(a,16)) {
|
||
|
*c++ = *b++ ^ *a++;
|
||
|
wide--;
|
||
|
}
|
||
|
|
||
|
if ( (__M4RI_ALIGNMENT(b, 16) | __M4RI_ALIGNMENT(c, 16)) == 0) {
|
||
|
__m128i *a128 = (__m128i*)a;
|
||
|
__m128i *b128 = (__m128i*)b;
|
||
|
__m128i *c128 = (__m128i*)c;
|
||
|
const __m128i *eof = (__m128i*)((unsigned long)(a + wide) & ~0xFUL);
|
||
|
|
||
|
do {
|
||
|
*c128 = _mm_xor_si128(*a128, *b128);
|
||
|
++c128;
|
||
|
++b128;
|
||
|
++a128;
|
||
|
} while(a128 < eof);
|
||
|
|
||
|
a = (word*)a128;
|
||
|
b = (word*)b128;
|
||
|
c = (word*)c128;
|
||
|
wide = ((sizeof(word) * wide) % 16) / sizeof(word);
|
||
|
}
|
||
|
}
|
||
|
#endif // __M4RI_HAVE_SSE2
|
||
|
|
||
|
if (wide > 0) {
|
||
|
wi_t n = (wide + 7) / 8;
|
||
|
switch (wide % 8) {
|
||
|
case 0: do { *(c++) = *(a++) ^ *(b++);
|
||
|
case 7: *(c++) = *(a++) ^ *(b++);
|
||
|
case 6: *(c++) = *(a++) ^ *(b++);
|
||
|
case 5: *(c++) = *(a++) ^ *(b++);
|
||
|
case 4: *(c++) = *(a++) ^ *(b++);
|
||
|
case 3: *(c++) = *(a++) ^ *(b++);
|
||
|
case 2: *(c++) = *(a++) ^ *(b++);
|
||
|
case 1: *(c++) = *(a++) ^ *(b++);
|
||
|
} while (--n > 0);
|
||
|
}
|
||
|
}
|
||
|
*c ^= ((*a ^ *b ^ *c) & C->high_bitmask);
|
||
|
|
||
|
__M4RI_DD_MZD(C);
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* \brief row3[col3:] = row1[col1:] + row2[col2:]
|
||
|
*
|
||
|
* Adds row1 of SC1, starting with startblock1 to the end, to
|
||
|
* row2 of SC2, starting with startblock2 to the end. This gets stored
|
||
|
* in DST, in row3, starting with startblock3.
|
||
|
*
|
||
|
* \param C destination matrix
|
||
|
* \param c_row destination row for matrix dst
|
||
|
* \param c_startblock starting block to work on in matrix dst
|
||
|
* \param A source matrix
|
||
|
* \param a_row source row for matrix sc1
|
||
|
* \param a_startblock starting block to work on in matrix sc1
|
||
|
* \param B source matrix
|
||
|
* \param b_row source row for matrix sc2
|
||
|
* \param b_startblock starting block to work on in matrix sc2
|
||
|
*
|
||
|
*/
|
||
|
static inline void mzd_combine(mzd_t *C, rci_t const c_row, wi_t const c_startblock,
|
||
|
mzd_t const *A, rci_t const a_row, wi_t const a_startblock,
|
||
|
mzd_t const *B, rci_t const b_row, wi_t const b_startblock) {
|
||
|
|
||
|
if( (C == A) & (a_row == c_row) & (a_startblock == c_startblock) )
|
||
|
mzd_combine_even_in_place(C, c_row, c_startblock, B, b_row, b_startblock);
|
||
|
else
|
||
|
mzd_combine_even(C, c_row, c_startblock, A, a_row, a_startblock, B, b_row, b_startblock);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* \brief Get n bits starting a position (x,y) from the matrix M.
|
||
|
*
|
||
|
* This function is in principle the same as mzd_read_bits,
|
||
|
* but it explicitely returns an 'int' and is used as
|
||
|
* index into an array (Gray code).
|
||
|
*/
|
||
|
|
||
|
static inline int mzd_read_bits_int(mzd_t const *M, rci_t const x, rci_t const y, int const n) {
|
||
|
return __M4RI_CONVERT_TO_INT(mzd_read_bits(M, x, y, n));
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* \brief Zero test for matrix.
|
||
|
*
|
||
|
* \param A Input matrix.
|
||
|
*
|
||
|
*/
|
||
|
int mzd_is_zero(mzd_t const *A);
|
||
|
|
||
|
/**
|
||
|
* \brief Clear the given row, but only begins at the column coloffset.
|
||
|
*
|
||
|
* \param M Matrix
|
||
|
* \param row Index of row
|
||
|
* \param coloffset Column offset
|
||
|
*/
|
||
|
|
||
|
void mzd_row_clear_offset(mzd_t *M, rci_t const row, rci_t const coloffset);
|
||
|
|
||
|
/**
|
||
|
* \brief Find the next nonzero entry in M starting at start_row and start_col.
|
||
|
*
|
||
|
* This function walks down rows in the inner loop and columns in the
|
||
|
* outer loop. If a nonzero entry is found this function returns 1 and
|
||
|
* zero otherwise.
|
||
|
*
|
||
|
* If and only if a nonzero entry is found r and c are updated.
|
||
|
*
|
||
|
* \param M Matrix
|
||
|
* \param start_row Index of row where to start search
|
||
|
* \param start_col Index of column where to start search
|
||
|
* \param r Row index updated if pivot is found
|
||
|
* \param c Column index updated if pivot is found
|
||
|
*/
|
||
|
|
||
|
int mzd_find_pivot(mzd_t const *M, rci_t start_row, rci_t start_col, rci_t *r, rci_t *c);
|
||
|
|
||
|
|
||
|
/**
|
||
|
* \brief Return the number of nonzero entries divided by nrows *
|
||
|
* ncols
|
||
|
*
|
||
|
* If res = 0 then 100 samples per row are made, if res > 0 the
|
||
|
* function takes res sized steps within each row (res = 1 uses every
|
||
|
* word).
|
||
|
*
|
||
|
* \param A Matrix
|
||
|
* \param res Resolution of sampling (in words)
|
||
|
*/
|
||
|
|
||
|
double mzd_density(mzd_t const *A, wi_t res);
|
||
|
|
||
|
/**
|
||
|
* \brief Return the number of nonzero entries divided by nrows *
|
||
|
* ncols considering only the submatrix starting at (r,c).
|
||
|
*
|
||
|
* If res = 0 then 100 samples per row are made, if res > 0 the
|
||
|
* function takes res sized steps within each row (res = 1 uses every
|
||
|
* word).
|
||
|
*
|
||
|
* \param A Matrix
|
||
|
* \param res Resolution of sampling (in words)
|
||
|
* \param r Row to start counting
|
||
|
* \param c Column to start counting
|
||
|
*/
|
||
|
|
||
|
double _mzd_density(mzd_t const *A, wi_t res, rci_t r, rci_t c);
|
||
|
|
||
|
|
||
|
/**
|
||
|
* \brief Return the first row with all zero entries.
|
||
|
*
|
||
|
* If no such row can be found returns nrows.
|
||
|
*
|
||
|
* \param A Matrix
|
||
|
*/
|
||
|
|
||
|
rci_t mzd_first_zero_row(mzd_t const *A);
|
||
|
|
||
|
/**
|
||
|
* \brief Return hash value for matrix.
|
||
|
*
|
||
|
* \param A Matrix
|
||
|
*/
|
||
|
|
||
|
static inline word mzd_hash(mzd_t const *A) {
|
||
|
word hash = 0;
|
||
|
for (rci_t r = 0; r < A->nrows; ++r)
|
||
|
hash ^= rotate_word(calculate_hash(A->rows[r], A->width), r % m4ri_radix);
|
||
|
return hash;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return upper triangular submatrix of A
|
||
|
*
|
||
|
* \param U Output matrix, if NULL a new matrix will be returned
|
||
|
* \param A Source matrix
|
||
|
*
|
||
|
* \return U
|
||
|
*/
|
||
|
|
||
|
mzd_t *mzd_extract_u(mzd_t *U, mzd_t const *A);
|
||
|
|
||
|
/**
|
||
|
* Return lower triangular submatrix of A
|
||
|
*
|
||
|
* \param L Output matrix, if NULL a new matrix will be returned
|
||
|
* \param A Source matrix
|
||
|
*
|
||
|
* \return L
|
||
|
*/
|
||
|
|
||
|
mzd_t *mzd_extract_l(mzd_t *L, mzd_t const *A);
|
||
|
|
||
|
#endif // M4RI_MZD
|