/** * \file mzd.h * \brief Dense matrices over GF(2) represented as a bit field. * * \author Gregory Bard * \author Martin Albrecht * \author Carlo Wood */ #ifndef M4RI_MZD #define M4RI_MZD /******************************************************************* * * M4RI: Linear Algebra over GF(2) * * Copyright (C) 2007, 2008 Gregory Bard * Copyright (C) 2008-2013 Martin Albrecht * Copyright (C) 2011 Carlo Wood * * Distributed under the terms of the GNU General Public License (GPL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ * ********************************************************************/ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #include #if __M4RI_HAVE_SSE2 #include #endif #include #include /** * Maximum number of words allocated for one mzd_t block. * * \note This value must fit in an int, even though it's type is size_t. */ #define __M4RI_MAX_MZD_BLOCKSIZE (((size_t)1) << 27) /** * \brief Matrix multiplication block-ing dimension. * * Defines the number of rows of the matrix A that are * processed as one block during the execution of a multiplication * algorithm. */ #define __M4RI_MUL_BLOCKSIZE MIN(((int)sqrt((double)(4 * __M4RI_CPU_L3_CACHE))) / 2, 2048) /** * \brief Data containers containing the values packed into words */ typedef struct { size_t size; /*!< number of words */ word* begin; /*!< first word */ word* end; /*!< last word */ } mzd_block_t; /** * \brief Dense matrices over GF(2). * * The most fundamental data type in this library. */ typedef struct mzd_t { rci_t nrows; /*!< Number of rows. */ rci_t ncols; /*!< Number of columns. */ wi_t width; /*!< Number of words with valid bits: width = ceil(ncols / m4ri_radix) */ /** * Offset in words between rows. * * rowstride = (width < mzd_paddingwidth || (width & 1) == 0) ? width : width + 1; * where width is the width of the underlying non-windowed matrix. */ wi_t rowstride; /** * Offset in words from start of block to first word. * * rows[0] = blocks[0].begin + offset_vector; * This, together with rowstride, makes the rows array obsolete. */ wi_t offset_vector; wi_t row_offset; /*!< Number of rows to the first row counting from the start of the first block. */ /** * Booleans to speed up things. * * The bits have the following meaning: * * 1: Has non-zero excess. * 2: Is windowed, but has zero offset. * 3: Is windowed, but has zero excess. * 4: Is windowed, but owns the blocks allocations. * 5: Spans more than 1 block. */ uint8_t flags; /** * blockrows_log = log2(blockrows); * where blockrows is the number of rows in one block, which is a power of 2. */ uint8_t blockrows_log; word high_bitmask; /*!< Mask for valid bits in the word with the highest index (width - 1). */ mzd_block_t *blocks; /*!< Pointers to the actual blocks of memory containing the values packed into words. */ word **rows; /*!< Address of first word in each row, so the first word of row i is is m->rows[i] */ uint64_t dummy; /*!< ensures sizeof(mzd_t) == 64 */ } mzd_t; /** * \brief The minimum width where padding occurs. */ static wi_t const mzd_paddingwidth = 1; /** * \brief flag when ncols%64 == 0 */ static uint8_t const mzd_flag_nonzero_excess = 0x2; /** * \brief flag for windowed matrix */ static uint8_t const mzd_flag_windowed_zerooffset = 0x4; /** * \brief flag for windowed matrix where ncols%64 == 0 */ static uint8_t const mzd_flag_windowed_zeroexcess = 0x8; /** * \brief flag for windowed matrix wich owns its memory */ static uint8_t const mzd_flag_windowed_ownsblocks = 0x10; /** * \brief flag for multiply blocks */ static uint8_t const mzd_flag_multiple_blocks = 0x20; /** * \brief Test if a matrix is windowed. * * \param M Matrix * * \return a non-zero value if the matrix is windowed, otherwise return zero. */ static inline int mzd_is_windowed(mzd_t const *M) { return M->flags & (mzd_flag_windowed_zerooffset); } /** * \brief Test if this mzd_t should free blocks. * * \param M Matrix * * \return TRUE iff blocks is non-zero and should be freed upon a call to mzd_free. */ static inline int mzd_owns_blocks(mzd_t const *M) { return M->blocks && (!mzd_is_windowed(M) || ((M->flags & mzd_flag_windowed_ownsblocks))); } /** * \brief Get a pointer the first word. * * \param M Matrix * * \return a pointer to the first word of the first row. */ static inline word* mzd_first_row(mzd_t const *M) { word* result = M->blocks[0].begin + M->offset_vector; assert(M->nrows == 0 || result == M->rows[0]); return result; } /** * \brief Get a pointer to the first word in block n. * * Use mzd_first_row for block number 0. * * \param M Matrix * \param n The block number. Must be larger than 0. * * \return a pointer to the first word of the first row in block n. */ static inline word* mzd_first_row_next_block(mzd_t const* M, int n) { assert(n > 0); return M->blocks[n].begin + M->offset_vector - M->row_offset * M->rowstride; } /** * \brief Convert row to blocks index. * * \param M Matrix. * \param row The row to convert. * * \return the block number that contains this row. */ static inline int mzd_row_to_block(mzd_t const* M, rci_t row) { return (M->row_offset + row) >> M->blockrows_log; } /** * \brief Total number of rows in this block. * * Should be called with a constant n=0, or with * n > 0 when n is a variable, for optimization * reasons. * * \param M Matrix * \param n The block number. * * \return the total number of rows in this block. */ static inline wi_t mzd_rows_in_block(mzd_t const* M, int n) { if (__M4RI_UNLIKELY(M->flags & mzd_flag_multiple_blocks)) { if (__M4RI_UNLIKELY(n == 0)) { return (1 << M->blockrows_log) - M->row_offset; } else { int const last_block = mzd_row_to_block(M, M->nrows - 1); if (n < last_block) return (1 << M->blockrows_log); return M->nrows + M->row_offset - (n << M->blockrows_log); } } return n ? 0 : M->nrows; } /** * \brief Number of rows in this block including r * * \param M Matrix * \param r row * * \return the number of rows with index >= r in this block */ static inline wi_t mzd_remaining_rows_in_block(mzd_t const* M, rci_t r) { const int n = mzd_row_to_block(M, r); r = (r - (n << M->blockrows_log)); if (__M4RI_UNLIKELY(M->flags & mzd_flag_multiple_blocks)) { if (__M4RI_UNLIKELY(n == 0)) { return (1 << M->blockrows_log) - M->row_offset - r; } else { int const last_block = mzd_row_to_block(M, M->nrows - 1); if (n < last_block) return (1 << M->blockrows_log) - r; return M->nrows + M->row_offset - (n << M->blockrows_log) - r; } } return n ? 0 : M->nrows - r; } /** * \brief Get pointer to first word of row. * * \param M Matrix * \param row The row index. * * \return pointer to first word of the row. */ static inline word* mzd_row(mzd_t const* M, rci_t row) { wi_t big_vector = M->offset_vector + row * M->rowstride; word* result = M->blocks[0].begin + big_vector; if (__M4RI_UNLIKELY(M->flags & mzd_flag_multiple_blocks)) { int const n = (M->row_offset + row) >> M->blockrows_log; result = M->blocks[n].begin + big_vector - n * (M->blocks[0].size / sizeof(word)); } assert(result == M->rows[row]); return result; } /** * \brief Create a new matrix of dimension r x c. * * Use mzd_free to kill it. * * \param r Number of rows * \param c Number of columns * */ mzd_t *mzd_init(rci_t const r, rci_t const c); /** * \brief Free a matrix created with mzd_init. * * \param A Matrix */ void mzd_free(mzd_t *A); /** * \brief Create a window/view into the matrix M. * * A matrix window for M is a meta structure on the matrix M. It is * setup to point into the matrix so M \em must \em not be freed while the * matrix window is used. * * This function puts the restriction on the provided parameters that * all parameters must be within range for M which is not enforced * currently . * * Use mzd_free_window to free the window. * * \param M Matrix * \param lowr Starting row (inclusive) * \param lowc Starting column (inclusive, must be multiple of m4ri_radix) * \param highr End row (exclusive) * \param highc End column (exclusive) * */ mzd_t *mzd_init_window(mzd_t *M, rci_t const lowr, rci_t const lowc, rci_t const highr, rci_t const highc); /** * \brief Create a const window/view into a const matrix M. * * See mzd_init_window, but for constant M. */ static inline mzd_t const *mzd_init_window_const(mzd_t const *M, rci_t const lowr, rci_t const lowc, rci_t const highr, rci_t const highc) { return mzd_init_window((mzd_t*)M, lowr, lowc, highr, highc); } /** * \brief Free a matrix window created with mzd_init_window. * * \param A Matrix */ #define mzd_free_window mzd_free /** * \brief Swap the two rows rowa and rowb starting at startblock. * * \param M Matrix with a zero offset. * \param rowa Row index. * \param rowb Row index. * \param startblock Start swapping only in this block. */ static inline void _mzd_row_swap(mzd_t *M, rci_t const rowa, rci_t const rowb, wi_t const startblock) { if ((rowa == rowb) || (startblock >= M->width)) return; wi_t width = M->width - startblock - 1; word *a = M->rows[rowa] + startblock; word *b = M->rows[rowb] + startblock; word tmp; word const mask_end = M->high_bitmask; for(wi_t i = 0; i < width; ++i) { tmp = a[i]; a[i] = b[i]; b[i] = tmp; } tmp = (a[width] ^ b[width]) & mask_end; a[width] ^= tmp; b[width] ^= tmp; __M4RI_DD_ROW(M, rowa); __M4RI_DD_ROW(M, rowb); } /** * \brief Swap the two rows rowa and rowb. * * \param M Matrix * \param rowa Row index. * \param rowb Row index. */ static inline void mzd_row_swap(mzd_t *M, rci_t const rowa, rci_t const rowb) { _mzd_row_swap(M, rowa, rowb, 0); } /** * \brief copy row j from A to row i from B. * * The offsets of A and B must match and the number of columns of A * must be less than or equal to the number of columns of B. * * \param B Target matrix. * \param i Target row index. * \param A Source matrix. * \param j Source row index. */ void mzd_copy_row(mzd_t *B, rci_t i, mzd_t const *A, rci_t j); /** * \brief Swap the two columns cola and colb. * * \param M Matrix. * \param cola Column index. * \param colb Column index. */ void mzd_col_swap(mzd_t *M, rci_t const cola, rci_t const colb); /** * \brief Swap the two columns cola and colb but only between start_row and stop_row. * * \param M Matrix. * \param cola Column index. * \param colb Column index. * \param start_row Row index. * \param stop_row Row index (exclusive). */ static inline void mzd_col_swap_in_rows(mzd_t *M, rci_t const cola, rci_t const colb, rci_t const start_row, rci_t const stop_row) { if (cola == colb) return; rci_t const _cola = cola; rci_t const _colb = colb; wi_t const a_word = _cola / m4ri_radix; wi_t const b_word = _colb / m4ri_radix; int const a_bit = _cola % m4ri_radix; int const b_bit = _colb % m4ri_radix; word* RESTRICT ptr = mzd_row(M, start_row); int max_bit = MAX(a_bit, b_bit); int count_remaining = stop_row - start_row; int min_bit = a_bit + b_bit - max_bit; int block = mzd_row_to_block(M, start_row); int offset = max_bit - min_bit; word mask = m4ri_one << min_bit; int count = MIN(mzd_remaining_rows_in_block(M, start_row), count_remaining); // Apparently we're calling with start_row == stop_row sometimes (seems a bug to me). if (count <= 0) return; if (a_word == b_word) { while(1) { count_remaining -= count; ptr += a_word; int fast_count = count / 4; int rest_count = count - 4 * fast_count; word xor_v[4]; wi_t const rowstride = M->rowstride; while (fast_count--) { xor_v[0] = ptr[0]; xor_v[1] = ptr[rowstride]; xor_v[2] = ptr[2 * rowstride]; xor_v[3] = ptr[3 * rowstride]; xor_v[0] ^= xor_v[0] >> offset; xor_v[1] ^= xor_v[1] >> offset; xor_v[2] ^= xor_v[2] >> offset; xor_v[3] ^= xor_v[3] >> offset; xor_v[0] &= mask; xor_v[1] &= mask; xor_v[2] &= mask; xor_v[3] &= mask; xor_v[0] |= xor_v[0] << offset; xor_v[1] |= xor_v[1] << offset; xor_v[2] |= xor_v[2] << offset; xor_v[3] |= xor_v[3] << offset; ptr[0] ^= xor_v[0]; ptr[rowstride] ^= xor_v[1]; ptr[2 * rowstride] ^= xor_v[2]; ptr[3 * rowstride] ^= xor_v[3]; ptr += 4 * rowstride; } while (rest_count--) { word xor_v = *ptr; xor_v ^= xor_v >> offset; xor_v &= mask; *ptr ^= xor_v | (xor_v << offset); ptr += rowstride; } block++; if ((count = MIN(mzd_rows_in_block(M, block), count_remaining)) <= 0) break; ptr = mzd_first_row_next_block(M, block); } } else { word* RESTRICT min_ptr; wi_t max_offset; if (min_bit == a_bit) { min_ptr = ptr + a_word; max_offset = b_word - a_word; } else { min_ptr = ptr + b_word; max_offset = a_word - b_word; } while(1) { count_remaining -= count; wi_t const rowstride = M->rowstride; while(count--) { word xor_v = (min_ptr[0] ^ (min_ptr[max_offset] >> offset)) & mask; min_ptr[0] ^= xor_v; min_ptr[max_offset] ^= xor_v << offset; min_ptr += rowstride; } block++; if ((count = MIN(mzd_rows_in_block(M,+block), count_remaining)) <= 0) break; ptr = mzd_first_row_next_block(M, block); if (min_bit == a_bit) min_ptr = ptr + a_word; else min_ptr = ptr + b_word; } } __M4RI_DD_MZD(M); } /** * \brief Read the bit at position M[row,col]. * * \param M Matrix * \param row Row index * \param col Column index * * \note No bounds checks whatsoever are performed. * */ static inline BIT mzd_read_bit(mzd_t const *M, rci_t const row, rci_t const col ) { return __M4RI_GET_BIT(M->rows[row][col/m4ri_radix], col%m4ri_radix); } /** * \brief Write the bit value to position M[row,col] * * \param M Matrix * \param row Row index * \param col Column index * \param value Either 0 or 1 * * \note No bounds checks whatsoever are performed. * */ static inline void mzd_write_bit(mzd_t *M, rci_t const row, rci_t const col, BIT const value) { __M4RI_WRITE_BIT(M->rows[row][col/m4ri_radix], col%m4ri_radix, value); } /** * \brief XOR n bits from values to M starting a position (x,y). * * \param M Source matrix. * \param x Starting row. * \param y Starting column. * \param n Number of bits (<= m4ri_radix); * \param values Word with values; */ static inline void mzd_xor_bits(mzd_t const *M, rci_t const x, rci_t const y, int const n, word values) { int const spot = y % m4ri_radix; wi_t const block = y / m4ri_radix; M->rows[x][block] ^= values << spot; int const space = m4ri_radix - spot; if (n > space) M->rows[x][block + 1] ^= values >> space; } /** * \brief AND n bits from values to M starting a position (x,y). * * \param M Source matrix. * \param x Starting row. * \param y Starting column. * \param n Number of bits (<= m4ri_radix); * \param values Word with values; */ static inline void mzd_and_bits(mzd_t const *M, rci_t const x, rci_t const y, int const n, word values) { /* This is the best way, since this will drop out once we inverse the bits in values: */ values >>= (m4ri_radix - n); /* Move the bits to the lowest columns */ int const spot = y % m4ri_radix; wi_t const block = y / m4ri_radix; M->rows[x][block] &= values << spot; int const space = m4ri_radix - spot; if (n > space) M->rows[x][block + 1] &= values >> space; } /** * \brief Clear n bits in M starting a position (x,y). * * \param M Source matrix. * \param x Starting row. * \param y Starting column. * \param n Number of bits (0 < n <= m4ri_radix); */ static inline void mzd_clear_bits(mzd_t const *M, rci_t const x, rci_t const y, int const n) { assert(n>0 && n <= m4ri_radix); word values = m4ri_ffff >> (m4ri_radix - n); int const spot = y % m4ri_radix; wi_t const block = y / m4ri_radix; M->rows[x][block] &= ~(values << spot); int const space = m4ri_radix - spot; if (n > space) M->rows[x][block + 1] &= ~(values >> space); } /** * \brief Add the rows sourcerow and destrow and stores the total in the row * destrow, but only begins at the column coloffset. * * \param M Matrix * \param dstrow Index of target row * \param srcrow Index of source row * \param coloffset Start column (0 <= coloffset < M->ncols) * * \warning This function expects that there is at least one word worth of work. */ static inline void mzd_row_add_offset(mzd_t *M, rci_t dstrow, rci_t srcrow, rci_t coloffset) { assert(dstrow < M->nrows && srcrow < M->nrows && coloffset < M->ncols); wi_t const startblock= coloffset/m4ri_radix; wi_t wide = M->width - startblock; word *src = M->rows[srcrow] + startblock; word *dst = M->rows[dstrow] + startblock; word const mask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - coloffset % m4ri_radix); word const mask_end = M->high_bitmask; *dst++ ^= *src++ & mask_begin; --wide; #if __M4RI_HAVE_SSE2 int not_aligned = __M4RI_ALIGNMENT(src,16) != 0; /* 0: Aligned, 1: Not aligned */ if (wide > not_aligned + 1) /* Speed up for small matrices */ { if (not_aligned) { *dst++ ^= *src++; --wide; } /* Now wide > 1 */ __m128i* __src = (__m128i*)src; __m128i* __dst = (__m128i*)dst; __m128i* const eof = (__m128i*)((unsigned long)(src + wide) & ~0xFUL); do { __m128i xmm1 = _mm_xor_si128(*__dst, *__src); *__dst++ = xmm1; } while(++__src < eof); src = (word*)__src; dst = (word*)__dst; wide = ((sizeof(word)*wide)%16)/sizeof(word); } #endif wi_t i = -1; while(++i < wide) dst[i] ^= src[i]; /* * Revert possibly non-zero excess bits. * Note that i == wide here, and wide can be 0. * But really, src[wide - 1] is M->rows[srcrow][M->width - 1] ;) * We use i - 1 here to let the compiler know these are the same addresses * that we last accessed, in the previous loop. */ dst[i - 1] ^= src[i - 1] & ~mask_end; __M4RI_DD_ROW(M, dstrow); } /** * \brief Add the rows sourcerow and destrow and stores the total in * the row destrow. * * \param M Matrix * \param sourcerow Index of source row * \param destrow Index of target row * * \note this can be done much faster with mzd_combine. */ void mzd_row_add(mzd_t *M, rci_t const sourcerow, rci_t const destrow); /** * \brief Transpose a matrix. * * This function uses the fact that: \verbatim [ A B ]T [AT CT] [ C D ] = [BT DT] \endverbatim * and thus rearranges the blocks recursively. * * \param DST Preallocated return matrix, may be NULL for automatic creation. * \param A Matrix */ mzd_t *mzd_transpose(mzd_t *DST, mzd_t const *A); /** * \brief Naive cubic matrix multiplication. * * That is, compute C such that C == AB. * * \param C Preallocated product matrix, may be NULL for automatic creation. * \param A Input matrix A. * \param B Input matrix B. * * \note Normally, if you will multiply several times by b, it is * smarter to calculate bT yourself, and keep it, and then use the * function called _mzd_mul_naive * */ mzd_t *mzd_mul_naive(mzd_t *C, mzd_t const *A, mzd_t const *B); /** * \brief Naive cubic matrix multiplication and addition * * That is, compute C such that C == C + AB. * * \param C Preallocated product matrix. * \param A Input matrix A. * \param B Input matrix B. * * \note Normally, if you will multiply several times by b, it is * smarter to calculate bT yourself, and keep it, and then use the * function called _mzd_mul_naive */ mzd_t *mzd_addmul_naive(mzd_t *C, mzd_t const *A, mzd_t const *B); /** * \brief Naive cubic matrix multiplication with the pre-transposed B. * * That is, compute C such that C == AB^t. * * \param C Preallocated product matrix. * \param A Input matrix A. * \param B Pre-transposed input matrix B. * \param clear Whether to clear C before accumulating AB */ mzd_t *_mzd_mul_naive(mzd_t *C, mzd_t const *A, mzd_t const *B, int const clear); /** * \brief Matrix multiplication optimized for v*A where v is a vector. * * \param C Preallocated product matrix. * \param v Input matrix v. * \param A Input matrix A. * \param clear If set clear C first, otherwise add result to C. * */ mzd_t *_mzd_mul_va(mzd_t *C, mzd_t const *v, mzd_t const *A, int const clear); /** * \brief Fill matrix M with uniformly distributed bits. * * \param M Matrix * * \todo Allow the user to provide a RNG callback. */ void mzd_randomize(mzd_t *M); /** * \brief Set the matrix M to the value equivalent to the integer * value provided. * * Specifically, this function does nothing if value%2 == 0 and * returns the identity matrix if value%2 == 1. * * If the matrix is not square then the largest possible square * submatrix is set to the identity matrix. * * \param M Matrix * \param value Either 0 or 1 */ void mzd_set_ui(mzd_t *M, unsigned int const value); /** * \brief Gaussian elimination. * * This will do Gaussian elimination on the matrix m but will start * not at column 0 necc but at column startcol. If full=FALSE, then it * will do triangular style elimination, and if full=TRUE, it will do * Gauss-Jordan style, or full elimination. * * \param M Matrix * \param startcol First column to consider for reduction. * \param full Gauss-Jordan style or upper triangular form only. */ rci_t mzd_gauss_delayed(mzd_t *M, rci_t const startcol, int const full); /** * \brief Gaussian elimination. * * This will do Gaussian elimination on the matrix m. If full=FALSE, * then it will do triangular style elimination, and if full=TRUE, * it will do Gauss-Jordan style, or full elimination. * * \param M Matrix * \param full Gauss-Jordan style or upper triangular form only. * * \sa mzd_echelonize_m4ri(), mzd_echelonize_pluq() */ rci_t mzd_echelonize_naive(mzd_t *M, int const full); /** * \brief Return TRUE if A == B. * * \param A Matrix * \param B Matrix */ int mzd_equal(mzd_t const *A, mzd_t const *B); /** * \brief Return -1,0,1 if if A < B, A == B or A > B respectively. * * \param A Matrix. * \param B Matrix. * * \note This comparison is not well defined mathematically and * relatively arbitrary since elements of GF(2) don't have an * ordering. */ int mzd_cmp(mzd_t const *A, mzd_t const *B); /** * \brief Copy matrix A to DST. * * \param DST May be NULL for automatic creation. * \param A Source matrix. */ mzd_t *mzd_copy(mzd_t *DST, mzd_t const *A); /** * \brief Concatenate B to A and write the result to C. * * That is, * \verbatim [ A ], [ B ] -> [ A B ] = C \endverbatim * * The inputs are not modified but a new matrix is created. * * \param C Matrix, may be NULL for automatic creation * \param A Matrix * \param B Matrix * * \note This is sometimes called augment. */ mzd_t *mzd_concat(mzd_t *C, mzd_t const *A, mzd_t const *B); /** * \brief Stack A on top of B and write the result to C. * * That is, * \verbatim [ A ], [ B ] -> [ A ] = C [ B ] \endverbatim * * The inputs are not modified but a new matrix is created. * * \param C Matrix, may be NULL for automatic creation * \param A Matrix * \param B Matrix */ mzd_t *mzd_stack(mzd_t *C, mzd_t const *A, mzd_t const *B); /** * \brief Copy a submatrix. * * Note that the upper bounds are not included. * * \param S Preallocated space for submatrix, may be NULL for automatic creation. * \param M Matrix * \param lowr start rows * \param lowc start column * \param highr stop row (this row is \em not included) * \param highc stop column (this column is \em not included) */ mzd_t *mzd_submatrix(mzd_t *S, mzd_t const *M, rci_t const lowr, rci_t const lowc, rci_t const highr, rci_t const highc); /** * \brief Invert the matrix target using Gaussian elimination. * * To avoid recomputing the identity matrix over and over again, I may * be passed in as identity parameter. * * \param INV Preallocated space for inversion matrix, may be NULL for automatic creation. * \param A Matrix to be reduced. * \param I Identity matrix. */ mzd_t *mzd_invert_naive(mzd_t *INV, mzd_t const *A, mzd_t const *I); /** * \brief Set C = A+B. * * C is also returned. If C is NULL then a new matrix is created which * must be freed by mzd_free. * * \param C Preallocated sum matrix, may be NULL for automatic creation. * \param A Matrix * \param B Matrix */ mzd_t *mzd_add(mzd_t *C, mzd_t const *A, mzd_t const *B); /** * \brief Same as mzd_add but without any checks on the input. * * \param C Preallocated sum matrix, may be NULL for automatic creation. * \param A Matrix * \param B Matrix */ mzd_t *_mzd_add(mzd_t *C, mzd_t const *A, mzd_t const *B); /** * \brief Same as mzd_add. * * \param C Preallocated difference matrix, may be NULL for automatic creation. * \param A Matrix * \param B Matrix */ #define mzd_sub mzd_add /** * \brief Same as mzd_sub but without any checks on the input. * * \param C Preallocated difference matrix, may be NULL for automatic creation. * \param A Matrix * \param B Matrix */ #define _mzd_sub _mzd_add /** * Get n bits starting a position (x,y) from the matrix M. * * \param M Source matrix. * \param x Starting row. * \param y Starting column. * \param n Number of bits (<= m4ri_radix); */ static inline word mzd_read_bits(mzd_t const *M, rci_t const x, rci_t const y, int const n) { int const spot = y % m4ri_radix; wi_t const block = y / m4ri_radix; int const spill = spot + n - m4ri_radix; word temp = (spill <= 0) ? M->rows[x][block] << -spill : (M->rows[x][block + 1] << (m4ri_radix - spill)) | (M->rows[x][block] >> spill); return temp >> (m4ri_radix - n); } /** * \brief a_row[a_startblock:] += b_row[b_startblock:] for offset 0 * * Adds a_row of A, starting with a_startblock to the end, to * b_row of B, starting with b_startblock to the end. This gets stored * in A, in a_row, starting with a_startblock. * * \param A destination matrix * \param a_row destination row for matrix C * \param a_startblock starting block to work on in matrix C * \param B source matrix * \param b_row source row for matrix B * \param b_startblock starting block to work on in matrix B * */ static inline void mzd_combine_even_in_place(mzd_t *A, rci_t const a_row, wi_t const a_startblock, mzd_t const *B, rci_t const b_row, wi_t const b_startblock) { wi_t wide = A->width - a_startblock - 1; word *a = A->rows[a_row] + a_startblock; word *b = B->rows[b_row] + b_startblock; #if __M4RI_HAVE_SSE2 if(wide > 2) { /** check alignments **/ if (__M4RI_ALIGNMENT(a,16)) { *a++ ^= *b++; wide--; } if (__M4RI_ALIGNMENT(a, 16) == 0 && __M4RI_ALIGNMENT(b, 16) == 0) { __m128i *a128 = (__m128i*)a; __m128i *b128 = (__m128i*)b; const __m128i *eof = (__m128i*)((unsigned long)(a + wide) & ~0xFUL); do { *a128 = _mm_xor_si128(*a128, *b128); ++b128; ++a128; } while(a128 < eof); a = (word*)a128; b = (word*)b128; wide = ((sizeof(word) * wide) % 16) / sizeof(word); } } #endif // __M4RI_HAVE_SSE2 if (wide > 0) { wi_t n = (wide + 7) / 8; switch (wide % 8) { case 0: do { *(a++) ^= *(b++); case 7: *(a++) ^= *(b++); case 6: *(a++) ^= *(b++); case 5: *(a++) ^= *(b++); case 4: *(a++) ^= *(b++); case 3: *(a++) ^= *(b++); case 2: *(a++) ^= *(b++); case 1: *(a++) ^= *(b++); } while (--n > 0); } } *a ^= *b & A->high_bitmask; __M4RI_DD_MZD(A); } /** * \brief c_row[c_startblock:] = a_row[a_startblock:] + b_row[b_startblock:] for offset 0 * * Adds a_row of A, starting with a_startblock to the end, to * b_row of B, starting with b_startblock to the end. This gets stored * in C, in c_row, starting with c_startblock. * * \param C destination matrix * \param c_row destination row for matrix C * \param c_startblock starting block to work on in matrix C * \param A source matrix * \param a_row source row for matrix A * \param a_startblock starting block to work on in matrix A * \param B source matrix * \param b_row source row for matrix B * \param b_startblock starting block to work on in matrix B * */ static inline void mzd_combine_even(mzd_t *C, rci_t const c_row, wi_t const c_startblock, mzd_t const *A, rci_t const a_row, wi_t const a_startblock, mzd_t const *B, rci_t const b_row, wi_t const b_startblock) { wi_t wide = A->width - a_startblock - 1; word *a = A->rows[a_row] + a_startblock; word *b = B->rows[b_row] + b_startblock; word *c = C->rows[c_row] + c_startblock; #if __M4RI_HAVE_SSE2 if(wide > 2) { /** check alignments **/ if (__M4RI_ALIGNMENT(a,16)) { *c++ = *b++ ^ *a++; wide--; } if ( (__M4RI_ALIGNMENT(b, 16) | __M4RI_ALIGNMENT(c, 16)) == 0) { __m128i *a128 = (__m128i*)a; __m128i *b128 = (__m128i*)b; __m128i *c128 = (__m128i*)c; const __m128i *eof = (__m128i*)((unsigned long)(a + wide) & ~0xFUL); do { *c128 = _mm_xor_si128(*a128, *b128); ++c128; ++b128; ++a128; } while(a128 < eof); a = (word*)a128; b = (word*)b128; c = (word*)c128; wide = ((sizeof(word) * wide) % 16) / sizeof(word); } } #endif // __M4RI_HAVE_SSE2 if (wide > 0) { wi_t n = (wide + 7) / 8; switch (wide % 8) { case 0: do { *(c++) = *(a++) ^ *(b++); case 7: *(c++) = *(a++) ^ *(b++); case 6: *(c++) = *(a++) ^ *(b++); case 5: *(c++) = *(a++) ^ *(b++); case 4: *(c++) = *(a++) ^ *(b++); case 3: *(c++) = *(a++) ^ *(b++); case 2: *(c++) = *(a++) ^ *(b++); case 1: *(c++) = *(a++) ^ *(b++); } while (--n > 0); } } *c ^= ((*a ^ *b ^ *c) & C->high_bitmask); __M4RI_DD_MZD(C); } /** * \brief row3[col3:] = row1[col1:] + row2[col2:] * * Adds row1 of SC1, starting with startblock1 to the end, to * row2 of SC2, starting with startblock2 to the end. This gets stored * in DST, in row3, starting with startblock3. * * \param C destination matrix * \param c_row destination row for matrix dst * \param c_startblock starting block to work on in matrix dst * \param A source matrix * \param a_row source row for matrix sc1 * \param a_startblock starting block to work on in matrix sc1 * \param B source matrix * \param b_row source row for matrix sc2 * \param b_startblock starting block to work on in matrix sc2 * */ static inline void mzd_combine(mzd_t *C, rci_t const c_row, wi_t const c_startblock, mzd_t const *A, rci_t const a_row, wi_t const a_startblock, mzd_t const *B, rci_t const b_row, wi_t const b_startblock) { if( (C == A) & (a_row == c_row) & (a_startblock == c_startblock) ) mzd_combine_even_in_place(C, c_row, c_startblock, B, b_row, b_startblock); else mzd_combine_even(C, c_row, c_startblock, A, a_row, a_startblock, B, b_row, b_startblock); return; } /** * \brief Get n bits starting a position (x,y) from the matrix M. * * This function is in principle the same as mzd_read_bits, * but it explicitely returns an 'int' and is used as * index into an array (Gray code). */ static inline int mzd_read_bits_int(mzd_t const *M, rci_t const x, rci_t const y, int const n) { return __M4RI_CONVERT_TO_INT(mzd_read_bits(M, x, y, n)); } /** * \brief Zero test for matrix. * * \param A Input matrix. * */ int mzd_is_zero(mzd_t const *A); /** * \brief Clear the given row, but only begins at the column coloffset. * * \param M Matrix * \param row Index of row * \param coloffset Column offset */ void mzd_row_clear_offset(mzd_t *M, rci_t const row, rci_t const coloffset); /** * \brief Find the next nonzero entry in M starting at start_row and start_col. * * This function walks down rows in the inner loop and columns in the * outer loop. If a nonzero entry is found this function returns 1 and * zero otherwise. * * If and only if a nonzero entry is found r and c are updated. * * \param M Matrix * \param start_row Index of row where to start search * \param start_col Index of column where to start search * \param r Row index updated if pivot is found * \param c Column index updated if pivot is found */ int mzd_find_pivot(mzd_t const *M, rci_t start_row, rci_t start_col, rci_t *r, rci_t *c); /** * \brief Return the number of nonzero entries divided by nrows * * ncols * * If res = 0 then 100 samples per row are made, if res > 0 the * function takes res sized steps within each row (res = 1 uses every * word). * * \param A Matrix * \param res Resolution of sampling (in words) */ double mzd_density(mzd_t const *A, wi_t res); /** * \brief Return the number of nonzero entries divided by nrows * * ncols considering only the submatrix starting at (r,c). * * If res = 0 then 100 samples per row are made, if res > 0 the * function takes res sized steps within each row (res = 1 uses every * word). * * \param A Matrix * \param res Resolution of sampling (in words) * \param r Row to start counting * \param c Column to start counting */ double _mzd_density(mzd_t const *A, wi_t res, rci_t r, rci_t c); /** * \brief Return the first row with all zero entries. * * If no such row can be found returns nrows. * * \param A Matrix */ rci_t mzd_first_zero_row(mzd_t const *A); /** * \brief Return hash value for matrix. * * \param A Matrix */ static inline word mzd_hash(mzd_t const *A) { word hash = 0; for (rci_t r = 0; r < A->nrows; ++r) hash ^= rotate_word(calculate_hash(A->rows[r], A->width), r % m4ri_radix); return hash; } /** * Return upper triangular submatrix of A * * \param U Output matrix, if NULL a new matrix will be returned * \param A Source matrix * * \return U */ mzd_t *mzd_extract_u(mzd_t *U, mzd_t const *A); /** * Return lower triangular submatrix of A * * \param L Output matrix, if NULL a new matrix will be returned * \param A Source matrix * * \return L */ mzd_t *mzd_extract_l(mzd_t *L, mzd_t const *A); #endif // M4RI_MZD