/* * Functions for adding vectors. * * \author Martin Albrecht * */ #ifndef M4RI_XOR_H #define M4RI_XOR_H /******************************************************************* * * M4RI: Linear Algebra over GF(2) * * Copyright (C) 2008-2013 Martin Albrecht * * Distributed under the terms of the GNU General Public License (GPL) * version 2 or higher. * * This code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * The full text of the GPL is available at: * * http://www.gnu.org/licenses/ * ********************************************************************/ #include #if __M4RI_HAVE_SSE2 #include #endif #include /** * Compute c[i] += t1[i] for 0 <= i < wide * */ static inline void _mzd_combine(word *c, word const *t1, wi_t wide_in) { wi_t wide = wide_in; #if __M4RI_HAVE_SSE2 /* assuming c, t1 are alligned the same way */ if (__M4RI_ALIGNMENT(c,16)==8 && wide) { *c++ ^= *t1++; wide--; } __m128i *__c = (__m128i*)c; __m128i *__t1 = (__m128i*)t1; const __m128i *eof = (__m128i*)((unsigned long)(c + wide) & ~0xFUL); __m128i xmm1; while(__c < eof-1) { xmm1 = _mm_xor_si128(*__c, *__t1++); *__c++ = xmm1; xmm1 = _mm_xor_si128(*__c, *__t1++); *__c++ = xmm1; } if(__c < eof) { xmm1 = _mm_xor_si128(*__c, *__t1++); *__c++ = xmm1; } c = (word*)__c; t1 = (word*)__t1; wide = ((sizeof(word) * wide) % 16) / sizeof(word); if(!wide) { __M4RI_DD_RAWROW(c, wide_in); return; } #endif // __M4RI_HAVE_SSE2 wi_t n = (wide + 7) / 8; switch (wide % 8) { case 0: do { *c++ ^= *t1++; case 7: *c++ ^= *t1++; case 6: *c++ ^= *t1++; case 5: *c++ ^= *t1++; case 4: *c++ ^= *t1++; case 3: *c++ ^= *t1++; case 2: *c++ ^= *t1++; case 1: *c++ ^= *t1++; } while (--n > 0); } __M4RI_DD_RAWROW(c, wide_in); } #define N 2 #include "xor_template.h" #undef N #define N 3 #include "xor_template.h" #undef N #define N 4 #include "xor_template.h" #undef N #define N 5 #include "xor_template.h" #undef N #define N 6 #include "xor_template.h" #undef N #define N 7 #include "xor_template.h" #undef N #define N 8 #include "xor_template.h" #undef N #endif // M4RI_XOR_H