// This code is licensed under the AGPLv3.
#include <string.h>
+#include <emmintrin.h>
+HEADER #include <stdint.h>
HEADER #define streq(a,b) (!strcmp((a),(b)))
return n;
}
// Counts how many of the `bl` bytes starting at `b` are equal to `c`.
//
// Meant for big buffers: when the compiler targets SSE2, the 16-byte-aligned
// middle part of the buffer is compared 16 bytes at a time; the unaligned
// head and the short tail (and everything, if SSE2 is unavailable) are
// handled by a plain byte loop.
//
//   b   start of the buffer (only read, never written)
//   bl  number of bytes in the buffer; 0 yields 0
//   c   byte value to look for
//
// Returns the number of matching bytes.
PUBLIC_FN size_t count_char_occurences_in_buf(char *b, size_t bl, char c) {
  char *be = b+bl;
  size_t res = 0;

  #ifdef __SSE2__
  // _mm_load_si128 needs a 16-byte-aligned address, so do it the simple way
  // until we reach one (or run out of buffer).
  while ((((uintptr_t)b)&0xf) && b<be) {
    if (*(b++)==c) res++;
  }

  // a 128-bit value that contains 16 times `c`
  __m128i cx = _mm_set1_epi8(c);

  // process whole 16-byte chunks for as long as one still fits in the buffer
  while ((size_t)(be-b) >= 16) {
    // Byte-wise compare: every result byte is 0xff on equality, 0x00 otherwise.
    __m128i r = _mm_cmpeq_epi8(cx, _mm_load_si128((const __m128i *)b));
    // Collapse the top bit of each result byte into a 16-bit mask, one bit
    // per input byte, then count the set bits (each iteration clears the
    // lowest set bit).
    unsigned mask = (unsigned)_mm_movemask_epi8(r);
    while (mask) {
      mask &= mask-1;
      res++;
    }
    b += 16;
  }
  #endif

  // the last few bytes; this is also the fallback when SSE2 is not available
  while (b<be) {
    if (*(b++)==c) res++;
  }

  return res;
}
+
// Replaces every byte equal to `c` in the `bl` bytes starting at `b` with
// `new_c`, in place, and counts how many bytes were replaced.
//
// Meant for big buffers: when the compiler targets SSE2, the 16-byte-aligned
// middle part of the buffer is processed 16 bytes at a time; the unaligned
// head and the short tail (and everything, if SSE2 is unavailable) are
// handled by a plain byte loop.
//
//   b      start of the buffer; must be writable
//   bl     number of bytes in the buffer; 0 yields 0
//   c      byte value to look for
//   new_c  byte value written over every occurrence of `c`
//
// Returns the number of replaced bytes (as an int, so the count is only
// meaningful while it fits into one).
PUBLIC_FN int count_and_replace_char_occurences_in_buf(char *b, size_t bl, char c, char new_c) {
  char *be = b+bl;
  int res = 0;

  #ifdef __SSE2__
  // The aligned load/store intrinsics need a 16-byte-aligned address, so do
  // it the simple way until we reach one (or run out of buffer).
  while ((((uintptr_t)b)&0xf) && b<be) {
    if (*b==c) {
      *b = new_c;
      res++;
    }
    b++;
  }

  // 128-bit values that contain 16 times `c` and 16 times `new_c`
  __m128i cx = _mm_set1_epi8(c);
  __m128i nx = _mm_set1_epi8(new_c);

  // process whole 16-byte chunks for as long as one still fits in the buffer
  while ((size_t)(be-b) >= 16) {
    __m128i *bi = (__m128i *)b;
    __m128i v = _mm_load_si128(bi);
    // Byte-wise compare: every result byte is 0xff on equality, 0x00 otherwise.
    __m128i r = _mm_cmpeq_epi8(cx, v);
    // one bit per input byte, set where the byte matched
    unsigned mask = (unsigned)_mm_movemask_epi8(r);
    if (mask) {
      // Blend: take `new_c` where the compare hit, the original byte
      // elsewhere, and write the chunk back into the buffer.
      __m128i replaced = _mm_or_si128(_mm_and_si128(r, nx),
                                      _mm_andnot_si128(r, v));
      _mm_store_si128(bi, replaced);
      // count the hits (each iteration clears the lowest set bit)
      while (mask) {
        mask &= mask-1;
        res++;
      }
    }
    b += 16;
  }
  #endif

  // the last few bytes; this is also the fallback when SSE2 is not available
  while (b<be) {
    if (*b==c) {
      *b = new_c;
      res++;
    }
    b++;
  }

  return res;
}
+
// memcpy plus terminating nullbyte
PUBLIC_FN void *memcpyn(void *d, const void *s, size_t n) {
memcpy(d, s, n);