Optimize x86-64 rawmemchr and add test
This commit is contained in:
parent
d9a4d2ab27
commit
855d156018
@ -1,5 +1,10 @@
|
|||||||
2011-10-19 Ulrich Drepper <drepper@gmail.com>
|
2011-10-19 Ulrich Drepper <drepper@gmail.com>
|
||||||
|
|
||||||
|
* sysdeps/x86_64/multiarch/rawmemchr.S: Small optimization to save an
|
||||||
|
instruction.
|
||||||
|
* string/Makefile (strop-tests): Add rawmemchr.
|
||||||
|
* string/test-rawmemchr.c: New file.
|
||||||
|
|
||||||
* sysdeps/x86_64/multiarch/init-arch.h: Define bit_AVX and index_AVX.
|
* sysdeps/x86_64/multiarch/init-arch.h: Define bit_AVX and index_AVX.
|
||||||
* sysdeps/x86_64/multiarch/strcmp-sse42.S: New file. Split out from...
|
* sysdeps/x86_64/multiarch/strcmp-sse42.S: New file. Split out from...
|
||||||
* sysdeps/x86_64/multiarch/strcmp.S: ...here. Include strcmp-sse42.S
|
* sysdeps/x86_64/multiarch/strcmp.S: ...here. Include strcmp-sse42.S
|
||||||
|
@ -50,7 +50,7 @@ strop-tests := memchr memcmp memcpy memmove mempcpy memset memccpy \
|
|||||||
stpcpy stpncpy strcat strchr strcmp strcpy strcspn \
|
stpcpy stpncpy strcat strchr strcmp strcpy strcspn \
|
||||||
strlen strncmp strncpy strpbrk strrchr strspn memmem \
|
strlen strncmp strncpy strpbrk strrchr strspn memmem \
|
||||||
strstr strcasestr strnlen strcasecmp strncasecmp \
|
strstr strcasestr strnlen strcasecmp strncasecmp \
|
||||||
strncat
|
strncat rawmemchr
|
||||||
tests := tester inl-tester noinl-tester testcopy test-ffs \
|
tests := tester inl-tester noinl-tester testcopy test-ffs \
|
||||||
tst-strlen stratcliff tst-svc tst-inlcall \
|
tst-strlen stratcliff tst-svc tst-inlcall \
|
||||||
bug-strncat1 bug-strspn1 bug-strpbrk1 tst-bswap \
|
bug-strncat1 bug-strspn1 bug-strpbrk1 tst-bswap \
|
||||||
|
189
string/test-rawmemchr.c
Normal file
189
string/test-rawmemchr.c
Normal file
@ -0,0 +1,189 @@
|
|||||||
|
/* Test and measure rawmemchr functions.
|
||||||
|
Copyright (C) 1999,2002,2003,2005,2009,2011 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
Written by Jakub Jelinek <jakub@redhat.com>, 1999.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, write to the Free
|
||||||
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
|
02111-1307 USA. */
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
#define TEST_MAIN
|
||||||
|
#include "test-string.h"
|
||||||
|
|
||||||
|
typedef char *(*proto_t) (const char *, int);
|
||||||
|
char *simple_rawmemchr (const char *, int);
|
||||||
|
|
||||||
|
IMPL (simple_rawmemchr, 0)
|
||||||
|
IMPL (rawmemchr, 1)
|
||||||
|
|
||||||
|
/* Reference implementation of rawmemchr: scan forward from S one byte at
   a time and return a pointer to the first byte that compares equal to C
   converted to char.

   The scan has no length limit and no not-found result, so the caller
   must guarantee that a matching byte exists.  */
char *
simple_rawmemchr (const char *s, int c)
{
  const char needle = (char) c;

  /* The only way out of this loop is the return on a match; there is
     deliberately no fall-through path.  */
  for (;;)
    if (*s++ == needle)
      return (char *) s - 1;
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
do_one_test (impl_t *impl, const char *s, int c, char *exp_res)
|
||||||
|
{
|
||||||
|
char *res = CALL (impl, s, c);
|
||||||
|
if (res != exp_res)
|
||||||
|
{
|
||||||
|
error (0, 0, "Wrong result in function %s %p %p", impl->name,
|
||||||
|
res, exp_res);
|
||||||
|
ret = 1;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (HP_TIMING_AVAIL)
|
||||||
|
{
|
||||||
|
hp_timing_t start __attribute ((unused));
|
||||||
|
hp_timing_t stop __attribute ((unused));
|
||||||
|
hp_timing_t best_time = ~ (hp_timing_t) 0;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
for (i = 0; i < 32; ++i)
|
||||||
|
{
|
||||||
|
HP_TIMING_NOW (start);
|
||||||
|
CALL (impl, s, c);
|
||||||
|
HP_TIMING_NOW (stop);
|
||||||
|
HP_TIMING_BEST (best_time, start, stop);
|
||||||
|
}
|
||||||
|
|
||||||
|
printf ("\t%zd", (size_t) best_time);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
do_test (size_t align, size_t pos, size_t len, int seek_char)
|
||||||
|
{
|
||||||
|
size_t i;
|
||||||
|
char *result;
|
||||||
|
|
||||||
|
align &= 7;
|
||||||
|
if (align + len >= page_size)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (i = 0; i < len; ++i)
|
||||||
|
{
|
||||||
|
buf1[align + i] = 1 + 23 * i % 127;
|
||||||
|
if (buf1[align + i] == seek_char)
|
||||||
|
buf1[align + i] = seek_char + 1;
|
||||||
|
}
|
||||||
|
buf1[align + len] = 0;
|
||||||
|
|
||||||
|
assert (pos < len);
|
||||||
|
|
||||||
|
buf1[align + pos] = seek_char;
|
||||||
|
buf1[align + len] = -seek_char;
|
||||||
|
result = (char *) (buf1 + align + pos);
|
||||||
|
|
||||||
|
if (HP_TIMING_AVAIL)
|
||||||
|
printf ("Length %4zd, alignment %2zd:", pos, align);
|
||||||
|
|
||||||
|
FOR_EACH_IMPL (impl, 0)
|
||||||
|
do_one_test (impl, (char *) (buf1 + align), seek_char, result);
|
||||||
|
|
||||||
|
if (HP_TIMING_AVAIL)
|
||||||
|
putchar ('\n');
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
do_random_tests (void)
|
||||||
|
{
|
||||||
|
size_t i, j, n, align, pos, len;
|
||||||
|
int seek_char;
|
||||||
|
char *result;
|
||||||
|
unsigned char *p = buf1 + page_size - 512;
|
||||||
|
|
||||||
|
for (n = 0; n < ITERATIONS; n++)
|
||||||
|
{
|
||||||
|
align = random () & 15;
|
||||||
|
pos = random () & 511;
|
||||||
|
if (pos + align >= 512)
|
||||||
|
pos = 511 - align - (random () & 7);
|
||||||
|
len = random () & 511;
|
||||||
|
if (len + align >= 512)
|
||||||
|
len = 512 - align - (random () & 7);
|
||||||
|
if (pos >= len)
|
||||||
|
continue;
|
||||||
|
seek_char = random () & 255;
|
||||||
|
j = len + align + 64;
|
||||||
|
if (j > 512)
|
||||||
|
j = 512;
|
||||||
|
|
||||||
|
for (i = 0; i < j; i++)
|
||||||
|
{
|
||||||
|
if (i == pos + align)
|
||||||
|
p[i] = seek_char;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
p[i] = random () & 255;
|
||||||
|
if (i < pos + align && p[i] == seek_char)
|
||||||
|
p[i] = seek_char + 13;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert (pos < len);
|
||||||
|
size_t r = random ();
|
||||||
|
if ((r & 31) == 0)
|
||||||
|
len = ~(uintptr_t) (p + align) - ((r >> 5) & 31);
|
||||||
|
result = (char *) (p + pos + align);
|
||||||
|
|
||||||
|
FOR_EACH_IMPL (impl, 1)
|
||||||
|
if (CALL (impl, (char *) (p + align), seek_char) != result)
|
||||||
|
{
|
||||||
|
error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %d, %zd, %zd) %p != %p, p %p",
|
||||||
|
n, impl->name, align, seek_char, len, pos,
|
||||||
|
CALL (impl, (char *) (p + align), seek_char),
|
||||||
|
result, p);
|
||||||
|
ret = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
test_main (void)
|
||||||
|
{
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
test_init ();
|
||||||
|
|
||||||
|
printf ("%20s", "");
|
||||||
|
FOR_EACH_IMPL (impl, 0)
|
||||||
|
printf ("\t%s", impl->name);
|
||||||
|
putchar ('\n');
|
||||||
|
|
||||||
|
for (i = 1; i < 7; ++i)
|
||||||
|
{
|
||||||
|
do_test (0, 16 << i, 2048, 23);
|
||||||
|
do_test (i, 64, 256, 23);
|
||||||
|
do_test (0, 16 << i, 2048, 0);
|
||||||
|
do_test (i, 64, 256, 0);
|
||||||
|
}
|
||||||
|
for (i = 1; i < 32; ++i)
|
||||||
|
{
|
||||||
|
do_test (0, i, i + 1, 23);
|
||||||
|
do_test (0, i, i + 1, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
do_random_tests ();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
#include "../test-skeleton.c"
|
@ -43,19 +43,18 @@ strong_alias (rawmemchr, __rawmemchr)
|
|||||||
|
|
||||||
|
|
||||||
.section .text.sse4.2,"ax",@progbits
|
.section .text.sse4.2,"ax",@progbits
|
||||||
.align 16
|
.align 16
|
||||||
.type __rawmemchr_sse42, @function
|
.type __rawmemchr_sse42, @function
|
||||||
__rawmemchr_sse42:
|
__rawmemchr_sse42:
|
||||||
cfi_startproc
|
cfi_startproc
|
||||||
CALL_MCOUNT
|
CALL_MCOUNT
|
||||||
movd %esi, %xmm1
|
movd %esi, %xmm1
|
||||||
movq %rdi, %rcx
|
movq %rdi, %rcx
|
||||||
punpcklbw %xmm1, %xmm1
|
pxor %xmm2, %xmm2
|
||||||
andq $~15, %rdi
|
andq $~15, %rdi
|
||||||
punpcklbw %xmm1, %xmm1
|
|
||||||
orl $0xffffffff, %esi
|
orl $0xffffffff, %esi
|
||||||
|
pshufb %xmm2, %xmm1
|
||||||
movdqa (%rdi), %xmm0
|
movdqa (%rdi), %xmm0
|
||||||
pshufd $0, %xmm1, %xmm1
|
|
||||||
subq %rdi, %rcx
|
subq %rdi, %rcx
|
||||||
pcmpeqb %xmm1, %xmm0
|
pcmpeqb %xmm1, %xmm0
|
||||||
shl %cl, %esi
|
shl %cl, %esi
|
||||||
|
Loading…
x
Reference in New Issue
Block a user