Improve generic strcspn performance

Improve strcspn performance using a much faster algorithm.  It is kept simple
so it works well on most targets.  It is generally at least 10 times faster
than the existing implementation on bench-strcspn on a few AArch64
implementations, and for some tests 100 times as fast (repeatedly calling
strchr on a small string is extremely slow...).

In fact the string/bits/string2.h inlines no longer make sense, as GCC
already uses strlen if reject is an empty string, strchrnul is 5 times as
fast as __strcspn_c1, while __strcspn_c2 and __strcspn_c3 are slower than
the strcspn main loop for large strings (though reject length 2-4 could be
special cased in the future to gain even more performance).

Tested on x86_64, i686, and aarch64.

	* string/Version (libc): Add GLIBC_2.24.
	* string/strcspn.c (strcspn): Rewrite function.
	* string/bits/string2.h (strcspn): Use __builtin_strcspn.
	(__strcspn_c1): Remove inline function.
	(__strcspn_c2): Likewise.
	(__strcspn_c3): Likewise.
	* string/string-inlines.c
	[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c1): Add
	compatibility symbol.
	[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c2):
	Likewise.
	[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c3):
	Likewise.
	* sysdeps/i386/string-inlines.c: Include generic string-inlines.c.
This commit is contained in:
Wilco Dijkstra 2016-03-25 16:44:26 -03:00 committed by Adhemerval Zanella
parent d8a012c5c9
commit d3496c9f4f
6 changed files with 102 additions and 96 deletions

View File

@ -1,3 +1,21 @@
2016-04-01 Wilco Dijkstra <wdijkstr@arm.com>
Adhemerval Zanella <adhemerval.zanella@linaro.org>
* string/Version (libc): Add GLIBC_2.24.
* string/strcspn.c (strcspn): Rewrite function.
* string/bits/string2.h (strcspn): Use __builtin_strcspn.
(__strcspn_c1): Remove inline function.
(__strcspn_c2): Likewise.
(__strcspn_c3): Likewise.
* string/string-inlines.c
[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c1): Add
compatibility symbol.
[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c2):
Likewise.
[SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c3):
Likewise.
* sysdeps/i386/string-inlines.c: Include generic string-inlines.c.
2016-04-01 Stefan Liebler <stli@linux.vnet.ibm.com>
* sysdeps/s390/s390-32/dl-trampoline.h (_dl_runtime_resolve):

View File

@ -80,4 +80,6 @@ libc {
GLIBC_2.6 {
strerror_l;
}
GLIBC_2.24 {
}
}

View File

@ -905,77 +905,10 @@ __stpcpy_small (char *__dest,
/* Return the length of the initial segment of S which
consists entirely of characters not in REJECT. */
#if !defined _HAVE_STRING_ARCH_strcspn || defined _FORCE_INLINES
# ifndef _HAVE_STRING_ARCH_strcspn
# if __GNUC_PREREQ (3, 2)
# define strcspn(s, reject) \
__extension__ \
({ char __r0, __r1, __r2; \
(__builtin_constant_p (reject) && __string2_1bptr_p (reject) \
? ((__builtin_constant_p (s) && __string2_1bptr_p (s)) \
? __builtin_strcspn (s, reject) \
: ((__r0 = ((const char *) (reject))[0], __r0 == '\0') \
? strlen (s) \
: ((__r1 = ((const char *) (reject))[1], __r1 == '\0') \
? __strcspn_c1 (s, __r0) \
: ((__r2 = ((const char *) (reject))[2], __r2 == '\0') \
? __strcspn_c2 (s, __r0, __r1) \
: (((const char *) (reject))[3] == '\0' \
? __strcspn_c3 (s, __r0, __r1, __r2) \
: __builtin_strcspn (s, reject)))))) \
: __builtin_strcspn (s, reject)); })
# else
# define strcspn(s, reject) \
__extension__ \
({ char __r0, __r1, __r2; \
(__builtin_constant_p (reject) && __string2_1bptr_p (reject) \
? ((__r0 = ((const char *) (reject))[0], __r0 == '\0') \
? strlen (s) \
: ((__r1 = ((const char *) (reject))[1], __r1 == '\0') \
? __strcspn_c1 (s, __r0) \
: ((__r2 = ((const char *) (reject))[2], __r2 == '\0') \
? __strcspn_c2 (s, __r0, __r1) \
: (((const char *) (reject))[3] == '\0' \
? __strcspn_c3 (s, __r0, __r1, __r2) \
: strcspn (s, reject))))) \
: strcspn (s, reject)); })
# endif
#ifndef _HAVE_STRING_ARCH_strcspn
# if __GNUC_PREREQ (3, 2)
# define strcspn(s, reject) __builtin_strcspn (s, reject)
# endif
__STRING_INLINE size_t __strcspn_c1 (const char *__s, int __reject);
__STRING_INLINE size_t
__strcspn_c1 (const char *__s, int __reject)
{
size_t __result = 0;
while (__s[__result] != '\0' && __s[__result] != __reject)
++__result;
return __result;
}
__STRING_INLINE size_t __strcspn_c2 (const char *__s, int __reject1,
int __reject2);
__STRING_INLINE size_t
__strcspn_c2 (const char *__s, int __reject1, int __reject2)
{
size_t __result = 0;
while (__s[__result] != '\0' && __s[__result] != __reject1
&& __s[__result] != __reject2)
++__result;
return __result;
}
__STRING_INLINE size_t __strcspn_c3 (const char *__s, int __reject1,
int __reject2, int __reject3);
__STRING_INLINE size_t
__strcspn_c3 (const char *__s, int __reject1, int __reject2,
int __reject3)
{
size_t __result = 0;
while (__s[__result] != '\0' && __s[__result] != __reject1
&& __s[__result] != __reject2 && __s[__result] != __reject3)
++__result;
return __result;
}
#endif

View File

@ -16,6 +16,7 @@
<http://www.gnu.org/licenses/>. */
#include <string.h>
#include <stdint.h>
#undef strcspn
@ -26,16 +27,45 @@
/* Return the length of the maximum initial segment of S
which contains no characters from REJECT. */
size_t
STRCSPN (const char *s, const char *reject)
STRCSPN (const char *str, const char *reject)
{
size_t count = 0;
/* An empty or single-character REJECT needs no lookup table: the result
is the distance from STR to the pointer __strchrnul returns for
REJECT[0]. */
if (__glibc_unlikely (reject[0] == '\0') ||
__glibc_unlikely (reject[1] == '\0'))
return __strchrnul (str, reject [0]) - str;
while (*s != '\0')
if (strchr (reject, *s++) == NULL)
++count;
else
return count;
/* Use multiple small memsets to enable inlining on most targets. */
unsigned char table[256];
unsigned char *p = memset (table, 0, 64);
memset (p + 64, 0, 64);
memset (p + 128, 0, 64);
memset (p + 192, 0, 64);
return count;
/* Flag every byte value that occurs in REJECT.  The store is done before
the loop condition is tested, so the terminating NUL is flagged as well
(p[0] becomes 1). */
unsigned char *s = (unsigned char*) reject;
unsigned char tmp;
do
p[tmp = *s++] = 1;
while (tmp);
/* Test the first four bytes of STR one at a time.  Because NUL is
flagged in the table, reaching the end of STR stops the scan in the
same way a rejected character does. */
s = (unsigned char*) str;
if (p[s[0]]) return 0;
if (p[s[1]]) return 1;
if (p[s[2]]) return 2;
if (p[s[3]]) return 3;
/* Round S down to a multiple of 4, then look up four bytes per
iteration until at least one of them is flagged. */
s = (unsigned char *) ((uintptr_t)(s) & ~3);
unsigned int c0, c1, c2, c3;
do
{
s += 4;
c0 = p[s[0]];
c1 = p[s[1]];
c2 = p[s[2]];
c3 = p[s[3]];
}
while ((c0 | c1 | c2 | c3) == 0);
/* S now addresses the group of four holding the first flagged byte.
Each cN is 0 or 1, so the expression below adds 0 or 1 to COUNT when
c0 or c1 is set, and 2 or 3 otherwise. */
size_t count = s - (unsigned char *) str;
return (c0 | c1) != 0 ? count - c0 + 1 : count - c2 + 3;
}
libc_hidden_builtin_def (strcspn)

View File

@ -32,3 +32,43 @@
#undef __NO_INLINE__
#include <bits/string.h>
#include <bits/string2.h>
#include "shlib-compat.h"
#if SHLIB_COMPAT (libc, GLIBC_2_1_1, GLIBC_2_24)
/* The inline functions are not used from GLIBC 2.24 onward; however,
they are still required to provide the symbols through string-inlines.c
(for compatibility reasons, if inlining is not possible). */
/* Return the number of leading characters of __S that precede the first
   occurrence of __REJECT, or the length of __S if __REJECT does not
   occur.  */
size_t
__old_strcspn_c1 (const char *__s, int __reject)
{
  const char *__p = __s;

  for (;;)
    {
      /* Stop at the terminator or at the rejected character.  */
      if (*__p == '\0' || *__p == __reject)
	break;
      ++__p;
    }

  return (size_t) (__p - __s);
}
compat_symbol (libc, __old_strcspn_c1, __strcspn_c1, GLIBC_2_1_1);
/* Return the number of leading characters of __S that are equal to
   neither __REJECT1 nor __REJECT2; the scan also ends at the
   terminating null character.  */
size_t
__old_strcspn_c2 (const char *__s, int __reject1, int __reject2)
{
  size_t __len;

  for (__len = 0; __s[__len] != '\0'; ++__len)
    {
      /* Promote once and compare against both rejected values.  */
      int __c = __s[__len];
      if (__c == __reject1 || __c == __reject2)
	break;
    }

  return __len;
}
compat_symbol (libc, __old_strcspn_c2, __strcspn_c2, GLIBC_2_1_1);
/* Return the number of leading characters of __S that are equal to none
   of __REJECT1, __REJECT2 and __REJECT3; the scan also ends at the
   terminating null character.  */
size_t
__old_strcspn_c3 (const char *__s, int __reject1, int __reject2,
		  int __reject3)
{
  size_t __pos = 0;

  while (1)
    {
      int __c = __s[__pos];

      /* End of string: every character so far was accepted.  */
      if (__c == '\0')
	return __pos;

      /* First rejected character found.  */
      if (__c == __reject1 || __c == __reject2 || __c == __reject3)
	return __pos;

      ++__pos;
    }
}
compat_symbol (libc, __old_strcspn_c3, __strcspn_c3, GLIBC_2_1_1);
#endif

View File

@ -15,27 +15,10 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* <bits/string.h> and <bits/string2.h> declare some extern inline
functions. These functions are declared additionally here if
inlining is not possible. */
#undef __USE_STRING_INLINES
#define __USE_STRING_INLINES
#define _FORCE_INLINES
#define __STRING_INLINE /* empty */
#define __NO_INLINE__
/* This is to avoid PLT entries for the x86 version. */
#define __memcpy_g __memcpy_g_internal
#define __strchr_g __strchr_g_internal
#include <string.h>
#undef index
#undef rindex
#undef __NO_INLINE__
#include <bits/string.h>
#include <bits/string2.h>
#include <string/string-inlines.c>
void *
(__memcpy_c) (void *d, const void *s, size_t n)