17968 lines
506 KiB
Diff
17968 lines
506 KiB
Diff
diff --git a/NEWS b/NEWS
|
||
index 485b8ddffa..d138a45519 100644
|
||
--- a/NEWS
|
||
+++ b/NEWS
|
||
@@ -5,6 +5,30 @@ See the end for copying conditions.
|
||
Please send GNU C library bug reports via <https://sourceware.org/bugzilla/>
|
||
using `glibc' in the "product" field.
|
||
|
||
+The following bugs are resolved with this release:
|
||
+
|
||
+ [20019] NULL pointer dereference in libc.so.6 IFUNC due to uninitialized GOT
|
||
+ [26224] iconv hangs when converting some invalid inputs from several IBM
|
||
+ character sets (CVE-2020-27618)
|
||
+ [26534] libm.so 2.32 SIGILL in pow() due to FMA4 instruction on non-FMA4
|
||
+ system
|
||
+ [26555] string: strerrorname_np does not return the documented value
|
||
+ [26600] Transaction ID collisions cause slow DNS lookups in getaddrinfo
|
||
+ [26636] libc: 32-bit shmctl(IPC_INFO) crashes when shminfo struct is
|
||
+ at the end of a memory mapping
|
||
+ [26637] libc: semctl SEM_STAT_ANY fails to pass the buffer specified
|
||
+ by the caller to the kernel
|
||
+ [26639] libc: msgctl IPC_INFO and MSG_INFO return garbage
|
||
+ [26853] aarch64: Missing unwind information in statically linked startup code
|
||
+ [26932] libc: sh: Multiple floating point functions defined as stubs only
|
||
+ [27130] "rep movsb" performance issue
|
||
+ [27177] GLIBC_TUNABLES=glibc.cpu.x86_ibt=on:glibc.cpu.x86_shstk=on doesn't work
|
||
+ [27457] vzeroupper use in AVX2 multiarch string functions cause HTM aborts
|
||
+ [27974] Overflow bug in some implementation of wcsnlen, wmemchr, and wcsncat
|
||
+ [28524] Conversion from ISO-2022-JP-3 with iconv may emit spurious NULs
|
||
+ [28607] Masked signals are delivered on thread exit
|
||
+ [28755] overflow bug in wcsncmp_avx2 and wcsncmp_evex
|
||
+
|
||
Version 2.32
|
||
|
||
Major new features:
|
||
@@ -185,6 +209,14 @@ Security related changes:
|
||
Dytrych of the Cisco Security Assessment and Penetration Team (See
|
||
TALOS-2020-1019).
|
||
|
||
+ CVE-2020-27618: An infinite loop has been fixed in the iconv program when
|
||
+ invoked with input containing redundant shift sequences in the IBM1364,
|
||
+ IBM1371, IBM1388, IBM1390, or IBM1399 character sets.
|
||
+
|
||
+ CVE-2021-33574: The mq_notify function has a potential use-after-free
|
||
+ issue when using a notification type of SIGEV_THREAD and a thread
|
||
+ attribute with a non-default affinity mask.
|
||
+
|
||
The following bugs are resolved with this release:
|
||
|
||
[9809] localedata: ckb_IQ: new Kurdish Sorani locale
|
||
diff --git a/Rules b/Rules
|
||
index 8b771f6095..beab969fde 100644
|
||
--- a/Rules
|
||
+++ b/Rules
|
||
@@ -155,6 +155,7 @@ xtests: tests $(xtests-special)
|
||
else
|
||
tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \
|
||
$(tests-container:%=$(objpfx)%.out) \
|
||
+ $(tests-mcheck:%=$(objpfx)%-mcheck.out) \
|
||
$(tests-special) $(tests-printers-out)
|
||
xtests: tests $(xtests:%=$(objpfx)%.out) $(xtests-special)
|
||
endif
|
||
@@ -165,7 +166,7 @@ ifeq ($(run-built-tests),no)
|
||
tests-expected =
|
||
else
|
||
tests-expected = $(tests) $(tests-internal) $(tests-printers) \
|
||
- $(tests-container)
|
||
+ $(tests-container) $(tests-mcheck:%=%-mcheck)
|
||
endif
|
||
tests:
|
||
$(..)scripts/merge-test-results.sh -s $(objpfx) $(subdir) \
|
||
@@ -191,6 +192,7 @@ else
|
||
binaries-pie-tests =
|
||
binaries-pie-notests =
|
||
endif
|
||
+binaries-mcheck-tests = $(tests-mcheck:%=%-mcheck)
|
||
else
|
||
binaries-all-notests =
|
||
binaries-all-tests = $(tests) $(tests-internal) $(xtests) $(test-srcs)
|
||
@@ -200,6 +202,7 @@ binaries-static-tests =
|
||
binaries-static =
|
||
binaries-pie-tests =
|
||
binaries-pie-notests =
|
||
+binaries-mcheck-tests =
|
||
endif
|
||
|
||
binaries-pie = $(binaries-pie-tests) $(binaries-pie-notests)
|
||
@@ -223,6 +226,14 @@ $(addprefix $(objpfx),$(binaries-shared-tests)): %: %.o \
|
||
$(+link-tests)
|
||
endif
|
||
|
||
+ifneq "$(strip $(binaries-mcheck-tests))" ""
|
||
+$(addprefix $(objpfx),$(binaries-mcheck-tests)): %-mcheck: %.o \
|
||
+ $(link-extra-libs-tests) \
|
||
+ $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
|
||
+ $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
|
||
+ $(+link-tests)
|
||
+endif
|
||
+
|
||
ifneq "$(strip $(binaries-pie-tests))" ""
|
||
$(addprefix $(objpfx),$(binaries-pie-tests)): %: %.o \
|
||
$(link-extra-libs-tests) \
|
||
@@ -253,6 +264,12 @@ $(addprefix $(objpfx),$(binaries-static-tests)): %: %.o \
|
||
$(+link-static-tests)
|
||
endif
|
||
|
||
+# All mcheck tests will be run with MALLOC_CHECK_=3
|
||
+define mcheck-ENVS
|
||
+$(1)-mcheck-ENV = MALLOC_CHECK_=3
|
||
+endef
|
||
+$(foreach t,$(tests-mcheck),$(eval $(call mcheck-ENVS,$(t))))
|
||
+
|
||
ifneq "$(strip $(tests) $(tests-internal) $(xtests) $(test-srcs))" ""
|
||
# These are the implicit rules for making test outputs
|
||
# from the test programs and whatever input files are present.
|
||
diff --git a/debug/Makefile b/debug/Makefile
|
||
index 3a60d7af7a..0036edd187 100644
|
||
--- a/debug/Makefile
|
||
+++ b/debug/Makefile
|
||
@@ -51,7 +51,7 @@ routines = backtrace backtracesyms backtracesymsfd noophooks \
|
||
explicit_bzero_chk \
|
||
stack_chk_fail fortify_fail \
|
||
$(static-only-routines)
|
||
-static-only-routines := warning-nop stack_chk_fail_local
|
||
+static-only-routines := stack_chk_fail_local
|
||
|
||
# Don't add stack_chk_fail_local.o to libc.a since __stack_chk_fail_local
|
||
# is an alias of __stack_chk_fail in stack_chk_fail.o.
|
||
diff --git a/debug/warning-nop.c b/debug/warning-nop.c
|
||
deleted file mode 100644
|
||
index 4ab7e182b7..0000000000
|
||
--- a/debug/warning-nop.c
|
||
+++ /dev/null
|
||
@@ -1,70 +0,0 @@
|
||
-/* Dummy nop functions to elicit link-time warnings.
|
||
- Copyright (C) 2005-2020 Free Software Foundation, Inc.
|
||
- This file is part of the GNU C Library.
|
||
-
|
||
- The GNU C Library is free software; you can redistribute it and/or
|
||
- modify it under the terms of the GNU Lesser General Public
|
||
- License as published by the Free Software Foundation; either
|
||
- version 2.1 of the License, or (at your option) any later version.
|
||
-
|
||
- In addition to the permissions in the GNU Lesser General Public
|
||
- License, the Free Software Foundation gives you unlimited
|
||
- permission to link the compiled version of this file with other
|
||
- programs, and to distribute those programs without any restriction
|
||
- coming from the use of this file. (The GNU Lesser General Public
|
||
- License restrictions do apply in other respects; for example, they
|
||
- cover modification of the file, and distribution when not linked
|
||
- into another program.)
|
||
-
|
||
- Note that people who make modified versions of this file are not
|
||
- obligated to grant this special exception for their modified
|
||
- versions; it is their choice whether to do so. The GNU Lesser
|
||
- General Public License gives permission to release a modified
|
||
- version without this exception; this exception also makes it
|
||
- possible to release a modified version which carries forward this
|
||
- exception.
|
||
-
|
||
- The GNU C Library is distributed in the hope that it will be useful,
|
||
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
- Lesser General Public License for more details.
|
||
-
|
||
- You should have received a copy of the GNU Lesser General Public
|
||
- License along with the GNU C Library; if not, see
|
||
- <https://www.gnu.org/licenses/>. */
|
||
-
|
||
-#include <sys/cdefs.h>
|
||
-
|
||
-static void
|
||
-__attribute__ ((used))
|
||
-nop (void)
|
||
-{
|
||
-}
|
||
-
|
||
-/* Don't insert any other #include's before this #undef! */
|
||
-
|
||
-#undef __warndecl
|
||
-#define __warndecl(name, msg) \
|
||
- extern void name (void) __attribute__ ((alias ("nop"))) attribute_hidden; \
|
||
- link_warning (name, msg)
|
||
-
|
||
-#undef __USE_FORTIFY_LEVEL
|
||
-#define __USE_FORTIFY_LEVEL 99
|
||
-
|
||
-/* Following here we need an #include for each public header file
|
||
- that uses __warndecl. */
|
||
-
|
||
-/* Define away to avoid warnings with compilers that do not have these
|
||
- builtins. */
|
||
-#define __builtin___memcpy_chk(dest, src, len, bos) NULL
|
||
-#define __builtin___memmove_chk(dest, src, len, bos) NULL
|
||
-#define __builtin___mempcpy_chk(dest, src, len, bos) NULL
|
||
-#define __builtin___memset_chk(dest, ch, len, bos) NULL
|
||
-#define __builtin___stpcpy_chk(dest, src, bos) NULL
|
||
-#define __builtin___strcat_chk(dest, src, bos) NULL
|
||
-#define __builtin___strcpy_chk(dest, src, bos) NULL
|
||
-#define __builtin___strncat_chk(dest, src, len, bos) NULL
|
||
-#define __builtin___strncpy_chk(dest, src, len, bos) NULL
|
||
-#define __builtin_object_size(bos, level) 0
|
||
-
|
||
-#include <string.h>
|
||
diff --git a/elf/Makefile b/elf/Makefile
|
||
index 0b78721848..3ba7f4ecfc 100644
|
||
--- a/elf/Makefile
|
||
+++ b/elf/Makefile
|
||
@@ -1381,6 +1381,8 @@ CFLAGS-ifuncmain7pie.c += $(pie-ccflag)
|
||
CFLAGS-ifuncmain9pie.c += $(pie-ccflag)
|
||
CFLAGS-tst-ifunc-textrel.c += $(pic-ccflag)
|
||
|
||
+LDFLAGS-ifuncmain6pie = -Wl,-z,lazy
|
||
+
|
||
$(objpfx)ifuncmain1pie: $(objpfx)ifuncmod1.so
|
||
$(objpfx)ifuncmain1staticpie: $(objpfx)ifuncdep1pic.o
|
||
$(objpfx)ifuncmain1vispie: $(objpfx)ifuncmod1.so
|
||
@@ -1630,8 +1632,6 @@ $(objpfx)tst-nodelete-dlclose.out: $(objpfx)tst-nodelete-dlclose-dso.so \
|
||
|
||
tst-env-setuid-ENV = MALLOC_CHECK_=2 MALLOC_MMAP_THRESHOLD_=4096 \
|
||
LD_HWCAP_MASK=0x1
|
||
-tst-env-setuid-tunables-ENV = \
|
||
- GLIBC_TUNABLES=glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096
|
||
|
||
$(objpfx)tst-debug1: $(libdl)
|
||
$(objpfx)tst-debug1.out: $(objpfx)tst-debug1mod1.so
|
||
diff --git a/elf/dl-load.c b/elf/dl-load.c
|
||
index e39980fb19..71867e7c1a 100644
|
||
--- a/elf/dl-load.c
|
||
+++ b/elf/dl-load.c
|
||
@@ -855,10 +855,12 @@ lose (int code, int fd, const char *name, char *realname, struct link_map *l,
|
||
|
||
/* Process PT_GNU_PROPERTY program header PH in module L after
|
||
PT_LOAD segments are mapped. Only one NT_GNU_PROPERTY_TYPE_0
|
||
- note is handled which contains processor specific properties. */
|
||
+ note is handled which contains processor specific properties.
|
||
+ FD is -1 for the kernel-mapped main executable; otherwise it is
|
||
+ the fd used for loading module L. */
|
||
|
||
void
|
||
-_dl_process_pt_gnu_property (struct link_map *l, const ElfW(Phdr) *ph)
|
||
+_dl_process_pt_gnu_property (struct link_map *l, int fd, const ElfW(Phdr) *ph)
|
||
{
|
||
const ElfW(Nhdr) *note = (const void *) (ph->p_vaddr + l->l_addr);
|
||
const ElfW(Addr) size = ph->p_memsz;
|
||
@@ -905,7 +907,7 @@ _dl_process_pt_gnu_property (struct link_map *l, const ElfW(Phdr) *ph)
|
||
last_type = type;
|
||
|
||
/* Target specific property processing. */
|
||
- if (_dl_process_gnu_property (l, type, datasz, ptr) == 0)
|
||
+ if (_dl_process_gnu_property (l, fd, type, datasz, ptr) == 0)
|
||
return;
|
||
|
||
/* Check the next property item. */
|
||
@@ -1251,21 +1253,6 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
|
||
maplength, has_holes, loader);
|
||
if (__glibc_unlikely (errstring != NULL))
|
||
goto call_lose;
|
||
-
|
||
- /* Process program headers again after load segments are mapped in
|
||
- case processing requires accessing those segments. Scan program
|
||
- headers backward so that PT_NOTE can be skipped if PT_GNU_PROPERTY
|
||
- exits. */
|
||
- for (ph = &phdr[l->l_phnum]; ph != phdr; --ph)
|
||
- switch (ph[-1].p_type)
|
||
- {
|
||
- case PT_NOTE:
|
||
- _dl_process_pt_note (l, &ph[-1]);
|
||
- break;
|
||
- case PT_GNU_PROPERTY:
|
||
- _dl_process_pt_gnu_property (l, &ph[-1]);
|
||
- break;
|
||
- }
|
||
}
|
||
|
||
if (l->l_ld == 0)
|
||
@@ -1377,6 +1364,21 @@ cannot enable executable stack as shared object requires");
|
||
if (l->l_tls_initimage != NULL)
|
||
l->l_tls_initimage = (char *) l->l_tls_initimage + l->l_addr;
|
||
|
||
+ /* Process program headers again after load segments are mapped in
|
||
+ case processing requires accessing those segments. Scan program
|
||
+ headers backward so that PT_NOTE can be skipped if PT_GNU_PROPERTY
|
||
+ exists. */
|
||
+ for (ph = &l->l_phdr[l->l_phnum]; ph != l->l_phdr; --ph)
|
||
+ switch (ph[-1].p_type)
|
||
+ {
|
||
+ case PT_NOTE:
|
||
+ _dl_process_pt_note (l, fd, &ph[-1]);
|
||
+ break;
|
||
+ case PT_GNU_PROPERTY:
|
||
+ _dl_process_pt_gnu_property (l, fd, &ph[-1]);
|
||
+ break;
|
||
+ }
|
||
+
|
||
/* We are done mapping in the file. We no longer need the descriptor. */
|
||
if (__glibc_unlikely (__close_nocancel (fd) != 0))
|
||
{
|
||
diff --git a/elf/dl-open.c b/elf/dl-open.c
|
||
index 8769e47051..55b39e1bbe 100644
|
||
--- a/elf/dl-open.c
|
||
+++ b/elf/dl-open.c
|
||
@@ -887,7 +887,7 @@ no more namespaces available for dlmopen()"));
|
||
/* Avoid keeping around a dangling reference to the libc.so link
|
||
map in case it has been cached in libc_map. */
|
||
if (!args.libc_already_loaded)
|
||
- GL(dl_ns)[nsid].libc_map = NULL;
|
||
+ GL(dl_ns)[args.nsid].libc_map = NULL;
|
||
|
||
/* Remove the object from memory. It may be in an inconsistent
|
||
state if relocation failed, for example. */
|
||
diff --git a/elf/dl-tunables.c b/elf/dl-tunables.c
|
||
index 26e6e26612..15b29bcb90 100644
|
||
--- a/elf/dl-tunables.c
|
||
+++ b/elf/dl-tunables.c
|
||
@@ -177,6 +177,7 @@ parse_tunables (char *tunestr, char *valstring)
|
||
return;
|
||
|
||
char *p = tunestr;
|
||
+ size_t off = 0;
|
||
|
||
while (true)
|
||
{
|
||
@@ -190,7 +191,11 @@ parse_tunables (char *tunestr, char *valstring)
|
||
/* If we reach the end of the string before getting a valid name-value
|
||
pair, bail out. */
|
||
if (p[len] == '\0')
|
||
- return;
|
||
+ {
|
||
+ if (__libc_enable_secure)
|
||
+ tunestr[off] = '\0';
|
||
+ return;
|
||
+ }
|
||
|
||
/* We did not find a valid name-value pair before encountering the
|
||
colon. */
|
||
@@ -216,35 +221,28 @@ parse_tunables (char *tunestr, char *valstring)
|
||
|
||
if (tunable_is_name (cur->name, name))
|
||
{
|
||
- /* If we are in a secure context (AT_SECURE) then ignore the tunable
|
||
- unless it is explicitly marked as secure. Tunable values take
|
||
- precedence over their envvar aliases. */
|
||
+ /* If we are in a secure context (AT_SECURE) then ignore the
|
||
+ tunable unless it is explicitly marked as secure. Tunable
|
||
+ values take precedence over their envvar aliases. We write
|
||
+ the tunables that are not SXID_ERASE back to TUNESTR, thus
|
||
+ dropping all SXID_ERASE tunables and any invalid or
|
||
+ unrecognized tunables. */
|
||
if (__libc_enable_secure)
|
||
{
|
||
- if (cur->security_level == TUNABLE_SECLEVEL_SXID_ERASE)
|
||
+ if (cur->security_level != TUNABLE_SECLEVEL_SXID_ERASE)
|
||
{
|
||
- if (p[len] == '\0')
|
||
- {
|
||
- /* Last tunable in the valstring. Null-terminate and
|
||
- return. */
|
||
- *name = '\0';
|
||
- return;
|
||
- }
|
||
- else
|
||
- {
|
||
- /* Remove the current tunable from the string. We do
|
||
- this by overwriting the string starting from NAME
|
||
- (which is where the current tunable begins) with
|
||
- the remainder of the string. We then have P point
|
||
- to NAME so that we continue in the correct
|
||
- position in the valstring. */
|
||
- char *q = &p[len + 1];
|
||
- p = name;
|
||
- while (*q != '\0')
|
||
- *name++ = *q++;
|
||
- name[0] = '\0';
|
||
- len = 0;
|
||
- }
|
||
+ if (off > 0)
|
||
+ tunestr[off++] = ':';
|
||
+
|
||
+ const char *n = cur->name;
|
||
+
|
||
+ while (*n != '\0')
|
||
+ tunestr[off++] = *n++;
|
||
+
|
||
+ tunestr[off++] = '=';
|
||
+
|
||
+ for (size_t j = 0; j < len; j++)
|
||
+ tunestr[off++] = value[j];
|
||
}
|
||
|
||
if (cur->security_level != TUNABLE_SECLEVEL_NONE)
|
||
@@ -257,9 +255,16 @@ parse_tunables (char *tunestr, char *valstring)
|
||
}
|
||
}
|
||
|
||
if (p[len] == '\0')
|
||
- return;
|
||
+ {
|
||
+ /* End of input reached while processing a tunable. Terminate the
|
||
+ rewritten TUNESTR and stop here, so that the value just handled is
|
||
+ never re-parsed as a new name=value pair and written out twice. */
|
||
+ if (__libc_enable_secure)
|
||
+ tunestr[off] = '\0';
|
||
+ return;
|
||
+ }
|
||
else
|
||
p += len + 1;
|
||
}
|
||
}
|
||
diff --git a/elf/ifuncmain6pie.c b/elf/ifuncmain6pie.c
|
||
index 04faeb86ef..4a01906836 100644
|
||
--- a/elf/ifuncmain6pie.c
|
||
+++ b/elf/ifuncmain6pie.c
|
||
@@ -9,7 +9,6 @@
|
||
#include "ifunc-sel.h"
|
||
|
||
typedef int (*foo_p) (void);
|
||
-extern foo_p foo_ptr;
|
||
|
||
static int
|
||
one (void)
|
||
@@ -28,20 +27,17 @@ foo_ifunc (void)
|
||
}
|
||
|
||
extern int foo (void);
|
||
-extern foo_p get_foo (void);
|
||
+extern int call_foo (void);
|
||
extern foo_p get_foo_p (void);
|
||
|
||
-foo_p my_foo_ptr = foo;
|
||
+foo_p foo_ptr = foo;
|
||
|
||
int
|
||
main (void)
|
||
{
|
||
foo_p p;
|
||
|
||
- p = get_foo ();
|
||
- if (p != foo)
|
||
- abort ();
|
||
- if ((*p) () != -30)
|
||
+ if (call_foo () != -30)
|
||
abort ();
|
||
|
||
p = get_foo_p ();
|
||
@@ -52,12 +48,8 @@ main (void)
|
||
|
||
if (foo_ptr != foo)
|
||
abort ();
|
||
- if (my_foo_ptr != foo)
|
||
- abort ();
|
||
if ((*foo_ptr) () != -30)
|
||
abort ();
|
||
- if ((*my_foo_ptr) () != -30)
|
||
- abort ();
|
||
if (foo () != -30)
|
||
abort ();
|
||
|
||
diff --git a/elf/ifuncmod6.c b/elf/ifuncmod6.c
|
||
index 2e16c1d06d..2f6d0715e6 100644
|
||
--- a/elf/ifuncmod6.c
|
||
+++ b/elf/ifuncmod6.c
|
||
@@ -4,7 +4,7 @@ extern int foo (void);
|
||
|
||
typedef int (*foo_p) (void);
|
||
|
||
-foo_p foo_ptr = foo;
|
||
+extern foo_p foo_ptr;
|
||
|
||
foo_p
|
||
get_foo_p (void)
|
||
@@ -12,8 +12,8 @@ get_foo_p (void)
|
||
return foo_ptr;
|
||
}
|
||
|
||
-foo_p
|
||
-get_foo (void)
|
||
+int
|
||
+call_foo (void)
|
||
{
|
||
- return foo;
|
||
+ return foo ();
|
||
}
|
||
diff --git a/elf/rtld.c b/elf/rtld.c
|
||
index 5b882163fa..14a42ed00a 100644
|
||
--- a/elf/rtld.c
|
||
+++ b/elf/rtld.c
|
||
@@ -1534,10 +1534,10 @@ of this helper program; chances are you did not intend to run this program.\n\
|
||
switch (ph[-1].p_type)
|
||
{
|
||
case PT_NOTE:
|
||
- _dl_process_pt_note (main_map, &ph[-1]);
|
||
+ _dl_process_pt_note (main_map, -1, &ph[-1]);
|
||
break;
|
||
case PT_GNU_PROPERTY:
|
||
- _dl_process_pt_gnu_property (main_map, &ph[-1]);
|
||
+ _dl_process_pt_gnu_property (main_map, -1, &ph[-1]);
|
||
break;
|
||
}
|
||
|
||
diff --git a/elf/tst-env-setuid-tunables.c b/elf/tst-env-setuid-tunables.c
|
||
index 971d5892b1..ca0c8c245c 100644
|
||
--- a/elf/tst-env-setuid-tunables.c
|
||
+++ b/elf/tst-env-setuid-tunables.c
|
||
@@ -25,35 +25,76 @@
|
||
#include "config.h"
|
||
#undef _LIBC
|
||
|
||
-#define test_parent test_parent_tunables
|
||
-#define test_child test_child_tunables
|
||
-
|
||
-static int test_child_tunables (void);
|
||
-static int test_parent_tunables (void);
|
||
-
|
||
-#include "tst-env-setuid.c"
|
||
-
|
||
-#define CHILD_VALSTRING_VALUE "glibc.malloc.mmap_threshold=4096"
|
||
-#define PARENT_VALSTRING_VALUE \
|
||
- "glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096"
|
||
+#include <errno.h>
|
||
+#include <fcntl.h>
|
||
+#include <stdlib.h>
|
||
+#include <stdint.h>
|
||
+#include <stdio.h>
|
||
+#include <string.h>
|
||
+#include <sys/stat.h>
|
||
+#include <sys/wait.h>
|
||
+#include <unistd.h>
|
||
+#include <intprops.h>
|
||
+#include <array_length.h>
|
||
+
|
||
+#include <support/check.h>
|
||
+#include <support/support.h>
|
||
+#include <support/test-driver.h>
|
||
+#include <support/capture_subprocess.h>
|
||
+
|
||
+const char *teststrings[] =
|
||
+{
|
||
+ "glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096",
|
||
+ "glibc.malloc.check=2:glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096",
|
||
+ "glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096:glibc.malloc.check=2",
|
||
+ "glibc.malloc.perturb=0x800",
|
||
+ "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096",
|
||
+ "glibc.malloc.perturb=0x800:not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096",
|
||
+ "glibc.not_valid.check=2:glibc.malloc.mmap_threshold=4096",
|
||
+ "not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096",
|
||
+ "glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096:glibc.malloc.check=2",
|
||
+ "glibc.malloc.check=4:glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096",
|
||
+ ":glibc.malloc.garbage=2:glibc.malloc.check=1",
|
||
+ "glibc.malloc.check=1:glibc.malloc.check=2",
|
||
+ "not_valid.malloc.check=2",
|
||
+ "glibc.not_valid.check=2",
|
||
+};
|
||
+
|
||
+const char *resultstrings[] =
|
||
+{
|
||
+ "glibc.malloc.mmap_threshold=4096",
|
||
+ "glibc.malloc.mmap_threshold=4096",
|
||
+ "glibc.malloc.mmap_threshold=4096",
|
||
+ "glibc.malloc.perturb=0x800",
|
||
+ "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096",
|
||
+ "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096",
|
||
+ "glibc.malloc.mmap_threshold=4096",
|
||
+ "glibc.malloc.mmap_threshold=4096",
|
||
+ "",
|
||
+ "",
|
||
+ "",
|
||
+ "",
|
||
+ "",
|
||
+ "",
|
||
+};
|
||
|
||
static int
|
||
-test_child_tunables (void)
|
||
+test_child (int off)
|
||
{
|
||
const char *val = getenv ("GLIBC_TUNABLES");
|
||
|
||
#if HAVE_TUNABLES
|
||
- if (val != NULL && strcmp (val, CHILD_VALSTRING_VALUE) == 0)
|
||
+ if (val != NULL && strcmp (val, resultstrings[off]) == 0)
|
||
return 0;
|
||
|
||
if (val != NULL)
|
||
- printf ("Unexpected GLIBC_TUNABLES VALUE %s\n", val);
|
||
+ printf ("[%d] Unexpected GLIBC_TUNABLES VALUE %s\n", off, val);
|
||
|
||
return 1;
|
||
#else
|
||
if (val != NULL)
|
||
{
|
||
- printf ("GLIBC_TUNABLES not cleared\n");
|
||
+ printf ("[%d] GLIBC_TUNABLES not cleared\n", off);
|
||
return 1;
|
||
}
|
||
return 0;
|
||
@@ -61,15 +102,48 @@ test_child_tunables (void)
|
||
}
|
||
|
||
static int
|
||
-test_parent_tunables (void)
|
||
+do_test (int argc, char **argv)
|
||
{
|
||
- const char *val = getenv ("GLIBC_TUNABLES");
|
||
+ /* Setgid child process. */
|
||
+ if (argc == 2)
|
||
+ {
|
||
+ if (getgid () == getegid ())
|
||
+ /* This can happen if the file system is mounted nosuid. */
|
||
+ FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n",
|
||
+ (intmax_t) getgid ());
|
||
|
||
- if (val != NULL && strcmp (val, PARENT_VALSTRING_VALUE) == 0)
|
||
- return 0;
|
||
+ int ret = test_child (atoi (argv[1]));
|
||
|
||
- if (val != NULL)
|
||
- printf ("Unexpected GLIBC_TUNABLES VALUE %s\n", val);
|
||
+ if (ret != 0)
|
||
+ exit (1);
|
||
|
||
- return 1;
|
||
+ exit (EXIT_SUCCESS);
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ int ret = 0;
|
||
+
|
||
+ /* Spawn tests. */
|
||
+ for (int i = 0; i < array_length (teststrings); i++)
|
||
+ {
|
||
+ char buf[INT_BUFSIZE_BOUND (int)];
|
||
+
|
||
+ printf ("Spawned test for %s (%d)\n", teststrings[i], i);
|
||
+ snprintf (buf, sizeof (buf), "%d\n", i);
|
||
+ if (setenv ("GLIBC_TUNABLES", teststrings[i], 1) != 0)
|
||
+ exit (1);
|
||
+
|
||
+ int status = support_capture_subprogram_self_sgid (buf);
|
||
+
|
||
+ /* Bail out early if unsupported. */
|
||
+ if (WEXITSTATUS (status) == EXIT_UNSUPPORTED)
|
||
+ return EXIT_UNSUPPORTED;
|
||
+
|
||
+ ret |= (status != 0); /* STATUS is a wait status, not an exit code. */
|
||
+ }
|
||
+ return ret;
|
||
+ }
|
||
}
|
||
+
|
||
+#define TEST_FUNCTION_ARGV do_test
|
||
+#include <support/test-driver.c>
|
||
diff --git a/elf/tst-env-setuid.c b/elf/tst-env-setuid.c
|
||
index 41dc79e83a..2dbccdb69e 100644
|
||
--- a/elf/tst-env-setuid.c
|
||
+++ b/elf/tst-env-setuid.c
|
||
@@ -29,173 +29,12 @@
|
||
#include <sys/wait.h>
|
||
#include <unistd.h>
|
||
|
||
+#include <support/check.h>
|
||
#include <support/support.h>
|
||
#include <support/test-driver.h>
|
||
+#include <support/capture_subprocess.h>
|
||
|
||
static char SETGID_CHILD[] = "setgid-child";
|
||
-#define CHILD_STATUS 42
|
||
-
|
||
-/* Return a GID which is not our current GID, but is present in the
|
||
- supplementary group list. */
|
||
-static gid_t
|
||
-choose_gid (void)
|
||
-{
|
||
- const int count = 64;
|
||
- gid_t groups[count];
|
||
- int ret = getgroups (count, groups);
|
||
- if (ret < 0)
|
||
- {
|
||
- printf ("getgroups: %m\n");
|
||
- exit (1);
|
||
- }
|
||
- gid_t current = getgid ();
|
||
- for (int i = 0; i < ret; ++i)
|
||
- {
|
||
- if (groups[i] != current)
|
||
- return groups[i];
|
||
- }
|
||
- return 0;
|
||
-}
|
||
-
|
||
-/* Spawn and execute a program and verify that it returns the CHILD_STATUS. */
|
||
-static pid_t
|
||
-do_execve (char **args)
|
||
-{
|
||
- pid_t kid = vfork ();
|
||
-
|
||
- if (kid < 0)
|
||
- {
|
||
- printf ("vfork: %m\n");
|
||
- return -1;
|
||
- }
|
||
-
|
||
- if (kid == 0)
|
||
- {
|
||
- /* Child process. */
|
||
- execve (args[0], args, environ);
|
||
- _exit (-errno);
|
||
- }
|
||
-
|
||
- if (kid < 0)
|
||
- return 1;
|
||
-
|
||
- int status;
|
||
-
|
||
- if (waitpid (kid, &status, 0) < 0)
|
||
- {
|
||
- printf ("waitpid: %m\n");
|
||
- return 1;
|
||
- }
|
||
-
|
||
- if (WEXITSTATUS (status) == EXIT_UNSUPPORTED)
|
||
- return EXIT_UNSUPPORTED;
|
||
-
|
||
- if (!WIFEXITED (status) || WEXITSTATUS (status) != CHILD_STATUS)
|
||
- {
|
||
- printf ("Unexpected exit status %d from child process\n",
|
||
- WEXITSTATUS (status));
|
||
- return 1;
|
||
- }
|
||
- return 0;
|
||
-}
|
||
-
|
||
-/* Copies the executable into a restricted directory, so that we can
|
||
- safely make it SGID with the TARGET group ID. Then runs the
|
||
- executable. */
|
||
-static int
|
||
-run_executable_sgid (gid_t target)
|
||
-{
|
||
- char *dirname = xasprintf ("%s/tst-tunables-setuid.%jd",
|
||
- test_dir, (intmax_t) getpid ());
|
||
- char *execname = xasprintf ("%s/bin", dirname);
|
||
- int infd = -1;
|
||
- int outfd = -1;
|
||
- int ret = 0;
|
||
- if (mkdir (dirname, 0700) < 0)
|
||
- {
|
||
- printf ("mkdir: %m\n");
|
||
- goto err;
|
||
- }
|
||
- infd = open ("/proc/self/exe", O_RDONLY);
|
||
- if (infd < 0)
|
||
- {
|
||
- printf ("open (/proc/self/exe): %m\n");
|
||
- goto err;
|
||
- }
|
||
- outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700);
|
||
- if (outfd < 0)
|
||
- {
|
||
- printf ("open (%s): %m\n", execname);
|
||
- goto err;
|
||
- }
|
||
- char buf[4096];
|
||
- for (;;)
|
||
- {
|
||
- ssize_t rdcount = read (infd, buf, sizeof (buf));
|
||
- if (rdcount < 0)
|
||
- {
|
||
- printf ("read: %m\n");
|
||
- goto err;
|
||
- }
|
||
- if (rdcount == 0)
|
||
- break;
|
||
- char *p = buf;
|
||
- char *end = buf + rdcount;
|
||
- while (p != end)
|
||
- {
|
||
- ssize_t wrcount = write (outfd, buf, end - p);
|
||
- if (wrcount == 0)
|
||
- errno = ENOSPC;
|
||
- if (wrcount <= 0)
|
||
- {
|
||
- printf ("write: %m\n");
|
||
- goto err;
|
||
- }
|
||
- p += wrcount;
|
||
- }
|
||
- }
|
||
- if (fchown (outfd, getuid (), target) < 0)
|
||
- {
|
||
- printf ("fchown (%s): %m\n", execname);
|
||
- goto err;
|
||
- }
|
||
- if (fchmod (outfd, 02750) < 0)
|
||
- {
|
||
- printf ("fchmod (%s): %m\n", execname);
|
||
- goto err;
|
||
- }
|
||
- if (close (outfd) < 0)
|
||
- {
|
||
- printf ("close (outfd): %m\n");
|
||
- goto err;
|
||
- }
|
||
- if (close (infd) < 0)
|
||
- {
|
||
- printf ("close (infd): %m\n");
|
||
- goto err;
|
||
- }
|
||
-
|
||
- char *args[] = {execname, SETGID_CHILD, NULL};
|
||
-
|
||
- ret = do_execve (args);
|
||
-
|
||
-err:
|
||
- if (outfd >= 0)
|
||
- close (outfd);
|
||
- if (infd >= 0)
|
||
- close (infd);
|
||
- if (execname)
|
||
- {
|
||
- unlink (execname);
|
||
- free (execname);
|
||
- }
|
||
- if (dirname)
|
||
- {
|
||
- rmdir (dirname);
|
||
- free (dirname);
|
||
- }
|
||
- return ret;
|
||
-}
|
||
|
||
#ifndef test_child
|
||
static int
|
||
@@ -256,40 +95,32 @@ do_test (int argc, char **argv)
|
||
if (argc == 2 && strcmp (argv[1], SETGID_CHILD) == 0)
|
||
{
|
||
if (getgid () == getegid ())
|
||
- {
|
||
- /* This can happen if the file system is mounted nosuid. */
|
||
- fprintf (stderr, "SGID failed: GID and EGID match (%jd)\n",
|
||
- (intmax_t) getgid ());
|
||
- exit (EXIT_UNSUPPORTED);
|
||
- }
|
||
+ /* This can happen if the file system is mounted nosuid. */
|
||
+ FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n",
|
||
+ (intmax_t) getgid ());
|
||
|
||
int ret = test_child ();
|
||
|
||
if (ret != 0)
|
||
exit (1);
|
||
|
||
- exit (CHILD_STATUS);
|
||
+ exit (EXIT_SUCCESS);
|
||
}
|
||
else
|
||
{
|
||
if (test_parent () != 0)
|
||
exit (1);
|
||
|
||
- /* Try running a setgid program. */
|
||
- gid_t target = choose_gid ();
|
||
- if (target == 0)
|
||
- {
|
||
- fprintf (stderr,
|
||
- "Could not find a suitable GID for user %jd, skipping test\n",
|
||
- (intmax_t) getuid ());
|
||
- exit (0);
|
||
- }
|
||
+ int status = support_capture_subprogram_self_sgid (SETGID_CHILD);
|
||
|
||
- return run_executable_sgid (target);
|
||
- }
|
||
+ if (WEXITSTATUS (status) == EXIT_UNSUPPORTED)
|
||
+ return EXIT_UNSUPPORTED;
|
||
+
|
||
+ if (!WIFEXITED (status))
|
||
+ FAIL_EXIT1 ("Unexpected exit status %d from child process\n", status);
|
||
|
||
- /* Something went wrong and our argv was corrupted. */
|
||
- _exit (1);
|
||
+ return 0;
|
||
+ }
|
||
}
|
||
|
||
#define TEST_FUNCTION_ARGV do_test
|
||
diff --git a/iconv/Versions b/iconv/Versions
|
||
index 8a5f4cf780..d51af52fa3 100644
|
||
--- a/iconv/Versions
|
||
+++ b/iconv/Versions
|
||
@@ -6,7 +6,9 @@ libc {
|
||
GLIBC_PRIVATE {
|
||
# functions shared with iconv program
|
||
__gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db;
|
||
- __gconv_open; __gconv_create_spec;
|
||
+
|
||
+ # functions used elsewhere in glibc
|
||
+ __gconv_open; __gconv_create_spec; __gconv_destroy_spec;
|
||
|
||
# function used by the gconv modules
|
||
__gconv_transliterate;
|
||
diff --git a/iconv/gconv_charset.c b/iconv/gconv_charset.c
|
||
index 6ccd0773cc..4ba0aa99f5 100644
|
||
--- a/iconv/gconv_charset.c
|
||
+++ b/iconv/gconv_charset.c
|
||
@@ -216,3 +216,13 @@ out:
|
||
return ret;
|
||
}
|
||
libc_hidden_def (__gconv_create_spec)
|
||
+
|
||
+
|
||
+void
|
||
+__gconv_destroy_spec (struct gconv_spec *conv_spec)
|
||
+{
|
||
+ free (conv_spec->fromcode);
|
||
+ free (conv_spec->tocode);
|
||
+ return;
|
||
+}
|
||
+libc_hidden_def (__gconv_destroy_spec)
|
||
diff --git a/iconv/gconv_charset.h b/iconv/gconv_charset.h
|
||
index b39b09aea1..e9c122cf7e 100644
|
||
--- a/iconv/gconv_charset.h
|
||
+++ b/iconv/gconv_charset.h
|
||
@@ -48,33 +48,6 @@
|
||
#define GCONV_IGNORE_ERRORS_SUFFIX "IGNORE"
|
||
|
||
|
||
-/* This function accepts the charset names of the source and destination of the
|
||
- conversion and populates *conv_spec with an equivalent conversion
|
||
- specification that may later be used by __gconv_open. The charset names
|
||
- might contain options in the form of suffixes that alter the conversion,
|
||
- e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring
|
||
- and truncating any suffix options in fromcode, and processing and truncating
|
||
- any suffix options in tocode. Supported suffix options ("TRANSLIT" or
|
||
- "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec
|
||
- to be set to true. Unrecognized suffix options are silently discarded. If
|
||
- the function succeeds, it returns conv_spec back to the caller. It returns
|
||
- NULL upon failure. */
|
||
-struct gconv_spec *
|
||
-__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
|
||
- const char *tocode);
|
||
-libc_hidden_proto (__gconv_create_spec)
|
||
-
|
||
-
|
||
-/* This function frees all heap memory allocated by __gconv_create_spec. */
|
||
-static void __attribute__ ((unused))
|
||
-gconv_destroy_spec (struct gconv_spec *conv_spec)
|
||
-{
|
||
- free (conv_spec->fromcode);
|
||
- free (conv_spec->tocode);
|
||
- return;
|
||
-}
|
||
-
|
||
-
|
||
/* This function copies in-order, characters from the source 's' that are
|
||
either alpha-numeric or one in one of these: "_-.,:/" - into the destination
|
||
'wp' while dropping all other characters. In the process, it converts all
|
||
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
|
||
index e86938dae7..f721ce30ff 100644
|
||
--- a/iconv/gconv_int.h
|
||
+++ b/iconv/gconv_int.h
|
||
@@ -152,6 +152,27 @@ extern int __gconv_open (struct gconv_spec *conv_spec,
|
||
__gconv_t *handle, int flags);
|
||
libc_hidden_proto (__gconv_open)
|
||
|
||
+/* This function accepts the charset names of the source and destination of the
|
||
+ conversion and populates *conv_spec with an equivalent conversion
|
||
+ specification that may later be used by __gconv_open. The charset names
|
||
+ might contain options in the form of suffixes that alter the conversion,
|
||
+ e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring
|
||
+ and truncating any suffix options in fromcode, and processing and truncating
|
||
+ any suffix options in tocode. Supported suffix options ("TRANSLIT" or
|
||
+ "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec
|
||
+ to be set to true. Unrecognized suffix options are silently discarded. If
|
||
+ the function succeeds, it returns conv_spec back to the caller. It returns
|
||
+ NULL upon failure. */
|
||
+extern struct gconv_spec *
|
||
+__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
|
||
+ const char *tocode);
|
||
+libc_hidden_proto (__gconv_create_spec)
|
||
+
|
||
+/* This function frees all heap memory allocated by __gconv_create_spec. */
|
||
+extern void
|
||
+__gconv_destroy_spec (struct gconv_spec *conv_spec);
|
||
+libc_hidden_proto (__gconv_destroy_spec)
|
||
+
|
||
/* Free resources associated with transformation descriptor CD. */
|
||
extern int __gconv_close (__gconv_t cd)
|
||
attribute_hidden;
|
||
diff --git a/iconv/iconv_open.c b/iconv/iconv_open.c
|
||
index dd54bc12e0..5b30055c04 100644
|
||
--- a/iconv/iconv_open.c
|
||
+++ b/iconv/iconv_open.c
|
||
@@ -39,7 +39,7 @@ iconv_open (const char *tocode, const char *fromcode)
|
||
|
||
int res = __gconv_open (&conv_spec, &cd, 0);
|
||
|
||
- gconv_destroy_spec (&conv_spec);
|
||
+ __gconv_destroy_spec (&conv_spec);
|
||
|
||
if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK)
|
||
{
|
||
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
|
||
index b4334faa57..d59979759c 100644
|
||
--- a/iconv/iconv_prog.c
|
||
+++ b/iconv/iconv_prog.c
|
||
@@ -184,7 +184,7 @@ main (int argc, char *argv[])
|
||
/* Let's see whether we have these coded character sets. */
|
||
res = __gconv_open (&conv_spec, &cd, 0);
|
||
|
||
- gconv_destroy_spec (&conv_spec);
|
||
+ __gconv_destroy_spec (&conv_spec);
|
||
|
||
if (res != __GCONV_OK)
|
||
{
|
||
diff --git a/iconv/tst-iconv_prog.sh b/iconv/tst-iconv_prog.sh
|
||
index 8298136b7f..d8db7b335c 100644
|
||
--- a/iconv/tst-iconv_prog.sh
|
||
+++ b/iconv/tst-iconv_prog.sh
|
||
@@ -102,12 +102,16 @@ hangarray=(
|
||
"\x00\x80;-c;IBM1161;UTF-8//TRANSLIT//IGNORE"
|
||
"\x00\xdb;-c;IBM1162;UTF-8//TRANSLIT//IGNORE"
|
||
"\x00\x70;-c;IBM12712;UTF-8//TRANSLIT//IGNORE"
|
||
-# These are known hangs that are yet to be fixed:
|
||
-# "\x00\x0f;-c;IBM1364;UTF-8"
|
||
-# "\x00\x0f;-c;IBM1371;UTF-8"
|
||
-# "\x00\x0f;-c;IBM1388;UTF-8"
|
||
-# "\x00\x0f;-c;IBM1390;UTF-8"
|
||
-# "\x00\x0f;-c;IBM1399;UTF-8"
|
||
+"\x00\x0f;-c;IBM1364;UTF-8"
|
||
+"\x0e\x0e;-c;IBM1364;UTF-8"
|
||
+"\x00\x0f;-c;IBM1371;UTF-8"
|
||
+"\x0e\x0e;-c;IBM1371;UTF-8"
|
||
+"\x00\x0f;-c;IBM1388;UTF-8"
|
||
+"\x0e\x0e;-c;IBM1388;UTF-8"
|
||
+"\x00\x0f;-c;IBM1390;UTF-8"
|
||
+"\x0e\x0e;-c;IBM1390;UTF-8"
|
||
+"\x00\x0f;-c;IBM1399;UTF-8"
|
||
+"\x0e\x0e;-c;IBM1399;UTF-8"
|
||
"\x00\x53;-c;IBM16804;UTF-8//TRANSLIT//IGNORE"
|
||
"\x00\x41;-c;IBM274;UTF-8//TRANSLIT//IGNORE"
|
||
"\x00\x41;-c;IBM275;UTF-8//TRANSLIT//IGNORE"
|
||
diff --git a/iconvdata/Makefile b/iconvdata/Makefile
|
||
index 4ec2741cdc..b67b4feeb4 100644
|
||
--- a/iconvdata/Makefile
|
||
+++ b/iconvdata/Makefile
|
||
@@ -1,4 +1,5 @@
|
||
# Copyright (C) 1997-2020 Free Software Foundation, Inc.
|
||
+# Copyright (C) The GNU Toolchain Authors.
|
||
# This file is part of the GNU C Library.
|
||
|
||
# The GNU C Library is free software; you can redistribute it and/or
|
||
@@ -73,7 +74,8 @@ modules.so := $(addsuffix .so, $(modules))
|
||
ifeq (yes,$(build-shared))
|
||
tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
|
||
tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
|
||
- bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4
|
||
+ bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \
|
||
+ bug-iconv13 bug-iconv14 bug-iconv15
|
||
ifeq ($(have-thread-library),yes)
|
||
tests += bug-iconv3
|
||
endif
|
||
@@ -321,6 +323,10 @@ $(objpfx)bug-iconv10.out: $(objpfx)gconv-modules \
|
||
$(addprefix $(objpfx),$(modules.so))
|
||
$(objpfx)bug-iconv12.out: $(objpfx)gconv-modules \
|
||
$(addprefix $(objpfx),$(modules.so))
|
||
+$(objpfx)bug-iconv14.out: $(objpfx)gconv-modules \
|
||
+ $(addprefix $(objpfx),$(modules.so))
|
||
+$(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \
|
||
+ $(addprefix $(objpfx),$(modules.so))
|
||
|
||
$(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \
|
||
$(addprefix $(objpfx),$(modules.so)) \
|
||
diff --git a/iconvdata/bug-iconv13.c b/iconvdata/bug-iconv13.c
|
||
new file mode 100644
|
||
index 0000000000..87aaff398e
|
||
--- /dev/null
|
||
+++ b/iconvdata/bug-iconv13.c
|
||
@@ -0,0 +1,53 @@
|
||
+/* bug 24973: Test EUC-KR module
|
||
+ Copyright (C) 2020 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <errno.h>
|
||
+#include <iconv.h>
|
||
+#include <stdio.h>
|
||
+#include <support/check.h>
|
||
+
|
||
+static int
|
||
+do_test (void)
|
||
+{
|
||
+ iconv_t cd = iconv_open ("UTF-8//IGNORE", "EUC-KR");
|
||
+ TEST_VERIFY_EXIT (cd != (iconv_t) -1);
|
||
+
|
||
+ /* 0xfe (->0x7e : row 94) and 0xc9 (->0x49 : row 41) are user-defined
|
||
+ areas, which are not allowed and should be skipped over due to
|
||
+ //IGNORE. The trailing 0xfe also is an incomplete sequence, which
|
||
+ should be checked first. */
|
||
+ char input[4] = { '\xc9', '\xa1', '\0', '\xfe' };
|
||
+ char *inptr = input;
|
||
+ size_t insize = sizeof (input);
|
||
+ char output[4];
|
||
+ char *outptr = output;
|
||
+ size_t outsize = sizeof (output);
|
||
+
|
||
+ /* This used to crash due to buffer overrun. */
|
||
+ TEST_VERIFY (iconv (cd, &inptr, &insize, &outptr, &outsize) == (size_t) -1);
|
||
+ TEST_VERIFY (errno == EINVAL);
|
||
+ /* The conversion should produce one character, the converted null
|
||
+ character. */
|
||
+ TEST_VERIFY (sizeof (output) - outsize == 1);
|
||
+
|
||
+ TEST_VERIFY_EXIT (iconv_close (cd) != -1);
|
||
+
|
||
+ return 0;
|
||
+}
|
||
+
|
||
+#include <support/test-driver.c>
|
||
diff --git a/iconvdata/bug-iconv14.c b/iconvdata/bug-iconv14.c
|
||
new file mode 100644
|
||
index 0000000000..902f140fa9
|
||
--- /dev/null
|
||
+++ b/iconvdata/bug-iconv14.c
|
||
@@ -0,0 +1,127 @@
|
||
+/* Assertion in ISO-2022-JP-3 due to two-character sequence (bug 27256).
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <iconv.h>
|
||
+#include <string.h>
|
||
+#include <errno.h>
|
||
+#include <support/check.h>
|
||
+
|
||
+/* Use an escape sequence to return to the initial state. */
|
||
+static void
|
||
+with_escape_sequence (void)
|
||
+{
|
||
+ iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
|
||
+ TEST_VERIFY_EXIT (c != (iconv_t) -1);
|
||
+
|
||
+ char in[] = "\e$(O+D\e(B";
|
||
+ char *inbuf = in;
|
||
+ size_t inleft = strlen (in);
|
||
+ char out[3]; /* Space for one output character. */
|
||
+ char *outbuf;
|
||
+ size_t outleft;
|
||
+
|
||
+ outbuf = out;
|
||
+ outleft = sizeof (out);
|
||
+ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
|
||
+ TEST_COMPARE (errno, E2BIG);
|
||
+ TEST_COMPARE (inleft, 3);
|
||
+ TEST_COMPARE (inbuf - in, strlen (in) - 3);
|
||
+ TEST_COMPARE (outleft, sizeof (out) - 2);
|
||
+ TEST_COMPARE (outbuf - out, 2);
|
||
+ TEST_COMPARE (out[0] & 0xff, 0xc3);
|
||
+ TEST_COMPARE (out[1] & 0xff, 0xa6);
|
||
+
|
||
+ /* Return to the initial shift state, producing the pending
|
||
+ character. */
|
||
+ outbuf = out;
|
||
+ outleft = sizeof (out);
|
||
+ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), 0);
|
||
+ TEST_COMPARE (inleft, 0);
|
||
+ TEST_COMPARE (inbuf - in, strlen (in));
|
||
+ TEST_COMPARE (outleft, sizeof (out) - 2);
|
||
+ TEST_COMPARE (outbuf - out, 2);
|
||
+ TEST_COMPARE (out[0] & 0xff, 0xcc);
|
||
+ TEST_COMPARE (out[1] & 0xff, 0x80);
|
||
+
|
||
+ /* Nothing should be flushed the second time. */
|
||
+ outbuf = out;
|
||
+ outleft = sizeof (out);
|
||
+ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
|
||
+ TEST_COMPARE (outleft, sizeof (out));
|
||
+ TEST_COMPARE (outbuf - out, 0);
|
||
+ TEST_COMPARE (out[0] & 0xff, 0xcc);
|
||
+ TEST_COMPARE (out[1] & 0xff, 0x80);
|
||
+
|
||
+ TEST_COMPARE (iconv_close (c), 0);
|
||
+}
|
||
+
|
||
+/* Use an explicit flush to return to the initial state. */
|
||
+static void
|
||
+with_flush (void)
|
||
+{
|
||
+ iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
|
||
+ TEST_VERIFY_EXIT (c != (iconv_t) -1);
|
||
+
|
||
+ char in[] = "\e$(O+D";
|
||
+ char *inbuf = in;
|
||
+ size_t inleft = strlen (in);
|
||
+ char out[3]; /* Space for one output character. */
|
||
+ char *outbuf;
|
||
+ size_t outleft;
|
||
+
|
||
+ outbuf = out;
|
||
+ outleft = sizeof (out);
|
||
+ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
|
||
+ TEST_COMPARE (errno, E2BIG);
|
||
+ TEST_COMPARE (inleft, 0);
|
||
+ TEST_COMPARE (inbuf - in, strlen (in));
|
||
+ TEST_COMPARE (outleft, sizeof (out) - 2);
|
||
+ TEST_COMPARE (outbuf - out, 2);
|
||
+ TEST_COMPARE (out[0] & 0xff, 0xc3);
|
||
+ TEST_COMPARE (out[1] & 0xff, 0xa6);
|
||
+
|
||
+ /* Flush the pending character. */
|
||
+ outbuf = out;
|
||
+ outleft = sizeof (out);
|
||
+ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
|
||
+ TEST_COMPARE (outleft, sizeof (out) - 2);
|
||
+ TEST_COMPARE (outbuf - out, 2);
|
||
+ TEST_COMPARE (out[0] & 0xff, 0xcc);
|
||
+ TEST_COMPARE (out[1] & 0xff, 0x80);
|
||
+
|
||
+ /* Nothing should be flushed the second time. */
|
||
+ outbuf = out;
|
||
+ outleft = sizeof (out);
|
||
+ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
|
||
+ TEST_COMPARE (outleft, sizeof (out));
|
||
+ TEST_COMPARE (outbuf - out, 0);
|
||
+ TEST_COMPARE (out[0] & 0xff, 0xcc);
|
||
+ TEST_COMPARE (out[1] & 0xff, 0x80);
|
||
+
|
||
+ TEST_COMPARE (iconv_close (c), 0);
|
||
+}
|
||
+
|
||
+static int
|
||
+do_test (void)
|
||
+{
|
||
+ with_escape_sequence ();
|
||
+ with_flush ();
|
||
+ return 0;
|
||
+}
|
||
+
|
||
+#include <support/test-driver.c>
|
||
diff --git a/iconvdata/bug-iconv15.c b/iconvdata/bug-iconv15.c
|
||
new file mode 100644
|
||
index 0000000000..cc04bd0313
|
||
--- /dev/null
|
||
+++ b/iconvdata/bug-iconv15.c
|
||
@@ -0,0 +1,60 @@
|
||
+/* Bug 28524: Conversion from ISO-2022-JP-3 with iconv
|
||
+ may emit spurious NUL character on state reset.
|
||
+ Copyright (C) The GNU Toolchain Authors.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <stddef.h>
|
||
+#include <iconv.h>
|
||
+#include <support/check.h>
|
||
+
|
||
+static int
|
||
+do_test (void)
|
||
+{
|
||
+ char in[] = "\x1b(I";
|
||
+ char *inbuf = in;
|
||
+ size_t inleft = sizeof (in) - 1;
|
||
+ char out[1];
|
||
+ char *outbuf = out;
|
||
+ size_t outleft = sizeof (out);
|
||
+ iconv_t cd;
|
||
+
|
||
+ cd = iconv_open ("UTF8", "ISO-2022-JP-3");
|
||
+ TEST_VERIFY_EXIT (cd != (iconv_t) -1);
|
||
+
|
||
+ /* First call to iconv should alter internal state.
|
||
+ Now, JISX0201_Kana_set is selected and
|
||
+ state value != ASCII_set. */
|
||
+ TEST_VERIFY (iconv (cd, &inbuf, &inleft, &outbuf, &outleft) != (size_t) -1);
|
||
+
|
||
+ /* No bytes should have been added to
|
||
+ the output buffer at this point. */
|
||
+ TEST_VERIFY (outbuf == out);
|
||
+ TEST_VERIFY (outleft == sizeof (out));
|
||
+
|
||
+ /* Second call shall emit spurious NUL character in unpatched glibc. */
|
||
+ TEST_VERIFY (iconv (cd, NULL, NULL, &outbuf, &outleft) != (size_t) -1);
|
||
+
|
||
+ /* No characters are expected to be produced. */
|
||
+ TEST_VERIFY (outbuf == out);
|
||
+ TEST_VERIFY (outleft == sizeof (out));
|
||
+
|
||
+ TEST_VERIFY_EXIT (iconv_close (cd) != -1);
|
||
+
|
||
+ return 0;
|
||
+}
|
||
+
|
||
+#include <support/test-driver.c>
|
||
diff --git a/iconvdata/euc-kr.c b/iconvdata/euc-kr.c
|
||
index b0d56cf3ee..1045bae926 100644
|
||
--- a/iconvdata/euc-kr.c
|
||
+++ b/iconvdata/euc-kr.c
|
||
@@ -80,11 +80,7 @@ euckr_from_ucs4 (uint32_t ch, unsigned char *cp)
|
||
\
|
||
if (ch <= 0x9f) \
|
||
++inptr; \
|
||
- /* 0xfe(->0x7e : row 94) and 0xc9(->0x59 : row 41) are \
|
||
- user-defined areas. */ \
|
||
- else if (__builtin_expect (ch == 0xa0, 0) \
|
||
- || __builtin_expect (ch > 0xfe, 0) \
|
||
- || __builtin_expect (ch == 0xc9, 0)) \
|
||
+ else if (__glibc_unlikely (ch == 0xa0)) \
|
||
{ \
|
||
/* This is illegal. */ \
|
||
STANDARD_FROM_LOOP_ERR_HANDLER (1); \
|
||
diff --git a/iconvdata/ibm1364.c b/iconvdata/ibm1364.c
|
||
index 49e7267ab4..521f0825b7 100644
|
||
--- a/iconvdata/ibm1364.c
|
||
+++ b/iconvdata/ibm1364.c
|
||
@@ -158,24 +158,14 @@ enum
|
||
\
|
||
if (__builtin_expect (ch, 0) == SO) \
|
||
{ \
|
||
- /* Shift OUT, change to DBCS converter. */ \
|
||
- if (curcs == db) \
|
||
- { \
|
||
- result = __GCONV_ILLEGAL_INPUT; \
|
||
- break; \
|
||
- } \
|
||
+ /* Shift OUT, change to DBCS converter (redundant escape okay). */ \
|
||
curcs = db; \
|
||
++inptr; \
|
||
continue; \
|
||
} \
|
||
if (__builtin_expect (ch, 0) == SI) \
|
||
{ \
|
||
- /* Shift IN, change to SBCS converter. */ \
|
||
- if (curcs == sb) \
|
||
- { \
|
||
- result = __GCONV_ILLEGAL_INPUT; \
|
||
- break; \
|
||
- } \
|
||
+ /* Shift IN, change to SBCS converter (redundant escape okay). */ \
|
||
curcs = sb; \
|
||
++inptr; \
|
||
continue; \
|
||
diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c
|
||
index 8c3b7e627e..c7b470db61 100644
|
||
--- a/iconvdata/iso-2022-jp-3.c
|
||
+++ b/iconvdata/iso-2022-jp-3.c
|
||
@@ -1,5 +1,6 @@
|
||
/* Conversion module for ISO-2022-JP-3.
|
||
Copyright (C) 1998-2020 Free Software Foundation, Inc.
|
||
+ Copyright (C) The GNU Toolchain Authors.
|
||
This file is part of the GNU C Library.
|
||
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998,
|
||
and Bruno Haible <bruno@clisp.org>, 2002.
|
||
@@ -67,10 +68,15 @@ enum
|
||
CURRENT_SEL_MASK = 7 << 3
|
||
};
|
||
|
||
-/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the state
|
||
- also contains the last two bytes to be output, shifted by 6 bits, and a
|
||
- one-bit indicator whether they must be preceded by the shift sequence,
|
||
- in bit 22. */
|
||
+/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the
|
||
+ state also contains the last two bytes to be output, shifted by 6
|
||
+ bits, and a one-bit indicator whether they must be preceded by the
|
||
+ shift sequence, in bit 22. During ISO-2022-JP-3 to UCS-4
|
||
+ conversion, COUNT may also contain a non-zero pending wide
|
||
+ character, shifted by six bits. This happens for certain inputs in
|
||
+ JISX0213_1_2004_set and JISX0213_2_set if the second wide character
|
||
+ in a combining sequence cannot be written because the buffer is
|
||
+ full. */
|
||
|
||
/* Since this is a stateful encoding we have to provide code which resets
|
||
the output state to the initial state. This has to be done during the
|
||
@@ -80,10 +86,27 @@ enum
|
||
{ \
|
||
if (FROM_DIRECTION) \
|
||
{ \
|
||
- /* It's easy, we don't have to emit anything, we just reset the \
|
||
- state for the input. */ \
|
||
- data->__statep->__count &= 7; \
|
||
- data->__statep->__count |= ASCII_set; \
|
||
+ uint32_t ch = data->__statep->__count >> 6; \
|
||
+ \
|
||
+ if (__glibc_unlikely (ch != 0)) \
|
||
+ { \
|
||
+ if (__glibc_likely (outbuf + 4 <= outend)) \
|
||
+ { \
|
||
+ /* Write out the last character. */ \
|
||
+ put32u (outbuf, ch); \
|
||
+ outbuf += 4; \
|
||
+ data->__statep->__count &= 7; \
|
||
+ data->__statep->__count |= ASCII_set; \
|
||
+ } \
|
||
+ else \
|
||
+ /* We don't have enough room in the output buffer. */ \
|
||
+ status = __GCONV_FULL_OUTPUT; \
|
||
+ } \
|
||
+ else \
|
||
+ { \
|
||
+ data->__statep->__count &= 7; \
|
||
+ data->__statep->__count |= ASCII_set; \
|
||
+ } \
|
||
} \
|
||
else \
|
||
{ \
|
||
@@ -151,7 +174,21 @@ enum
|
||
#define LOOPFCT FROM_LOOP
|
||
#define BODY \
|
||
{ \
|
||
- uint32_t ch = *inptr; \
|
||
+ uint32_t ch; \
|
||
+ \
|
||
+ /* Output any pending character. */ \
|
||
+ ch = set >> 6; \
|
||
+ if (__glibc_unlikely (ch != 0)) \
|
||
+ { \
|
||
+ put32 (outptr, ch); \
|
||
+ outptr += 4; \
|
||
+ /* Remove the pending character, but preserve state bits. */ \
|
||
+ set &= (1 << 6) - 1; \
|
||
+ continue; \
|
||
+ } \
|
||
+ \
|
||
+ /* Otherwise read the next input byte. */ \
|
||
+ ch = *inptr; \
|
||
\
|
||
/* Recognize escape sequences. */ \
|
||
if (__glibc_unlikely (ch == ESC)) \
|
||
@@ -297,21 +334,25 @@ enum
|
||
uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \
|
||
uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \
|
||
\
|
||
+ inptr += 2; \
|
||
+ \
|
||
+ put32 (outptr, u1); \
|
||
+ outptr += 4; \
|
||
+ \
|
||
/* See whether we have room for two characters. */ \
|
||
- if (outptr + 8 <= outend) \
|
||
+ if (outptr + 4 <= outend) \
|
||
{ \
|
||
- inptr += 2; \
|
||
- put32 (outptr, u1); \
|
||
- outptr += 4; \
|
||
put32 (outptr, u2); \
|
||
outptr += 4; \
|
||
continue; \
|
||
} \
|
||
- else \
|
||
- { \
|
||
- result = __GCONV_FULL_OUTPUT; \
|
||
- break; \
|
||
- } \
|
||
+ \
|
||
+ /* Otherwise store only the first character now, and \
|
||
+ put the second one into the queue. */ \
|
||
+ set |= u2 << 6; \
|
||
+ /* Tell the caller why we terminate the loop. */ \
|
||
+ result = __GCONV_FULL_OUTPUT; \
|
||
+ break; \
|
||
} \
|
||
\
|
||
inptr += 2; \
|
||
diff --git a/iconvdata/ksc5601.h b/iconvdata/ksc5601.h
|
||
index d3eb3a4ff8..f5cdc72797 100644
|
||
--- a/iconvdata/ksc5601.h
|
||
+++ b/iconvdata/ksc5601.h
|
||
@@ -50,15 +50,15 @@ ksc5601_to_ucs4 (const unsigned char **s, size_t avail, unsigned char offset)
|
||
unsigned char ch2;
|
||
int idx;
|
||
|
||
+ if (avail < 2)
|
||
+ return 0;
|
||
+
|
||
/* row 94(0x7e) and row 41(0x49) are user-defined area in KS C 5601 */
|
||
|
||
if (ch < offset || (ch - offset) <= 0x20 || (ch - offset) >= 0x7e
|
||
|| (ch - offset) == 0x49)
|
||
return __UNKNOWN_10646_CHAR;
|
||
|
||
- if (avail < 2)
|
||
- return 0;
|
||
-
|
||
ch2 = (*s)[1];
|
||
if (ch2 < offset || (ch2 - offset) <= 0x20 || (ch2 - offset) >= 0x7f)
|
||
return __UNKNOWN_10646_CHAR;
|
||
diff --git a/intl/dcigettext.c b/intl/dcigettext.c
|
||
index 2e7c662bc7..bd332e71da 100644
|
||
--- a/intl/dcigettext.c
|
||
+++ b/intl/dcigettext.c
|
||
@@ -1120,15 +1120,18 @@ _nl_find_msg (struct loaded_l10nfile *domain_file,
|
||
|
||
# ifdef _LIBC
|
||
|
||
- struct gconv_spec conv_spec
|
||
- = { .fromcode = norm_add_slashes (charset, ""),
|
||
- .tocode = norm_add_slashes (outcharset, ""),
|
||
- /* We always want to use transliteration. */
|
||
- .translit = true,
|
||
- .ignore = false
|
||
- };
|
||
+ struct gconv_spec conv_spec;
|
||
+
|
||
+ __gconv_create_spec (&conv_spec, charset, outcharset);
|
||
+
|
||
+ /* We always want to use transliteration. */
|
||
+ conv_spec.translit = true;
|
||
+
|
||
int r = __gconv_open (&conv_spec, &convd->conv,
|
||
GCONV_AVOID_NOCONV);
|
||
+
|
||
+ __gconv_destroy_spec (&conv_spec);
|
||
+
|
||
if (__builtin_expect (r != __GCONV_OK, 0))
|
||
{
|
||
/* If the output encoding is the same there is
|
||
diff --git a/intl/tst-codeset.c b/intl/tst-codeset.c
|
||
index fd70432eca..e9f6e5e09f 100644
|
||
--- a/intl/tst-codeset.c
|
||
+++ b/intl/tst-codeset.c
|
||
@@ -22,13 +22,11 @@
|
||
#include <stdio.h>
|
||
#include <stdlib.h>
|
||
#include <string.h>
|
||
+#include <support/check.h>
|
||
|
||
static int
|
||
do_test (void)
|
||
{
|
||
- char *s;
|
||
- int result = 0;
|
||
-
|
||
unsetenv ("LANGUAGE");
|
||
unsetenv ("OUTPUT_CHARSET");
|
||
setlocale (LC_ALL, "de_DE.ISO-8859-1");
|
||
@@ -36,25 +34,21 @@ do_test (void)
|
||
bindtextdomain ("codeset", OBJPFX "domaindir");
|
||
|
||
/* Here we expect output in ISO-8859-1. */
|
||
- s = gettext ("cheese");
|
||
- if (strcmp (s, "K\344se"))
|
||
- {
|
||
- printf ("call 1 returned: %s\n", s);
|
||
- result = 1;
|
||
- }
|
||
+ TEST_COMPARE_STRING (gettext ("cheese"), "K\344se");
|
||
|
||
+ /* Here we expect output in UTF-8. */
|
||
bind_textdomain_codeset ("codeset", "UTF-8");
|
||
+ TEST_COMPARE_STRING (gettext ("cheese"), "K\303\244se");
|
||
|
||
- /* Here we expect output in UTF-8. */
|
||
- s = gettext ("cheese");
|
||
- if (strcmp (s, "K\303\244se"))
|
||
- {
|
||
- printf ("call 2 returned: %s\n", s);
|
||
- result = 1;
|
||
- }
|
||
-
|
||
- return result;
|
||
+ /* `a with umlaut' is transliterated to `ae'. */
|
||
+ bind_textdomain_codeset ("codeset", "ASCII//TRANSLIT");
|
||
+ TEST_COMPARE_STRING (gettext ("cheese"), "Kaese");
|
||
+
|
||
+ /* Transliteration also works by default even if not set. */
|
||
+ bind_textdomain_codeset ("codeset", "ASCII");
|
||
+ TEST_COMPARE_STRING (gettext ("cheese"), "Kaese");
|
||
+
|
||
+ return 0;
|
||
}
|
||
|
||
-#define TEST_FUNCTION do_test ()
|
||
-#include "../test-skeleton.c"
|
||
+#include <support/test-driver.c>
|
||
diff --git a/malloc/Makefile b/malloc/Makefile
|
||
index e22cbde22d..5093e8730e 100644
|
||
--- a/malloc/Makefile
|
||
+++ b/malloc/Makefile
|
||
@@ -62,6 +62,16 @@ endif
|
||
tests += $(tests-static)
|
||
test-srcs = tst-mtrace
|
||
|
||
+# These tests either are run with MALLOC_CHECK_=3 by default or do not work
|
||
+# with MALLOC_CHECK_=3 because they expect a specific failure.
|
||
+tests-exclude-mcheck = tst-mcheck tst-malloc-usable \
|
||
+ tst-interpose-nothread tst-interpose-static-nothread \
|
||
+ tst-interpose-static-thread tst-malloc-too-large \
|
||
+ tst-mxfast tst-safe-linking
|
||
+
|
||
+# Run all tests with MALLOC_CHECK_=3
|
||
+tests-mcheck = $(filter-out $(tests-exclude-mcheck),$(tests))
|
||
+
|
||
routines = malloc morecore mcheck mtrace obstack reallocarray \
|
||
scratch_buffer_grow scratch_buffer_grow_preserve \
|
||
scratch_buffer_set_array_size \
|
||
@@ -100,6 +110,11 @@ $(objpfx)tst-malloc-thread-exit: $(shared-thread-library)
|
||
$(objpfx)tst-malloc-thread-fail: $(shared-thread-library)
|
||
$(objpfx)tst-malloc-fork-deadlock: $(shared-thread-library)
|
||
$(objpfx)tst-malloc-stats-cancellation: $(shared-thread-library)
|
||
+$(objpfx)tst-malloc-backtrace-mcheck: $(shared-thread-library)
|
||
+$(objpfx)tst-malloc-thread-exit-mcheck: $(shared-thread-library)
|
||
+$(objpfx)tst-malloc-thread-fail-mcheck: $(shared-thread-library)
|
||
+$(objpfx)tst-malloc-fork-deadlock-mcheck: $(shared-thread-library)
|
||
+$(objpfx)tst-malloc-stats-cancellation-mcheck: $(shared-thread-library)
|
||
|
||
# Export the __malloc_initialize_hook variable to libc.so.
|
||
LDFLAGS-tst-mallocstate = -rdynamic
|
||
@@ -239,6 +254,8 @@ $(tests:%=$(objpfx)%.o): CPPFLAGS += -DTEST_NO_MALLOPT
|
||
$(objpfx)tst-interpose-nothread: $(objpfx)tst-interpose-aux-nothread.o
|
||
$(objpfx)tst-interpose-thread: \
|
||
$(objpfx)tst-interpose-aux-thread.o $(shared-thread-library)
|
||
+$(objpfx)tst-interpose-thread-mcheck: \
|
||
+ $(objpfx)tst-interpose-aux-thread.o $(shared-thread-library)
|
||
$(objpfx)tst-interpose-static-nothread: $(objpfx)tst-interpose-aux-nothread.o
|
||
$(objpfx)tst-interpose-static-thread: \
|
||
$(objpfx)tst-interpose-aux-thread.o $(static-thread-library)
|
||
@@ -256,3 +273,6 @@ $(objpfx)tst-dynarray-fail-mem.out: $(objpfx)tst-dynarray-fail.out
|
||
$(objpfx)tst-malloc-tcache-leak: $(shared-thread-library)
|
||
$(objpfx)tst-malloc_info: $(shared-thread-library)
|
||
$(objpfx)tst-mallocfork2: $(shared-thread-library)
|
||
+$(objpfx)tst-malloc-tcache-leak-mcheck: $(shared-thread-library)
|
||
+$(objpfx)tst-malloc_info-mcheck: $(shared-thread-library)
|
||
+$(objpfx)tst-mallocfork2-mcheck: $(shared-thread-library)
|
||
diff --git a/manual/tunables.texi b/manual/tunables.texi
|
||
index 23ef0d40e7..d72d7a5ec0 100644
|
||
--- a/manual/tunables.texi
|
||
+++ b/manual/tunables.texi
|
||
@@ -432,7 +432,11 @@ set shared cache size in bytes for use in memory and string routines.
|
||
|
||
@deftp Tunable glibc.cpu.x86_non_temporal_threshold
|
||
The @code{glibc.cpu.x86_non_temporal_threshold} tunable allows the user
|
||
-to set threshold in bytes for non temporal store.
|
||
+to set threshold in bytes for non temporal store. Non temporal stores
|
||
+give a hint to the hardware to move data directly to memory without
|
||
+displacing other data from the cache. This tunable is used by some
|
||
+platforms to determine when to use non temporal stores in operations
|
||
+like memmove and memcpy.
|
||
|
||
This tunable is specific to i386 and x86-64.
|
||
@end deftp
|
||
diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h
|
||
index 19d9cc5cfe..38221d0b2a 100644
|
||
--- a/misc/sys/cdefs.h
|
||
+++ b/misc/sys/cdefs.h
|
||
@@ -124,13 +124,10 @@
|
||
#define __bos0(ptr) __builtin_object_size (ptr, 0)
|
||
|
||
#if __GNUC_PREREQ (4,3)
|
||
-# define __warndecl(name, msg) \
|
||
- extern void name (void) __attribute__((__warning__ (msg)))
|
||
# define __warnattr(msg) __attribute__((__warning__ (msg)))
|
||
# define __errordecl(name, msg) \
|
||
extern void name (void) __attribute__((__error__ (msg)))
|
||
#else
|
||
-# define __warndecl(name, msg) extern void name (void)
|
||
# define __warnattr(msg)
|
||
# define __errordecl(name, msg) extern void name (void)
|
||
#endif
|
||
diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
|
||
index 2cba3da38c..c217cda608 100644
|
||
--- a/nptl/pthread_create.c
|
||
+++ b/nptl/pthread_create.c
|
||
@@ -416,8 +416,6 @@ START_THREAD_DEFN
|
||
unwind_buf.priv.data.prev = NULL;
|
||
unwind_buf.priv.data.cleanup = NULL;
|
||
|
||
- __libc_signal_restore_set (&pd->sigmask);
|
||
-
|
||
/* Allow setxid from now onwards. */
|
||
if (__glibc_unlikely (atomic_exchange_acq (&pd->setxid_futex, 0) == -2))
|
||
futex_wake (&pd->setxid_futex, 1, FUTEX_PRIVATE);
|
||
@@ -427,6 +425,8 @@ START_THREAD_DEFN
|
||
/* Store the new cleanup handler info. */
|
||
THREAD_SETMEM (pd, cleanup_jmp_buf, &unwind_buf);
|
||
|
||
+ __libc_signal_restore_set (&pd->sigmask);
|
||
+
|
||
/* We are either in (a) or (b), and in either case we either own
|
||
PD already (2) or are about to own PD (1), and so our only
|
||
restriction would be that we can't free PD until we know we
|
||
diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c
|
||
index 88c69d1e9c..381aa721ef 100644
|
||
--- a/nscd/netgroupcache.c
|
||
+++ b/nscd/netgroupcache.c
|
||
@@ -248,7 +248,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
|
||
: NULL);
|
||
ndomain = (ndomain ? newbuf + ndomaindiff
|
||
: NULL);
|
||
- buffer = newbuf;
|
||
+ *tofreep = buffer = newbuf;
|
||
}
|
||
|
||
nhost = memcpy (buffer + bufused,
|
||
@@ -319,7 +319,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
|
||
else if (status == NSS_STATUS_TRYAGAIN && e == ERANGE)
|
||
{
|
||
buflen *= 2;
|
||
- buffer = xrealloc (buffer, buflen);
|
||
+ *tofreep = buffer = xrealloc (buffer, buflen);
|
||
}
|
||
else if (status == NSS_STATUS_RETURN
|
||
|| status == NSS_STATUS_NOTFOUND
|
||
diff --git a/nss/tst-nss-files-hosts-long.root/etc/nsswitch.conf b/nss/tst-nss-files-hosts-long.root/etc/nsswitch.conf
|
||
new file mode 100644
|
||
index 0000000000..5b0c6a4199
|
||
--- /dev/null
|
||
+++ b/nss/tst-nss-files-hosts-long.root/etc/nsswitch.conf
|
||
@@ -0,0 +1 @@
|
||
+hosts: files
|
||
diff --git a/posix/bits/unistd.h b/posix/bits/unistd.h
|
||
index 725a83eb0d..7e5bb6fb1e 100644
|
||
--- a/posix/bits/unistd.h
|
||
+++ b/posix/bits/unistd.h
|
||
@@ -193,10 +193,9 @@ __NTH (readlinkat (int __fd, const char *__restrict __path,
|
||
#endif
|
||
|
||
extern char *__getcwd_chk (char *__buf, size_t __size, size_t __buflen)
|
||
- __THROW __wur __attr_access ((__write_only__, 1, 2));
|
||
+ __THROW __wur;
|
||
extern char *__REDIRECT_NTH (__getcwd_alias,
|
||
- (char *__buf, size_t __size), getcwd)
|
||
- __wur __attr_access ((__write_only__, 1, 2));
|
||
+ (char *__buf, size_t __size), getcwd) __wur;
|
||
extern char *__REDIRECT_NTH (__getcwd_chk_warn,
|
||
(char *__buf, size_t __size, size_t __buflen),
|
||
__getcwd_chk)
|
||
diff --git a/posix/unistd.h b/posix/unistd.h
|
||
index 32b8161619..acf9ee7e79 100644
|
||
--- a/posix/unistd.h
|
||
+++ b/posix/unistd.h
|
||
@@ -517,8 +517,7 @@ extern int fchdir (int __fd) __THROW __wur;
|
||
an array is allocated with `malloc'; the array is SIZE
|
||
bytes long, unless SIZE == 0, in which case it is as
|
||
big as necessary. */
|
||
-extern char *getcwd (char *__buf, size_t __size) __THROW __wur
|
||
- __attr_access ((__write_only__, 1, 2));
|
||
+extern char *getcwd (char *__buf, size_t __size) __THROW __wur;
|
||
|
||
#ifdef __USE_GNU
|
||
/* Return a malloc'd string containing the current directory name.
|
||
@@ -831,7 +830,7 @@ extern int symlinkat (const char *__from, int __tofd,
|
||
/* Like readlink but a relative PATH is interpreted relative to FD. */
|
||
extern ssize_t readlinkat (int __fd, const char *__restrict __path,
|
||
char *__restrict __buf, size_t __len)
|
||
- __THROW __nonnull ((2, 3)) __wur __attr_access ((__read_only__, 3, 4));
|
||
+ __THROW __nonnull ((2, 3)) __wur __attr_access ((__write_only__, 3, 4));
|
||
#endif
|
||
|
||
/* Remove the link NAME. */
|
||
diff --git a/posix/wordexp-test.c b/posix/wordexp-test.c
|
||
index ed1b22308e..cb3f989cba 100644
|
||
--- a/posix/wordexp-test.c
|
||
+++ b/posix/wordexp-test.c
|
||
@@ -183,6 +183,7 @@ struct test_case_struct
|
||
{ 0, NULL, "$var", 0, 0, { NULL, }, IFS },
|
||
{ 0, NULL, "\"\\n\"", 0, 1, { "\\n", }, IFS },
|
||
{ 0, NULL, "", 0, 0, { NULL, }, IFS },
|
||
+ { 0, NULL, "${1234567890123456789012}", 0, 0, { NULL, }, IFS },
|
||
|
||
/* Flags not already covered (testit() has special handling for these) */
|
||
{ 0, NULL, "one two", WRDE_DOOFFS, 2, { "one", "two", }, IFS },
|
||
diff --git a/posix/wordexp.c b/posix/wordexp.c
|
||
index e082d94895..56289503a1 100644
|
||
--- a/posix/wordexp.c
|
||
+++ b/posix/wordexp.c
|
||
@@ -1399,7 +1399,7 @@ envsubst:
|
||
/* Is it a numeric parameter? */
|
||
else if (isdigit (env[0]))
|
||
{
|
||
- int n = atoi (env);
|
||
+ unsigned long n = strtoul (env, NULL, 10);
|
||
|
||
if (n >= __libc_argc)
|
||
/* Substitute NULL. */
|
||
diff --git a/resolv/Makefile b/resolv/Makefile
|
||
index b61c0c3e0c..dbd8f8bf4f 100644
|
||
--- a/resolv/Makefile
|
||
+++ b/resolv/Makefile
|
||
@@ -61,6 +61,11 @@ tests += \
|
||
tst-resolv-search \
|
||
tst-resolv-trailing \
|
||
|
||
+# This test calls __res_context_send directly, which is not exported
|
||
+# from libresolv.
|
||
+tests-internal += tst-resolv-txnid-collision
|
||
+tests-static += tst-resolv-txnid-collision
|
||
+
|
||
# These tests need libdl.
|
||
ifeq (yes,$(build-shared))
|
||
tests += \
|
||
@@ -191,6 +196,8 @@ $(objpfx)tst-resolv-search: $(objpfx)libresolv.so $(shared-thread-library)
|
||
$(objpfx)tst-resolv-trailing: $(objpfx)libresolv.so $(shared-thread-library)
|
||
$(objpfx)tst-resolv-threads: \
|
||
$(libdl) $(objpfx)libresolv.so $(shared-thread-library)
|
||
+$(objpfx)tst-resolv-txnid-collision: $(objpfx)libresolv.a \
|
||
+ $(static-thread-library)
|
||
$(objpfx)tst-resolv-canonname: \
|
||
$(libdl) $(objpfx)libresolv.so $(shared-thread-library)
|
||
$(objpfx)tst-resolv-trustad: $(objpfx)libresolv.so $(shared-thread-library)
|
||
diff --git a/resolv/res_send.c b/resolv/res_send.c
|
||
index 7e5fec6646..70e5066031 100644
|
||
--- a/resolv/res_send.c
|
||
+++ b/resolv/res_send.c
|
||
@@ -1342,15 +1342,6 @@ send_dg(res_state statp,
|
||
*terrno = EMSGSIZE;
|
||
return close_and_return_error (statp, resplen2);
|
||
}
|
||
- if ((recvresp1 || hp->id != anhp->id)
|
||
- && (recvresp2 || hp2->id != anhp->id)) {
|
||
- /*
|
||
- * response from old query, ignore it.
|
||
- * XXX - potential security hazard could
|
||
- * be detected here.
|
||
- */
|
||
- goto wait;
|
||
- }
|
||
|
||
/* Paranoia check. Due to the connected UDP socket,
|
||
the kernel has already filtered invalid addresses
|
||
@@ -1360,15 +1351,24 @@ send_dg(res_state statp,
|
||
|
||
/* Check for the correct header layout and a matching
|
||
question. */
|
||
- if ((recvresp1 || !res_queriesmatch(buf, buf + buflen,
|
||
- *thisansp,
|
||
- *thisansp
|
||
- + *thisanssizp))
|
||
- && (recvresp2 || !res_queriesmatch(buf2, buf2 + buflen2,
|
||
- *thisansp,
|
||
- *thisansp
|
||
- + *thisanssizp)))
|
||
- goto wait;
|
||
+ int matching_query = 0; /* Default to no matching query. */
|
||
+ if (!recvresp1
|
||
+ && anhp->id == hp->id
|
||
+ && res_queriesmatch (buf, buf + buflen,
|
||
+ *thisansp, *thisansp + *thisanssizp))
|
||
+ matching_query = 1;
|
||
+ if (!recvresp2
|
||
+ && anhp->id == hp2->id
|
||
+ && res_queriesmatch (buf2, buf2 + buflen2,
|
||
+ *thisansp, *thisansp + *thisanssizp))
|
||
+ matching_query = 2;
|
||
+ if (matching_query == 0)
|
||
+ /* Spurious UDP packet. Drop it and continue
|
||
+ waiting. */
|
||
+ {
|
||
+ need_recompute = 1;
|
||
+ goto wait;
|
||
+ }
|
||
|
||
if (anhp->rcode == SERVFAIL ||
|
||
anhp->rcode == NOTIMP ||
|
||
@@ -1383,7 +1383,7 @@ send_dg(res_state statp,
|
||
/* No data from the first reply. */
|
||
resplen = 0;
|
||
/* We are waiting for a possible second reply. */
|
||
- if (hp->id == anhp->id)
|
||
+ if (matching_query == 1)
|
||
recvresp1 = 1;
|
||
else
|
||
recvresp2 = 1;
|
||
@@ -1414,7 +1414,7 @@ send_dg(res_state statp,
|
||
return (1);
|
||
}
|
||
/* Mark which reply we received. */
|
||
- if (recvresp1 == 0 && hp->id == anhp->id)
|
||
+ if (matching_query == 1)
|
||
recvresp1 = 1;
|
||
else
|
||
recvresp2 = 1;
|
||
diff --git a/resolv/tst-resolv-txnid-collision.c b/resolv/tst-resolv-txnid-collision.c
|
||
new file mode 100644
|
||
index 0000000000..189b76f126
|
||
--- /dev/null
|
||
+++ b/resolv/tst-resolv-txnid-collision.c
|
||
@@ -0,0 +1,334 @@
|
||
+/* Test parallel queries with transaction ID collisions.
|
||
+ Copyright (C) 2020 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <arpa/nameser.h>
|
||
+#include <array_length.h>
|
||
+#include <resolv-internal.h>
|
||
+#include <resolv_context.h>
|
||
+#include <stdbool.h>
|
||
+#include <stdio.h>
|
||
+#include <string.h>
|
||
+#include <support/check.h>
|
||
+#include <support/check_nss.h>
|
||
+#include <support/resolv_test.h>
|
||
+#include <support/support.h>
|
||
+#include <support/test-driver.h>
|
||
+
|
||
+/* Result of parsing a DNS question name.
|
||
+
|
||
+   A question name has the form reorder-N-M.rcode-C.example.net, where
|
||
+   N and M are either 0 or 1, corresponding to the reorder member,
|
||
+ and C is a number that will be stored in the rcode field.
|
||
+
|
||
+ Also see parse_qname below. */
|
||
+struct parsed_qname
|
||
+{
|
||
+ /* The DNS response code requested from the first server. The
|
||
+ second server always responds with RCODE zero. */
|
||
+ int rcode;
|
||
+
|
||
+ /* Indicates whether to perform reordering in the responses from the
|
||
+ respective server. */
|
||
+ bool reorder[2];
|
||
+};
|
||
+
|
||
+/* Fills *PARSED based on QNAME. */
|
||
+static void
|
||
+parse_qname (struct parsed_qname *parsed, const char *qname)
|
||
+{
|
||
+ int reorder0;
|
||
+ int reorder1;
|
||
+ int rcode;
|
||
+ char *suffix;
|
||
+ if (sscanf (qname, "reorder-%d-%d.rcode-%d.%ms",
|
||
+ &reorder0, &reorder1, &rcode, &suffix) == 4)
|
||
+ {
|
||
+ if (reorder0 != 0)
|
||
+ TEST_COMPARE (reorder0, 1);
|
||
+ if (reorder1 != 0)
|
||
+ TEST_COMPARE (reorder1, 1);
|
||
+ TEST_VERIFY (rcode >= 0 && rcode <= 15);
|
||
+ TEST_COMPARE_STRING (suffix, "example.net");
|
||
+ free (suffix);
|
||
+
|
||
+ parsed->rcode = rcode;
|
||
+ parsed->reorder[0] = reorder0;
|
||
+ parsed->reorder[1] = reorder1;
|
||
+ }
|
||
+ else
|
||
+ FAIL_EXIT1 ("unexpected query: %s", qname);
|
||
+}
|
||
+
|
||
+/* Used to construct a response. The first server responds with an
|
||
+ error, the second server succeeds. */
|
||
+static void
|
||
+build_response (const struct resolv_response_context *ctx,
|
||
+ struct resolv_response_builder *b,
|
||
+ const char *qname, uint16_t qclass, uint16_t qtype)
|
||
+{
|
||
+ struct parsed_qname parsed;
|
||
+ parse_qname (&parsed, qname);
|
||
+
|
||
+ switch (ctx->server_index)
|
||
+ {
|
||
+ case 0:
|
||
+ {
|
||
+ struct resolv_response_flags flags = { 0 };
|
||
+ if (parsed.rcode == 0)
|
||
+ /* Simulate a delegation in case a NODATA (RCODE zero)
|
||
+ response is requested. */
|
||
+ flags.clear_ra = true;
|
||
+ else
|
||
+ flags.rcode = parsed.rcode;
|
||
+
|
||
+ resolv_response_init (b, flags);
|
||
+ resolv_response_add_question (b, qname, qclass, qtype);
|
||
+ }
|
||
+ break;
|
||
+
|
||
+ case 1:
|
||
+ {
|
||
+ struct resolv_response_flags flags = { 0, };
|
||
+ resolv_response_init (b, flags);
|
||
+ resolv_response_add_question (b, qname, qclass, qtype);
|
||
+
|
||
+ resolv_response_section (b, ns_s_an);
|
||
+ resolv_response_open_record (b, qname, qclass, qtype, 0);
|
||
+ if (qtype == T_A)
|
||
+ {
|
||
+ char ipv4[4] = { 192, 0, 2, 1 };
|
||
+ resolv_response_add_data (b, &ipv4, sizeof (ipv4));
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ char ipv6[16]
|
||
+ = { 0x20, 0x01, 0xd, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
|
||
+ resolv_response_add_data (b, &ipv6, sizeof (ipv6));
|
||
+ }
|
||
+ resolv_response_close_record (b);
|
||
+ }
|
||
+ break;
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Used to reorder responses. */
|
||
+struct resolv_response_context *previous_query;
|
||
+
|
||
+/* Used to keep track of the queries received. */
|
||
+static int previous_server_index = -1;
|
||
+static uint16_t previous_qtype;
|
||
+
|
||
+/* For each server, buffer the first query and then send both answers
|
||
+ to the second query, reordered if requested. */
|
||
+static void
|
||
+response (const struct resolv_response_context *ctx,
|
||
+ struct resolv_response_builder *b,
|
||
+ const char *qname, uint16_t qclass, uint16_t qtype)
|
||
+{
|
||
+ TEST_VERIFY (qtype == T_A || qtype == T_AAAA);
|
||
+ if (ctx->server_index != 0)
|
||
+ TEST_COMPARE (ctx->server_index, 1);
|
||
+
|
||
+ struct parsed_qname parsed;
|
||
+ parse_qname (&parsed, qname);
|
||
+
|
||
+ if (previous_query == NULL)
|
||
+ {
|
||
+ /* No buffered query. Record this query and do not send a
|
||
+ response. */
|
||
+ TEST_COMPARE (previous_qtype, 0);
|
||
+ previous_query = resolv_response_context_duplicate (ctx);
|
||
+ previous_qtype = qtype;
|
||
+ resolv_response_drop (b);
|
||
+ previous_server_index = ctx->server_index;
|
||
+
|
||
+ if (test_verbose)
|
||
+ printf ("info: buffering first query for: %s\n", qname);
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ TEST_VERIFY (previous_query != 0);
|
||
+ TEST_COMPARE (ctx->server_index, previous_server_index);
|
||
+ TEST_VERIFY (previous_qtype != qtype); /* Not a duplicate. */
|
||
+
|
||
+ /* If reordering, send a response for this query explicitly, and
|
||
+ then skip the implicit send. */
|
||
+ if (parsed.reorder[ctx->server_index])
|
||
+ {
|
||
+ if (test_verbose)
|
||
+ printf ("info: sending reordered second response for: %s\n",
|
||
+ qname);
|
||
+ build_response (ctx, b, qname, qclass, qtype);
|
||
+ resolv_response_send_udp (ctx, b);
|
||
+ resolv_response_drop (b);
|
||
+ }
|
||
+
|
||
+ /* Build a response for the previous query and send it, thus
|
||
+ reordering the two responses. */
|
||
+ {
|
||
+ if (test_verbose)
|
||
+ printf ("info: sending first response for: %s\n", qname);
|
||
+ struct resolv_response_builder *btmp
|
||
+ = resolv_response_builder_allocate (previous_query->query_buffer,
|
||
+ previous_query->query_length);
|
||
+ build_response (ctx, btmp, qname, qclass, previous_qtype);
|
||
+ resolv_response_send_udp (ctx, btmp);
|
||
+ resolv_response_builder_free (btmp);
|
||
+ }
|
||
+
|
||
+ /* If not reordering, send the reply as usual. */
|
||
+ if (!parsed.reorder[ctx->server_index])
|
||
+ {
|
||
+ if (test_verbose)
|
||
+ printf ("info: sending non-reordered second response for: %s\n",
|
||
+ qname);
|
||
+ build_response (ctx, b, qname, qclass, qtype);
|
||
+ }
|
||
+
|
||
+      /* Release the buffered query and prepare for the next one.  */
|
||
+ resolv_response_context_free (previous_query);
|
||
+ previous_query = NULL;
|
||
+ previous_qtype = 0;
|
||
+ previous_server_index = -1;
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Runs a query for QNAME and checks for the expected reply. See
|
||
+ struct parsed_qname for the expected format for QNAME. */
|
||
+static void
|
||
+test_qname (const char *qname, int rcode)
|
||
+{
|
||
+ struct resolv_context *ctx = __resolv_context_get ();
|
||
+ TEST_VERIFY_EXIT (ctx != NULL);
|
||
+
|
||
+ unsigned char q1[512];
|
||
+ int q1len = res_mkquery (QUERY, qname, C_IN, T_A, NULL, 0, NULL,
|
||
+ q1, sizeof (q1));
|
||
+ TEST_VERIFY_EXIT (q1len > 12);
|
||
+
|
||
+ unsigned char q2[512];
|
||
+ int q2len = res_mkquery (QUERY, qname, C_IN, T_AAAA, NULL, 0, NULL,
|
||
+ q2, sizeof (q2));
|
||
+ TEST_VERIFY_EXIT (q2len > 12);
|
||
+
|
||
+ /* Produce a transaction ID collision. */
|
||
+ memcpy (q2, q1, 2);
|
||
+
|
||
+ unsigned char ans1[512];
|
||
+ unsigned char *ans1p = ans1;
|
||
+ unsigned char *ans2p = NULL;
|
||
+ int nans2p = 0;
|
||
+ int resplen2 = 0;
|
||
+ int ans2p_malloced = 0;
|
||
+
|
||
+ /* Perform a parallel A/AAAA query. */
|
||
+ int resplen1 = __res_context_send (ctx, q1, q1len, q2, q2len,
|
||
+ ans1, sizeof (ans1), &ans1p,
|
||
+ &ans2p, &nans2p,
|
||
+ &resplen2, &ans2p_malloced);
|
||
+
|
||
+ TEST_VERIFY (resplen1 > 12);
|
||
+ TEST_VERIFY (resplen2 > 12);
|
||
+ if (resplen1 <= 12 || resplen2 <= 12)
|
||
+ return;
|
||
+
|
||
+ if (rcode == 1 || rcode == 3)
|
||
+ {
|
||
+      /* Format Error and Name Error responses do not trigger
|
||
+ switching to the next server. */
|
||
+ TEST_COMPARE (ans1p[3] & 0x0f, rcode);
|
||
+ TEST_COMPARE (ans2p[3] & 0x0f, rcode);
|
||
+ return;
|
||
+ }
|
||
+
|
||
+ /* The response should be successful. */
|
||
+ TEST_COMPARE (ans1p[3] & 0x0f, 0);
|
||
+ TEST_COMPARE (ans2p[3] & 0x0f, 0);
|
||
+
|
||
+ /* Due to bug 19691, the answer may not be in the slot matching the
|
||
+ query. Assume that the AAAA response is the longer one. */
|
||
+ unsigned char *a_answer;
|
||
+ int a_answer_length;
|
||
+ unsigned char *aaaa_answer;
|
||
+ int aaaa_answer_length;
|
||
+ if (resplen2 > resplen1)
|
||
+ {
|
||
+ a_answer = ans1p;
|
||
+ a_answer_length = resplen1;
|
||
+ aaaa_answer = ans2p;
|
||
+ aaaa_answer_length = resplen2;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ a_answer = ans2p;
|
||
+ a_answer_length = resplen2;
|
||
+ aaaa_answer = ans1p;
|
||
+ aaaa_answer_length = resplen1;
|
||
+ }
|
||
+
|
||
+ {
|
||
+ char *expected = xasprintf ("name: %s\n"
|
||
+ "address: 192.0.2.1\n",
|
||
+ qname);
|
||
+ check_dns_packet (qname, a_answer, a_answer_length, expected);
|
||
+ free (expected);
|
||
+ }
|
||
+ {
|
||
+ char *expected = xasprintf ("name: %s\n"
|
||
+ "address: 2001:db8::1\n",
|
||
+ qname);
|
||
+ check_dns_packet (qname, aaaa_answer, aaaa_answer_length, expected);
|
||
+ free (expected);
|
||
+ }
|
||
+
|
||
+ if (ans2p_malloced)
|
||
+ free (ans2p);
|
||
+
|
||
+ __resolv_context_put (ctx);
|
||
+}
|
||
+
|
||
+static int
|
||
+do_test (void)
|
||
+{
|
||
+ struct resolv_test *aux = resolv_test_start
|
||
+ ((struct resolv_redirect_config)
|
||
+ {
|
||
+ .response_callback = response,
|
||
+
|
||
+       /* The response callback uses global state (the previous_*
|
||
+ variables), and query processing must therefore be
|
||
+ serialized. */
|
||
+ .single_thread_udp = true,
|
||
+ });
|
||
+
|
||
+ for (int rcode = 0; rcode <= 5; ++rcode)
|
||
+ for (int do_reorder_0 = 0; do_reorder_0 < 2; ++do_reorder_0)
|
||
+ for (int do_reorder_1 = 0; do_reorder_1 < 2; ++do_reorder_1)
|
||
+ {
|
||
+ char *qname = xasprintf ("reorder-%d-%d.rcode-%d.example.net",
|
||
+ do_reorder_0, do_reorder_1, rcode);
|
||
+ test_qname (qname, rcode);
|
||
+ free (qname);
|
||
+ }
|
||
+
|
||
+ resolv_test_end (aux);
|
||
+
|
||
+ return 0;
|
||
+}
|
||
+
|
||
+#include <support/test-driver.c>
|
||
diff --git a/rt/Makefile b/rt/Makefile
|
||
index dab5d62a57..93502cfaa7 100644
|
||
--- a/rt/Makefile
|
||
+++ b/rt/Makefile
|
||
@@ -44,6 +44,7 @@ tests := tst-shm tst-timer tst-timer2 \
|
||
tst-aio7 tst-aio8 tst-aio9 tst-aio10 \
|
||
tst-mqueue1 tst-mqueue2 tst-mqueue3 tst-mqueue4 \
|
||
tst-mqueue5 tst-mqueue6 tst-mqueue7 tst-mqueue8 tst-mqueue9 \
|
||
+ tst-bz28213 \
|
||
tst-timer3 tst-timer4 tst-timer5 \
|
||
tst-cpuclock2 tst-cputimer1 tst-cputimer2 tst-cputimer3 \
|
||
tst-shm-cancel
|
||
diff --git a/rt/tst-bz28213.c b/rt/tst-bz28213.c
|
||
new file mode 100644
|
||
index 0000000000..0c096b5a0a
|
||
--- /dev/null
|
||
+++ b/rt/tst-bz28213.c
|
||
@@ -0,0 +1,101 @@
|
||
+/* Bug 28213: test for NULL pointer dereference in mq_notify.
|
||
+ Copyright (C) The GNU Toolchain Authors.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <errno.h>
|
||
+#include <sys/types.h>
|
||
+#include <sys/stat.h>
|
||
+#include <fcntl.h>
|
||
+#include <unistd.h>
|
||
+#include <mqueue.h>
|
||
+#include <signal.h>
|
||
+#include <stdlib.h>
|
||
+#include <string.h>
|
||
+#include <support/check.h>
|
||
+
|
||
+static mqd_t m = -1;
|
||
+static const char msg[] = "hello";
|
||
+
|
||
+static void
|
||
+check_bz28213_cb (union sigval sv)
|
||
+{
|
||
+ char buf[sizeof (msg)];
|
||
+
|
||
+ (void) sv;
|
||
+
|
||
+ TEST_VERIFY_EXIT ((size_t) mq_receive (m, buf, sizeof (buf), NULL)
|
||
+ == sizeof (buf));
|
||
+ TEST_VERIFY_EXIT (memcmp (buf, msg, sizeof (buf)) == 0);
|
||
+
|
||
+ exit (0);
|
||
+}
|
||
+
|
||
+static void
|
||
+check_bz28213 (void)
|
||
+{
|
||
+ struct sigevent sev;
|
||
+
|
||
+ memset (&sev, '\0', sizeof (sev));
|
||
+ sev.sigev_notify = SIGEV_THREAD;
|
||
+ sev.sigev_notify_function = check_bz28213_cb;
|
||
+
|
||
+ /* Step 1: Register & unregister notifier.
|
||
+ Helper thread should receive NOTIFY_REMOVED notification.
|
||
+ In a vulnerable version of glibc, NULL pointer dereference follows. */
|
||
+ TEST_VERIFY_EXIT (mq_notify (m, &sev) == 0);
|
||
+ TEST_VERIFY_EXIT (mq_notify (m, NULL) == 0);
|
||
+
|
||
+ /* Step 2: Once again, register notification.
|
||
+ Try to send one message.
|
||
+     The test is considered successful if the callback calls exit (0).  */
|
||
+ TEST_VERIFY_EXIT (mq_notify (m, &sev) == 0);
|
||
+ TEST_VERIFY_EXIT (mq_send (m, msg, sizeof (msg), 1) == 0);
|
||
+
|
||
+ /* Wait... */
|
||
+ pause ();
|
||
+}
|
||
+
|
||
+static int
|
||
+do_test (void)
|
||
+{
|
||
+ static const char m_name[] = "/bz28213_queue";
|
||
+ struct mq_attr m_attr;
|
||
+
|
||
+ memset (&m_attr, '\0', sizeof (m_attr));
|
||
+ m_attr.mq_maxmsg = 1;
|
||
+ m_attr.mq_msgsize = sizeof (msg);
|
||
+
|
||
+ m = mq_open (m_name,
|
||
+ O_RDWR | O_CREAT | O_EXCL,
|
||
+ 0600,
|
||
+ &m_attr);
|
||
+
|
||
+ if (m < 0)
|
||
+ {
|
||
+ if (errno == ENOSYS)
|
||
+ FAIL_UNSUPPORTED ("POSIX message queues are not implemented\n");
|
||
+ FAIL_EXIT1 ("Failed to create POSIX message queue: %m\n");
|
||
+ }
|
||
+
|
||
+ TEST_VERIFY_EXIT (mq_unlink (m_name) == 0);
|
||
+
|
||
+ check_bz28213 ();
|
||
+
|
||
+ return 0;
|
||
+}
|
||
+
|
||
+#include <support/test-driver.c>
|
||
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
|
||
index 8475fd1f09..eff0c98d82 100644
|
||
--- a/stdio-common/Makefile
|
||
+++ b/stdio-common/Makefile
|
||
@@ -69,7 +69,8 @@ tests := tstscanf test_rdwr test-popen tstgetln test-fseek \
|
||
tst-printf-bz25691 \
|
||
tst-vfprintf-width-prec-alloc \
|
||
tst-printf-fp-free \
|
||
- tst-printf-fp-leak
|
||
+ tst-printf-fp-leak \
|
||
+ test-strerr
|
||
|
||
|
||
test-srcs = tst-unbputc tst-printf tst-printfsz-islongdouble
|
||
diff --git a/stdio-common/errlist.c b/stdio-common/errlist.c
|
||
index d15f13a22a..2ecf121674 100644
|
||
--- a/stdio-common/errlist.c
|
||
+++ b/stdio-common/errlist.c
|
||
@@ -20,9 +20,13 @@
|
||
#include <libintl.h>
|
||
#include <array_length.h>
|
||
|
||
+#ifndef ERR_MAP
|
||
+# define ERR_MAP(n) n
|
||
+#endif
|
||
+
|
||
const char *const _sys_errlist_internal[] =
|
||
{
|
||
-#define _S(n, str) [n] = str,
|
||
+#define _S(n, str) [ERR_MAP(n)] = str,
|
||
#include <errlist.h>
|
||
#undef _S
|
||
};
|
||
@@ -41,20 +45,21 @@ static const union sys_errname_t
|
||
{
|
||
#define MSGSTRFIELD1(line) str##line
|
||
#define MSGSTRFIELD(line) MSGSTRFIELD1(line)
|
||
-#define _S(n, str) char MSGSTRFIELD(__LINE__)[sizeof(str)];
|
||
+#define _S(n, str) char MSGSTRFIELD(__LINE__)[sizeof(#n)];
|
||
#include <errlist.h>
|
||
#undef _S
|
||
};
|
||
char str[0];
|
||
} _sys_errname = { {
|
||
-#define _S(n, s) s,
|
||
+#define _S(n, s) #n,
|
||
#include <errlist.h>
|
||
#undef _S
|
||
} };
|
||
|
||
static const unsigned short _sys_errnameidx[] =
|
||
{
|
||
-#define _S(n, s) [n] = offsetof(union sys_errname_t, MSGSTRFIELD(__LINE__)),
|
||
+#define _S(n, s) \
|
||
+ [ERR_MAP(n)] = offsetof(union sys_errname_t, MSGSTRFIELD(__LINE__)),
|
||
#include <errlist.h>
|
||
#undef _S
|
||
};
|
||
diff --git a/stdio-common/test-strerr.c b/stdio-common/test-strerr.c
|
||
index fded208118..d77b81d507 100644
|
||
--- a/stdio-common/test-strerr.c
|
||
+++ b/stdio-common/test-strerr.c
|
||
@@ -18,46 +18,672 @@
|
||
|
||
#include <string.h>
|
||
#include <errno.h>
|
||
-#include <array_length.h>
|
||
|
||
#include <support/support.h>
|
||
#include <support/check.h>
|
||
|
||
-#define N_(name) name
|
||
-
|
||
-static const char *const errlist[] =
|
||
- {
|
||
-/* This file is auto-generated from errlist.def. */
|
||
-#include <errlist.h>
|
||
- };
|
||
-
|
||
-#define MSGSTR_T errname_t
|
||
-#define MSGSTR errname
|
||
-#define MSGIDX errnameidx
|
||
-#include <errlist-name.h>
|
||
-#undef MSGSTR
|
||
-#undef MSGIDX
|
||
-
|
||
static int
|
||
do_test (void)
|
||
{
|
||
- TEST_VERIFY (strerrordesc_np (-1) == NULL);
|
||
- TEST_VERIFY (strerrordesc_np (array_length (errlist)) == NULL);
|
||
- for (size_t i = 0; i < array_length (errlist); i++)
|
||
- {
|
||
- if (errlist[i] == NULL)
|
||
- continue;
|
||
- TEST_COMPARE_STRING (strerrordesc_np (i), errlist[i]);
|
||
- }
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (0), "Success");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (0), "0");
|
||
|
||
- TEST_VERIFY (strerrorname_np (-1) == NULL);
|
||
- TEST_VERIFY (strerrorname_np (array_length (errlist)) == NULL);
|
||
- for (size_t i = 0; i < array_length (errlist); i++)
|
||
- {
|
||
- if (errlist[i] == NULL)
|
||
- continue;
|
||
- TEST_COMPARE_STRING (strerrorname_np (i), errname.str + errnameidx[i]);
|
||
- }
|
||
+#ifdef EPERM
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EPERM), "Operation not permitted");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EPERM), "EPERM");
|
||
+#endif
|
||
+#ifdef ENOENT
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOENT),
|
||
+ "No such file or directory");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOENT), "ENOENT");
|
||
+#endif
|
||
+#ifdef ESRCH
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ESRCH), "No such process");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ESRCH), "ESRCH");
|
||
+#endif
|
||
+#ifdef EINTR
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EINTR), "Interrupted system call");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EINTR), "EINTR");
|
||
+#endif
|
||
+#ifdef EIO
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EIO), "Input/output error");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EIO), "EIO");
|
||
+#endif
|
||
+#ifdef ENXIO
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENXIO), "No such device or address");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENXIO), "ENXIO");
|
||
+#endif
|
||
+#ifdef E2BIG
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (E2BIG), "Argument list too long");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (E2BIG), "E2BIG");
|
||
+#endif
|
||
+#ifdef ENOEXEC
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOEXEC), "Exec format error");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOEXEC), "ENOEXEC");
|
||
+#endif
|
||
+#ifdef EBADF
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EBADF), "Bad file descriptor");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EBADF), "EBADF");
|
||
+#endif
|
||
+#ifdef ECHILD
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ECHILD), "No child processes");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ECHILD), "ECHILD");
|
||
+#endif
|
||
+#ifdef EDEADLK
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EDEADLK),
|
||
+ "Resource deadlock avoided");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EDEADLK), "EDEADLK");
|
||
+#endif
|
||
+#ifdef ENOMEM
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOMEM), "Cannot allocate memory");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOMEM), "ENOMEM");
|
||
+#endif
|
||
+#ifdef EACCES
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EACCES), "Permission denied");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EACCES), "EACCES");
|
||
+#endif
|
||
+#ifdef EFAULT
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EFAULT), "Bad address");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EFAULT), "EFAULT");
|
||
+#endif
|
||
+#ifdef ENOTBLK
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOTBLK), "Block device required");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOTBLK), "ENOTBLK");
|
||
+#endif
|
||
+#ifdef EBUSY
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EBUSY), "Device or resource busy");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EBUSY), "EBUSY");
|
||
+#endif
|
||
+#ifdef EEXIST
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EEXIST), "File exists");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EEXIST), "EEXIST");
|
||
+#endif
|
||
+#ifdef EXDEV
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EXDEV), "Invalid cross-device link");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EXDEV), "EXDEV");
|
||
+#endif
|
||
+#ifdef ENODEV
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENODEV), "No such device");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENODEV), "ENODEV");
|
||
+#endif
|
||
+#ifdef ENOTDIR
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOTDIR), "Not a directory");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOTDIR), "ENOTDIR");
|
||
+#endif
|
||
+#ifdef EISDIR
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EISDIR), "Is a directory");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EISDIR), "EISDIR");
|
||
+#endif
|
||
+#ifdef EINVAL
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EINVAL), "Invalid argument");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EINVAL), "EINVAL");
|
||
+#endif
|
||
+#ifdef EMFILE
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EMFILE), "Too many open files");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EMFILE), "EMFILE");
|
||
+#endif
|
||
+#ifdef ENFILE
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENFILE),
|
||
+ "Too many open files in system");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENFILE), "ENFILE");
|
||
+#endif
|
||
+#ifdef ENOTTY
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOTTY),
|
||
+ "Inappropriate ioctl for device");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOTTY), "ENOTTY");
|
||
+#endif
|
||
+#ifdef ETXTBSY
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ETXTBSY), "Text file busy");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ETXTBSY), "ETXTBSY");
|
||
+#endif
|
||
+#ifdef EFBIG
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EFBIG), "File too large");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EFBIG), "EFBIG");
|
||
+#endif
|
||
+#ifdef ENOSPC
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOSPC), "No space left on device");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOSPC), "ENOSPC");
|
||
+#endif
|
||
+#ifdef ESPIPE
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ESPIPE), "Illegal seek");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ESPIPE), "ESPIPE");
|
||
+#endif
|
||
+#ifdef EROFS
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EROFS), "Read-only file system");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EROFS), "EROFS");
|
||
+#endif
|
||
+#ifdef EMLINK
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EMLINK), "Too many links");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EMLINK), "EMLINK");
|
||
+#endif
|
||
+#ifdef EPIPE
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EPIPE), "Broken pipe");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EPIPE), "EPIPE");
|
||
+#endif
|
||
+#ifdef EDOM
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EDOM),
|
||
+ "Numerical argument out of domain");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EDOM), "EDOM");
|
||
+#endif
|
||
+#ifdef ERANGE
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ERANGE),
|
||
+ "Numerical result out of range");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ERANGE), "ERANGE");
|
||
+#endif
|
||
+#ifdef EAGAIN
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EAGAIN),
|
||
+ "Resource temporarily unavailable");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EAGAIN), "EAGAIN");
|
||
+#endif
|
||
+#ifdef EINPROGRESS
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EINPROGRESS),
|
||
+ "Operation now in progress");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EINPROGRESS), "EINPROGRESS");
|
||
+#endif
|
||
+#ifdef EALREADY
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EALREADY),
|
||
+ "Operation already in progress");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EALREADY), "EALREADY");
|
||
+#endif
|
||
+#ifdef ENOTSOCK
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOTSOCK),
|
||
+ "Socket operation on non-socket");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOTSOCK), "ENOTSOCK");
|
||
+#endif
|
||
+#ifdef EMSGSIZE
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EMSGSIZE), "Message too long");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EMSGSIZE), "EMSGSIZE");
|
||
+#endif
|
||
+#ifdef EPROTOTYPE
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EPROTOTYPE),
|
||
+ "Protocol wrong type for socket");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EPROTOTYPE), "EPROTOTYPE");
|
||
+#endif
|
||
+#ifdef ENOPROTOOPT
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOPROTOOPT),
|
||
+ "Protocol not available");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOPROTOOPT), "ENOPROTOOPT");
|
||
+#endif
|
||
+#ifdef EPROTONOSUPPORT
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EPROTONOSUPPORT),
|
||
+ "Protocol not supported");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EPROTONOSUPPORT), "EPROTONOSUPPORT");
|
||
+#endif
|
||
+#ifdef ESOCKTNOSUPPORT
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ESOCKTNOSUPPORT),
|
||
+ "Socket type not supported");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ESOCKTNOSUPPORT), "ESOCKTNOSUPPORT");
|
||
+#endif
|
||
+#ifdef EOPNOTSUPP
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EOPNOTSUPP),
|
||
+ "Operation not supported");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EOPNOTSUPP), "EOPNOTSUPP");
|
||
+#endif
|
||
+#ifdef EPFNOSUPPORT
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EPFNOSUPPORT),
|
||
+ "Protocol family not supported");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EPFNOSUPPORT), "EPFNOSUPPORT");
|
||
+#endif
|
||
+#ifdef EAFNOSUPPORT
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EAFNOSUPPORT),
|
||
+ "Address family not supported by protocol");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EAFNOSUPPORT), "EAFNOSUPPORT");
|
||
+#endif
|
||
+#ifdef EADDRINUSE
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EADDRINUSE),
|
||
+ "Address already in use");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EADDRINUSE), "EADDRINUSE");
|
||
+#endif
|
||
+#ifdef EADDRNOTAVAIL
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EADDRNOTAVAIL),
|
||
+ "Cannot assign requested address");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EADDRNOTAVAIL), "EADDRNOTAVAIL");
|
||
+#endif
|
||
+#ifdef ENETDOWN
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENETDOWN), "Network is down");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENETDOWN), "ENETDOWN");
|
||
+#endif
|
||
+#ifdef ENETUNREACH
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENETUNREACH),
|
||
+ "Network is unreachable");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENETUNREACH), "ENETUNREACH");
|
||
+#endif
|
||
+#ifdef ENETRESET
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENETRESET),
|
||
+ "Network dropped connection on reset");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENETRESET), "ENETRESET");
|
||
+#endif
|
||
+#ifdef ECONNABORTED
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ECONNABORTED),
|
||
+ "Software caused connection abort");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ECONNABORTED), "ECONNABORTED");
|
||
+#endif
|
||
+#ifdef ECONNRESET
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ECONNRESET),
|
||
+ "Connection reset by peer");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ECONNRESET), "ECONNRESET");
|
||
+#endif
|
||
+#ifdef ENOBUFS
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOBUFS),
|
||
+ "No buffer space available");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOBUFS), "ENOBUFS");
|
||
+#endif
|
||
+#ifdef EISCONN
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EISCONN),
|
||
+ "Transport endpoint is already connected");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EISCONN), "EISCONN");
|
||
+#endif
|
||
+#ifdef ENOTCONN
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOTCONN),
|
||
+ "Transport endpoint is not connected");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOTCONN), "ENOTCONN");
|
||
+#endif
|
||
+#ifdef EDESTADDRREQ
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EDESTADDRREQ),
|
||
+ "Destination address required");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EDESTADDRREQ), "EDESTADDRREQ");
|
||
+#endif
|
||
+#ifdef ESHUTDOWN
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ESHUTDOWN),
|
||
+ "Cannot send after transport endpoint shutdown");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ESHUTDOWN), "ESHUTDOWN");
|
||
+#endif
|
||
+#ifdef ETOOMANYREFS
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ETOOMANYREFS),
|
||
+ "Too many references: cannot splice");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ETOOMANYREFS), "ETOOMANYREFS");
|
||
+#endif
|
||
+#ifdef ETIMEDOUT
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ETIMEDOUT), "Connection timed out");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ETIMEDOUT), "ETIMEDOUT");
|
||
+#endif
|
||
+#ifdef ECONNREFUSED
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ECONNREFUSED), "Connection refused");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ECONNREFUSED), "ECONNREFUSED");
|
||
+#endif
|
||
+#ifdef ELOOP
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ELOOP),
|
||
+ "Too many levels of symbolic links");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ELOOP), "ELOOP");
|
||
+#endif
|
||
+#ifdef ENAMETOOLONG
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENAMETOOLONG), "File name too long");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENAMETOOLONG), "ENAMETOOLONG");
|
||
+#endif
|
||
+#ifdef EHOSTDOWN
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EHOSTDOWN), "Host is down");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EHOSTDOWN), "EHOSTDOWN");
|
||
+#endif
|
||
+#ifdef EHOSTUNREACH
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EHOSTUNREACH), "No route to host");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EHOSTUNREACH), "EHOSTUNREACH");
|
||
+#endif
|
||
+#ifdef ENOTEMPTY
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOTEMPTY), "Directory not empty");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOTEMPTY), "ENOTEMPTY");
|
||
+#endif
|
||
+#ifdef EUSERS
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EUSERS), "Too many users");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EUSERS), "EUSERS");
|
||
+#endif
|
||
+#ifdef EDQUOT
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EDQUOT), "Disk quota exceeded");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EDQUOT), "EDQUOT");
|
||
+#endif
|
||
+#ifdef ESTALE
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ESTALE), "Stale file handle");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ESTALE), "ESTALE");
|
||
+#endif
|
||
+#ifdef EREMOTE
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EREMOTE), "Object is remote");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EREMOTE), "EREMOTE");
|
||
+#endif
|
||
+#ifdef ENOLCK
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOLCK), "No locks available");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOLCK), "ENOLCK");
|
||
+#endif
|
||
+#ifdef ENOSYS
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOSYS), "Function not implemented");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOSYS), "ENOSYS");
|
||
+#endif
|
||
+#ifdef EILSEQ
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EILSEQ),
|
||
+ "Invalid or incomplete multibyte or wide character");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EILSEQ), "EILSEQ");
|
||
+#endif
|
||
+#ifdef EBADMSG
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EBADMSG), "Bad message");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EBADMSG), "EBADMSG");
|
||
+#endif
|
||
+#ifdef EIDRM
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EIDRM), "Identifier removed");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EIDRM), "EIDRM");
|
||
+#endif
|
||
+#ifdef EMULTIHOP
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EMULTIHOP), "Multihop attempted");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EMULTIHOP), "EMULTIHOP");
|
||
+#endif
|
||
+#ifdef ENODATA
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENODATA), "No data available");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENODATA), "ENODATA");
|
||
+#endif
|
||
+#ifdef ENOLINK
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOLINK), "Link has been severed");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOLINK), "ENOLINK");
|
||
+#endif
|
||
+#ifdef ENOMSG
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOMSG),
|
||
+ "No message of desired type");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOMSG), "ENOMSG");
|
||
+#endif
|
||
+#ifdef ENOSR
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOSR), "Out of streams resources");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOSR), "ENOSR");
|
||
+#endif
|
||
+#ifdef ENOSTR
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOSTR), "Device not a stream");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOSTR), "ENOSTR");
|
||
+#endif
|
||
+#ifdef EOVERFLOW
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EOVERFLOW),
|
||
+ "Value too large for defined data type");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EOVERFLOW), "EOVERFLOW");
|
||
+#endif
|
||
+#ifdef EPROTO
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EPROTO), "Protocol error");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EPROTO), "EPROTO");
|
||
+#endif
|
||
+#ifdef ETIME
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ETIME), "Timer expired");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ETIME), "ETIME");
|
||
+#endif
|
||
+#ifdef ECANCELED
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ECANCELED), "Operation canceled");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ECANCELED), "ECANCELED");
|
||
+#endif
|
||
+#ifdef EOWNERDEAD
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EOWNERDEAD), "Owner died");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EOWNERDEAD), "EOWNERDEAD");
|
||
+#endif
|
||
+#ifdef ENOTRECOVERABLE
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOTRECOVERABLE),
|
||
+ "State not recoverable");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOTRECOVERABLE), "ENOTRECOVERABLE");
|
||
+#endif
|
||
+#ifdef ERESTART
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ERESTART),
|
||
+ "Interrupted system call should be restarted");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ERESTART), "ERESTART");
|
||
+#endif
|
||
+#ifdef ECHRNG
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ECHRNG),
|
||
+ "Channel number out of range");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ECHRNG), "ECHRNG");
|
||
+#endif
|
||
+#ifdef EL2NSYNC
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EL2NSYNC),
|
||
+ "Level 2 not synchronized");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EL2NSYNC), "EL2NSYNC");
|
||
+#endif
|
||
+#ifdef EL3HLT
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EL3HLT), "Level 3 halted");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EL3HLT), "EL3HLT");
|
||
+#endif
|
||
+#ifdef EL3RST
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EL3RST), "Level 3 reset");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EL3RST), "EL3RST");
|
||
+#endif
|
||
+#ifdef ELNRNG
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ELNRNG), "Link number out of range");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ELNRNG), "ELNRNG");
|
||
+#endif
|
||
+#ifdef EUNATCH
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EUNATCH),
|
||
+ "Protocol driver not attached");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EUNATCH), "EUNATCH");
|
||
+#endif
|
||
+#ifdef ENOCSI
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOCSI),
|
||
+ "No CSI structure available");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOCSI), "ENOCSI");
|
||
+#endif
|
||
+#ifdef EL2HLT
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EL2HLT), "Level 2 halted");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EL2HLT), "EL2HLT");
|
||
+#endif
|
||
+#ifdef EBADE
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EBADE), "Invalid exchange");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EBADE), "EBADE");
|
||
+#endif
|
||
+#ifdef EBADR
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EBADR),
|
||
+ "Invalid request descriptor");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EBADR), "EBADR");
|
||
+#endif
|
||
+#ifdef EXFULL
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EXFULL), "Exchange full");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EXFULL), "EXFULL");
|
||
+#endif
|
||
+#ifdef ENOANO
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOANO), "No anode");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOANO), "ENOANO");
|
||
+#endif
|
||
+#ifdef EBADRQC
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EBADRQC), "Invalid request code");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EBADRQC), "EBADRQC");
|
||
+#endif
|
||
+#ifdef EBADSLT
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EBADSLT), "Invalid slot");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EBADSLT), "EBADSLT");
|
||
+#endif
|
||
+#ifdef EBFONT
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EBFONT), "Bad font file format");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EBFONT), "EBFONT");
|
||
+#endif
|
||
+#ifdef ENONET
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENONET),
|
||
+ "Machine is not on the network");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENONET), "ENONET");
|
||
+#endif
|
||
+#ifdef ENOPKG
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOPKG), "Package not installed");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOPKG), "ENOPKG");
|
||
+#endif
|
||
+#ifdef EADV
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EADV), "Advertise error");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EADV), "EADV");
|
||
+#endif
|
||
+#ifdef ESRMNT
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ESRMNT), "Srmount error");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ESRMNT), "ESRMNT");
|
||
+#endif
|
||
+#ifdef ECOMM
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ECOMM),
|
||
+ "Communication error on send");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ECOMM), "ECOMM");
|
||
+#endif
|
||
+#ifdef EDOTDOT
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EDOTDOT), "RFS specific error");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EDOTDOT), "EDOTDOT");
|
||
+#endif
|
||
+#ifdef ENOTUNIQ
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOTUNIQ),
|
||
+ "Name not unique on network");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOTUNIQ), "ENOTUNIQ");
|
||
+#endif
|
||
+#ifdef EBADFD
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EBADFD),
|
||
+ "File descriptor in bad state");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EBADFD), "EBADFD");
|
||
+#endif
|
||
+#ifdef EREMCHG
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EREMCHG), "Remote address changed");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EREMCHG), "EREMCHG");
|
||
+#endif
|
||
+#ifdef ELIBACC
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ELIBACC),
|
||
+ "Can not access a needed shared library");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ELIBACC), "ELIBACC");
|
||
+#endif
|
||
+#ifdef ELIBBAD
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ELIBBAD),
|
||
+ "Accessing a corrupted shared library");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ELIBBAD), "ELIBBAD");
|
||
+#endif
|
||
+#ifdef ELIBSCN
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ELIBSCN),
|
||
+ ".lib section in a.out corrupted");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ELIBSCN), "ELIBSCN");
|
||
+#endif
|
||
+#ifdef ELIBMAX
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ELIBMAX),
|
||
+ "Attempting to link in too many shared libraries");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ELIBMAX), "ELIBMAX");
|
||
+#endif
|
||
+#ifdef ELIBEXEC
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ELIBEXEC),
|
||
+ "Cannot exec a shared library directly");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ELIBEXEC), "ELIBEXEC");
|
||
+#endif
|
||
+#ifdef ESTRPIPE
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ESTRPIPE), "Streams pipe error");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ESTRPIPE), "ESTRPIPE");
|
||
+#endif
|
||
+#ifdef EUCLEAN
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EUCLEAN),
|
||
+ "Structure needs cleaning");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EUCLEAN), "EUCLEAN");
|
||
+#endif
|
||
+#ifdef ENOTNAM
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOTNAM),
|
||
+ "Not a XENIX named type file");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOTNAM), "ENOTNAM");
|
||
+#endif
|
||
+#ifdef ENAVAIL
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENAVAIL),
|
||
+ "No XENIX semaphores available");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENAVAIL), "ENAVAIL");
|
||
+#endif
|
||
+#ifdef EISNAM
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EISNAM), "Is a named type file");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EISNAM), "EISNAM");
|
||
+#endif
|
||
+#ifdef EREMOTEIO
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EREMOTEIO), "Remote I/O error");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EREMOTEIO), "EREMOTEIO");
|
||
+#endif
|
||
+#ifdef ENOMEDIUM
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOMEDIUM), "No medium found");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOMEDIUM), "ENOMEDIUM");
|
||
+#endif
|
||
+#ifdef EMEDIUMTYPE
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EMEDIUMTYPE), "Wrong medium type");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EMEDIUMTYPE), "EMEDIUMTYPE");
|
||
+#endif
|
||
+#ifdef ENOKEY
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOKEY),
|
||
+ "Required key not available");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOKEY), "ENOKEY");
|
||
+#endif
|
||
+#ifdef EKEYEXPIRED
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EKEYEXPIRED), "Key has expired");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EKEYEXPIRED), "EKEYEXPIRED");
|
||
+#endif
|
||
+#ifdef EKEYREVOKED
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EKEYREVOKED),
|
||
+ "Key has been revoked");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EKEYREVOKED), "EKEYREVOKED");
|
||
+#endif
|
||
+#ifdef EKEYREJECTED
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EKEYREJECTED),
|
||
+ "Key was rejected by service");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EKEYREJECTED), "EKEYREJECTED");
|
||
+#endif
|
||
+#ifdef ERFKILL
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ERFKILL),
|
||
+ "Operation not possible due to RF-kill");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ERFKILL), "ERFKILL");
|
||
+#endif
|
||
+#ifdef EHWPOISON
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EHWPOISON),
|
||
+ "Memory page has hardware error");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EHWPOISON), "EHWPOISON");
|
||
+#endif
|
||
+#ifdef EBADRPC
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EBADRPC), "RPC struct is bad");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EBADRPC), "EBADRPC");
|
||
+#endif
|
||
+#ifdef EFTYPE
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EFTYPE),
|
||
+ "Inappropriate file type or format");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EFTYPE), "EFTYPE");
|
||
+#endif
|
||
+#ifdef EPROCUNAVAIL
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EPROCUNAVAIL),
|
||
+ "RPC bad procedure for program");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EPROCUNAVAIL), "EPROCUNAVAIL");
|
||
+#endif
|
||
+#ifdef EAUTH
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EAUTH), "Authentication error");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EAUTH), "EAUTH");
|
||
+#endif
|
||
+#ifdef EDIED
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EDIED), "Translator died");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EDIED), "EDIED");
|
||
+#endif
|
||
+#ifdef ERPCMISMATCH
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ERPCMISMATCH), "RPC version wrong");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ERPCMISMATCH), "ERPCMISMATCH");
|
||
+#endif
|
||
+#ifdef EGREGIOUS
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EGREGIOUS),
|
||
+ "You really blew it this time");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EGREGIOUS), "EGREGIOUS");
|
||
+#endif
|
||
+#ifdef EPROCLIM
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EPROCLIM), "Too many processes");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EPROCLIM), "EPROCLIM");
|
||
+#endif
|
||
+#ifdef EGRATUITOUS
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EGRATUITOUS), "Gratuitous error");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EGRATUITOUS), "EGRATUITOUS");
|
||
+#endif
|
||
+#if defined (ENOTSUP) && ENOTSUP != EOPNOTSUPP
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENOTSUP), "Not supported");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENOTSUP), "ENOTSUP");
|
||
+#endif
|
||
+#ifdef EPROGMISMATCH
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EPROGMISMATCH),
|
||
+ "RPC program version wrong");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EPROGMISMATCH), "EPROGMISMATCH");
|
||
+#endif
|
||
+#ifdef EBACKGROUND
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EBACKGROUND),
|
||
+ "Inappropriate operation for background process");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EBACKGROUND), "EBACKGROUND");
|
||
+#endif
|
||
+#ifdef EIEIO
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EIEIO), "Computer bought the farm");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EIEIO), "EIEIO");
|
||
+#endif
|
||
+#if defined (EWOULDBLOCK) && EWOULDBLOCK != EAGAIN
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EWOULDBLOCK),
|
||
+ "Operation would block");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EWOULDBLOCK), "EWOULDBLOCK");
|
||
+#endif
|
||
+#ifdef ENEEDAUTH
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ENEEDAUTH), "Need authenticator");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ENEEDAUTH), "ENEEDAUTH");
|
||
+#endif
|
||
+#ifdef ED
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (ED), "?");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (ED), "ED");
|
||
+#endif
|
||
+#ifdef EPROGUNAVAIL
|
||
+ TEST_COMPARE_STRING (strerrordesc_np (EPROGUNAVAIL),
|
||
+ "RPC program not available");
|
||
+ TEST_COMPARE_STRING (strerrorname_np (EPROGUNAVAIL), "EPROGUNAVAIL");
|
||
+#endif
|
||
|
||
return 0;
|
||
}
|
||
diff --git a/stdio-common/vfscanf-internal.c b/stdio-common/vfscanf-internal.c
|
||
index 95b46dcbeb..3a323547f9 100644
|
||
--- a/stdio-common/vfscanf-internal.c
|
||
+++ b/stdio-common/vfscanf-internal.c
|
||
@@ -277,7 +277,7 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
|
||
#endif
|
||
{
|
||
va_list arg;
|
||
- const CHAR_T *f = format;
|
||
+ const UCHAR_T *f = (const UCHAR_T *) format;
|
||
UCHAR_T fc; /* Current character of the format. */
|
||
WINT_T done = 0; /* Assignments done. */
|
||
size_t read_in = 0; /* Chars read in. */
|
||
@@ -415,10 +415,11 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
|
||
#endif
|
||
|
||
#ifndef COMPILE_WSCANF
|
||
- if (!isascii ((unsigned char) *f))
|
||
+ if (!isascii (*f))
|
||
{
|
||
/* Non-ASCII, may be a multibyte. */
|
||
- int len = __mbrlen (f, strlen (f), &state);
|
||
+ int len = __mbrlen ((const char *) f, strlen ((const char *) f),
|
||
+ &state);
|
||
if (len > 0)
|
||
{
|
||
do
|
||
@@ -426,7 +427,7 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
|
||
c = inchar ();
|
||
if (__glibc_unlikely (c == EOF))
|
||
input_error ();
|
||
- else if (c != (unsigned char) *f++)
|
||
+ else if (c != *f++)
|
||
{
|
||
ungetc_not_eof (c, s);
|
||
conv_error ();
|
||
@@ -484,9 +485,9 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
|
||
char_buffer_rewind (&charbuf);
|
||
|
||
/* Check for a positional parameter specification. */
|
||
- if (ISDIGIT ((UCHAR_T) *f))
|
||
+ if (ISDIGIT (*f))
|
||
{
|
||
- argpos = read_int ((const UCHAR_T **) &f);
|
||
+ argpos = read_int (&f);
|
||
if (*f == L_('$'))
|
||
++f;
|
||
else
|
||
@@ -521,8 +522,8 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
|
||
|
||
/* Find the maximum field width. */
|
||
width = 0;
|
||
- if (ISDIGIT ((UCHAR_T) *f))
|
||
- width = read_int ((const UCHAR_T **) &f);
|
||
+ if (ISDIGIT (*f))
|
||
+ width = read_int (&f);
|
||
got_width:
|
||
if (width == 0)
|
||
width = -1;
|
||
@@ -2522,12 +2523,11 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
|
||
}
|
||
|
||
while ((fc = *f++) != '\0' && fc != ']')
|
||
- if (fc == '-' && *f != '\0' && *f != ']'
|
||
- && (unsigned char) f[-2] <= (unsigned char) *f)
|
||
+ if (fc == '-' && *f != '\0' && *f != ']' && f[-2] <= *f)
|
||
{
|
||
/* Add all characters from the one before the '-'
|
||
up to (but not including) the next format char. */
|
||
- for (fc = (unsigned char) f[-2]; fc < (unsigned char) *f; ++fc)
|
||
+ for (fc = f[-2]; fc < *f; ++fc)
|
||
((char *)charbuf.scratch.data)[fc] = 1;
|
||
}
|
||
else
|
||
diff --git a/stdlib/tst-secure-getenv.c b/stdlib/tst-secure-getenv.c
|
||
index 3cfe9a05c3..d4b1139c5e 100644
|
||
--- a/stdlib/tst-secure-getenv.c
|
||
+++ b/stdlib/tst-secure-getenv.c
|
||
@@ -30,167 +30,12 @@
|
||
#include <sys/wait.h>
|
||
#include <unistd.h>
|
||
|
||
+#include <support/check.h>
|
||
#include <support/support.h>
|
||
+#include <support/capture_subprocess.h>
|
||
#include <support/test-driver.h>
|
||
|
||
static char MAGIC_ARGUMENT[] = "run-actual-test";
|
||
-#define MAGIC_STATUS 19
|
||
-
|
||
-/* Return a GID which is not our current GID, but is present in the
|
||
- supplementary group list. */
|
||
-static gid_t
|
||
-choose_gid (void)
|
||
-{
|
||
- int count = getgroups (0, NULL);
|
||
- if (count < 0)
|
||
- {
|
||
- printf ("getgroups: %m\n");
|
||
- exit (1);
|
||
- }
|
||
- gid_t *groups;
|
||
- groups = xcalloc (count, sizeof (*groups));
|
||
- int ret = getgroups (count, groups);
|
||
- if (ret < 0)
|
||
- {
|
||
- printf ("getgroups: %m\n");
|
||
- exit (1);
|
||
- }
|
||
- gid_t current = getgid ();
|
||
- gid_t not_current = 0;
|
||
- for (int i = 0; i < ret; ++i)
|
||
- {
|
||
- if (groups[i] != current)
|
||
- {
|
||
- not_current = groups[i];
|
||
- break;
|
||
- }
|
||
- }
|
||
- free (groups);
|
||
- return not_current;
|
||
-}
|
||
-
|
||
-
|
||
-/* Copies the executable into a restricted directory, so that we can
|
||
- safely make it SGID with the TARGET group ID. Then runs the
|
||
- executable. */
|
||
-static int
|
||
-run_executable_sgid (gid_t target)
|
||
-{
|
||
- char *dirname = xasprintf ("%s/secure-getenv.%jd",
|
||
- test_dir, (intmax_t) getpid ());
|
||
- char *execname = xasprintf ("%s/bin", dirname);
|
||
- int infd = -1;
|
||
- int outfd = -1;
|
||
- int ret = -1;
|
||
- if (mkdir (dirname, 0700) < 0)
|
||
- {
|
||
- printf ("mkdir: %m\n");
|
||
- goto err;
|
||
- }
|
||
- infd = open ("/proc/self/exe", O_RDONLY);
|
||
- if (infd < 0)
|
||
- {
|
||
- printf ("open (/proc/self/exe): %m\n");
|
||
- goto err;
|
||
- }
|
||
- outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700);
|
||
- if (outfd < 0)
|
||
- {
|
||
- printf ("open (%s): %m\n", execname);
|
||
- goto err;
|
||
- }
|
||
- char buf[4096];
|
||
- for (;;)
|
||
- {
|
||
- ssize_t rdcount = read (infd, buf, sizeof (buf));
|
||
- if (rdcount < 0)
|
||
- {
|
||
- printf ("read: %m\n");
|
||
- goto err;
|
||
- }
|
||
- if (rdcount == 0)
|
||
- break;
|
||
- char *p = buf;
|
||
- char *end = buf + rdcount;
|
||
- while (p != end)
|
||
- {
|
||
- ssize_t wrcount = write (outfd, buf, end - p);
|
||
- if (wrcount == 0)
|
||
- errno = ENOSPC;
|
||
- if (wrcount <= 0)
|
||
- {
|
||
- printf ("write: %m\n");
|
||
- goto err;
|
||
- }
|
||
- p += wrcount;
|
||
- }
|
||
- }
|
||
- if (fchown (outfd, getuid (), target) < 0)
|
||
- {
|
||
- printf ("fchown (%s): %m\n", execname);
|
||
- goto err;
|
||
- }
|
||
- if (fchmod (outfd, 02750) < 0)
|
||
- {
|
||
- printf ("fchmod (%s): %m\n", execname);
|
||
- goto err;
|
||
- }
|
||
- if (close (outfd) < 0)
|
||
- {
|
||
- printf ("close (outfd): %m\n");
|
||
- goto err;
|
||
- }
|
||
- if (close (infd) < 0)
|
||
- {
|
||
- printf ("close (infd): %m\n");
|
||
- goto err;
|
||
- }
|
||
-
|
||
- int kid = fork ();
|
||
- if (kid < 0)
|
||
- {
|
||
- printf ("fork: %m\n");
|
||
- goto err;
|
||
- }
|
||
- if (kid == 0)
|
||
- {
|
||
- /* Child process. */
|
||
- char *args[] = { execname, MAGIC_ARGUMENT, NULL };
|
||
- execve (execname, args, environ);
|
||
- printf ("execve (%s): %m\n", execname);
|
||
- _exit (1);
|
||
- }
|
||
- int status;
|
||
- if (waitpid (kid, &status, 0) < 0)
|
||
- {
|
||
- printf ("waitpid: %m\n");
|
||
- goto err;
|
||
- }
|
||
- if (!WIFEXITED (status) || WEXITSTATUS (status) != MAGIC_STATUS)
|
||
- {
|
||
- printf ("Unexpected exit status %d from child process\n",
|
||
- status);
|
||
- goto err;
|
||
- }
|
||
- ret = 0;
|
||
-
|
||
-err:
|
||
- if (outfd >= 0)
|
||
- close (outfd);
|
||
- if (infd >= 0)
|
||
- close (infd);
|
||
- if (execname)
|
||
- {
|
||
- unlink (execname);
|
||
- free (execname);
|
||
- }
|
||
- if (dirname)
|
||
- {
|
||
- rmdir (dirname);
|
||
- free (dirname);
|
||
- }
|
||
- return ret;
|
||
-}
|
||
|
||
static int
|
||
do_test (void)
|
||
@@ -212,15 +57,15 @@ do_test (void)
|
||
exit (1);
|
||
}
|
||
|
||
- gid_t target = choose_gid ();
|
||
- if (target == 0)
|
||
- {
|
||
- fprintf (stderr,
|
||
- "Could not find a suitable GID for user %jd, skipping test\n",
|
||
- (intmax_t) getuid ());
|
||
- exit (0);
|
||
- }
|
||
- return run_executable_sgid (target);
|
||
+  int status = support_capture_subprogram_self_sgid (MAGIC_ARGUMENT);
+
+  /* WEXITSTATUS is only meaningful once WIFEXITED has been checked.  */
+  if (!WIFEXITED (status))
+    FAIL_EXIT1 ("Unexpected exit status %d from child process\n", status);
+  if (WEXITSTATUS (status) == EXIT_UNSUPPORTED)
+    return EXIT_UNSUPPORTED;
+  /* The child exits with 3 or 4 on failure; do not report success then.  */
+  return WEXITSTATUS (status) != 0;
|
||
}
|
||
|
||
static void
|
||
@@ -229,23 +74,15 @@ alternative_main (int argc, char **argv)
|
||
if (argc == 2 && strcmp (argv[1], MAGIC_ARGUMENT) == 0)
|
||
{
|
||
if (getgid () == getegid ())
|
||
- {
|
||
- /* This can happen if the file system is mounted nosuid. */
|
||
- fprintf (stderr, "SGID failed: GID and EGID match (%jd)\n",
|
||
- (intmax_t) getgid ());
|
||
- exit (MAGIC_STATUS);
|
||
- }
|
||
+ /* This can happen if the file system is mounted nosuid. */
|
||
+ FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n",
|
||
+ (intmax_t) getgid ());
|
||
if (getenv ("PATH") == NULL)
|
||
- {
|
||
- printf ("PATH variable not present\n");
|
||
- exit (3);
|
||
- }
|
||
+ FAIL_EXIT (3, "PATH variable not present\n");
|
||
if (secure_getenv ("PATH") != NULL)
|
||
- {
|
||
- printf ("PATH variable not filtered out\n");
|
||
- exit (4);
|
||
- }
|
||
- exit (MAGIC_STATUS);
|
||
+ FAIL_EXIT (4, "PATH variable not filtered out\n");
|
||
+
|
||
+ exit (EXIT_SUCCESS);
|
||
}
|
||
}
|
||
|
||
diff --git a/string/bits/string_fortified.h b/string/bits/string_fortified.h
|
||
index 309d0f39b2..c8d3051af8 100644
|
||
--- a/string/bits/string_fortified.h
|
||
+++ b/string/bits/string_fortified.h
|
||
@@ -22,11 +22,6 @@
|
||
# error "Never use <bits/string_fortified.h> directly; include <string.h> instead."
|
||
#endif
|
||
|
||
-#if !__GNUC_PREREQ (5,0)
|
||
-__warndecl (__warn_memset_zero_len,
|
||
- "memset used with constant zero length parameter; this could be due to transposed parameters");
|
||
-#endif
|
||
-
|
||
__fortify_function void *
|
||
__NTH (memcpy (void *__restrict __dest, const void *__restrict __src,
|
||
size_t __len))
|
||
@@ -58,16 +53,6 @@ __NTH (mempcpy (void *__restrict __dest, const void *__restrict __src,
|
||
__fortify_function void *
|
||
__NTH (memset (void *__dest, int __ch, size_t __len))
|
||
{
|
||
- /* GCC-5.0 and newer implements these checks in the compiler, so we don't
|
||
- need them here. */
|
||
-#if !__GNUC_PREREQ (5,0)
|
||
- if (__builtin_constant_p (__len) && __len == 0
|
||
- && (!__builtin_constant_p (__ch) || __ch != 0))
|
||
- {
|
||
- __warn_memset_zero_len ();
|
||
- return __dest;
|
||
- }
|
||
-#endif
|
||
return __builtin___memset_chk (__dest, __ch, __len, __bos0 (__dest));
|
||
}
|
||
|
||
diff --git a/string/test-memchr.c b/string/test-memchr.c
|
||
index 5dd0aa5470..de70e794d9 100644
|
||
--- a/string/test-memchr.c
|
||
+++ b/string/test-memchr.c
|
||
@@ -65,8 +65,8 @@ do_one_test (impl_t *impl, const CHAR *s, int c, size_t n, CHAR *exp_res)
|
||
CHAR *res = CALL (impl, s, c, n);
|
||
if (res != exp_res)
|
||
{
|
||
- error (0, 0, "Wrong result in function %s %p %p", impl->name,
|
||
- res, exp_res);
|
||
+ error (0, 0, "Wrong result in function %s (%p, %d, %zu) -> %p != %p",
|
||
+ impl->name, s, c, n, res, exp_res);
|
||
ret = 1;
|
||
return;
|
||
}
|
||
@@ -91,7 +91,7 @@ do_test (size_t align, size_t pos, size_t len, size_t n, int seek_char)
|
||
}
|
||
buf[align + len] = 0;
|
||
|
||
- if (pos < len)
|
||
+ if (pos < MIN(n, len))
|
||
{
|
||
buf[align + pos] = seek_char;
|
||
buf[align + len] = -seek_char;
|
||
@@ -107,6 +107,38 @@ do_test (size_t align, size_t pos, size_t len, size_t n, int seek_char)
|
||
do_one_test (impl, (CHAR *) (buf + align), seek_char, n, result);
|
||
}
|
||
|
||
+static void
+do_overflow_tests (void)
+{
+  /* Drive do_test with very large N values built from SIZE_MAX, the
+     address of buf1 and high-bit masks (presumably to expose overflow).  */
+  const uintptr_t base = (uintptr_t) buf1;
+  size_t pos, bit, mask;
+
+  for (pos = 0; pos < 750; ++pos)
+    {
+      do_test (0, pos, 751, SIZE_MAX - pos, BIG_CHAR);
+      do_test (0, pos, 751, pos - base, BIG_CHAR);
+      do_test (0, pos, 751, -base - pos, BIG_CHAR);
+      do_test (0, pos, 751, SIZE_MAX - base - pos, BIG_CHAR);
+      do_test (0, pos, 751, SIZE_MAX - base + pos, BIG_CHAR);
+      /* MASK gains one set bit per pass: top bit first, bit 1 last.  */
+      for (mask = 0, bit = 8 * sizeof (size_t) - 1; bit != 0; --bit)
+        {
+          mask |= (size_t) 1 << bit;
+          const size_t inv = ~mask;
+          do_test (0, pos, 751, mask - pos, BIG_CHAR);
+          do_test (0, pos, 751, mask + pos, BIG_CHAR);
+          do_test (0, pos, 751, mask - base - pos, BIG_CHAR);
+          do_test (0, pos, 751, mask - base + pos, BIG_CHAR);
+          do_test (0, pos, 751, inv - pos, BIG_CHAR);
+          do_test (0, pos, 751, inv + pos, BIG_CHAR);
+          do_test (0, pos, 751, inv - base - pos, BIG_CHAR);
+          do_test (0, pos, 751, inv - base + pos, BIG_CHAR);
+        }
+    }
+}
|
||
+
|
||
static void
|
||
do_random_tests (void)
|
||
{
|
||
@@ -221,6 +253,7 @@ test_main (void)
|
||
do_test (page_size / 2 - i, i, i, 1, 0x9B);
|
||
|
||
do_random_tests ();
|
||
+ do_overflow_tests ();
|
||
return ret;
|
||
}
|
||
|
||
diff --git a/string/test-strncat.c b/string/test-strncat.c
|
||
index abbacb95c6..0c7f68d086 100644
|
||
--- a/string/test-strncat.c
|
||
+++ b/string/test-strncat.c
|
||
@@ -134,6 +134,66 @@ do_test (size_t align1, size_t align2, size_t len1, size_t len2,
|
||
}
|
||
}
|
||
|
||
+static void
+do_overflow_tests (void)
+{
+  /* Run every implementation with very large N values built from SIZE_MAX,
+     the address of the source string and high-bit masks.  */
+  CHAR *src = (CHAR *) buf1;
+  CHAR *dst = (CHAR *) buf2;
+  const uintptr_t src_addr = (uintptr_t) src;
+  size_t slen, k, mask;
+
+  for (k = 0; k < 200; ++k)
+    dst[k] = 32 + 23 * k % (BIG_CHAR - 32);
+  dst[200] = 0;
+  for (slen = 0; slen < 750; ++slen)
+    {
+      for (k = 0; k < slen; ++k)
+        src[k] = 32 + 23 * k % (BIG_CHAR - 32);
+      src[slen] = '\0';
+
+      FOR_EACH_IMPL (impl, 0)
+        {
+          /* Re-terminate DST at index 200 before each call.  */
+          dst[200] = '\0';
+          do_one_test (impl, dst, src, SIZE_MAX - slen);
+          dst[200] = '\0';
+          do_one_test (impl, dst, src, slen - src_addr);
+          dst[200] = '\0';
+          do_one_test (impl, dst, src, -src_addr - slen);
+          dst[200] = '\0';
+          do_one_test (impl, dst, src, SIZE_MAX - src_addr - slen);
+          dst[200] = '\0';
+          do_one_test (impl, dst, src, SIZE_MAX - src_addr + slen);
+        }
+      for (mask = 0, k = 8 * sizeof (size_t) - 1; k != 0; --k)
+        {
+          mask |= (size_t) 1 << k;
+          const size_t inv = ~mask;
+          FOR_EACH_IMPL (impl, 0)
+            {
+              dst[200] = '\0';
+              do_one_test (impl, dst, src, mask - slen);
+              dst[200] = '\0';
+              do_one_test (impl, dst, src, mask + slen);
+              dst[200] = '\0';
+              do_one_test (impl, dst, src, mask - src_addr - slen);
+              dst[200] = '\0';
+              do_one_test (impl, dst, src, mask - src_addr + slen);
+              dst[200] = '\0';
+              do_one_test (impl, dst, src, inv - slen);
+              dst[200] = '\0';
+              do_one_test (impl, dst, src, inv + slen);
+              dst[200] = '\0';
+              do_one_test (impl, dst, src, inv - src_addr - slen);
+              dst[200] = '\0';
+              do_one_test (impl, dst, src, inv - src_addr + slen);
+            }
+        }
+    }
+}
|
||
+
|
||
static void
|
||
do_random_tests (void)
|
||
{
|
||
@@ -316,6 +376,7 @@ test_main (void)
|
||
}
|
||
|
||
do_random_tests ();
|
||
+ do_overflow_tests ();
|
||
return ret;
|
||
}
|
||
|
||
diff --git a/string/test-strnlen.c b/string/test-strnlen.c
|
||
index 80ac9e8602..a1a6746cc9 100644
|
||
--- a/string/test-strnlen.c
|
||
+++ b/string/test-strnlen.c
|
||
@@ -27,6 +27,7 @@
|
||
|
||
#ifndef WIDE
|
||
# define STRNLEN strnlen
|
||
+# define MEMSET memset
|
||
# define CHAR char
|
||
# define BIG_CHAR CHAR_MAX
|
||
# define MIDDLE_CHAR 127
|
||
@@ -34,6 +35,7 @@
|
||
#else
|
||
# include <wchar.h>
|
||
# define STRNLEN wcsnlen
|
||
+# define MEMSET wmemset
|
||
# define CHAR wchar_t
|
||
# define BIG_CHAR WCHAR_MAX
|
||
# define MIDDLE_CHAR 1121
|
||
@@ -87,6 +89,38 @@ do_test (size_t align, size_t len, size_t maxlen, int max_char)
|
||
do_one_test (impl, (CHAR *) (buf + align), maxlen, MIN (len, maxlen));
|
||
}
|
||
|
||
+static void
|
||
+do_overflow_tests (void)
|
||
+{
|
||
+ size_t i, j, len;
|
||
+ const size_t one = 1;
|
||
+ uintptr_t buf_addr = (uintptr_t) buf1;
|
||
+
|
||
+ for (i = 0; i < 750; ++i)
|
||
+ {
|
||
+ do_test (0, i, SIZE_MAX - i, BIG_CHAR);
|
||
+ do_test (0, i, i - buf_addr, BIG_CHAR);
|
||
+ do_test (0, i, -buf_addr - i, BIG_CHAR);
|
||
+ do_test (0, i, SIZE_MAX - buf_addr - i, BIG_CHAR);
|
||
+ do_test (0, i, SIZE_MAX - buf_addr + i, BIG_CHAR);
|
||
+
|
||
+ len = 0;
|
||
+ for (j = 8 * sizeof(size_t) - 1; j ; --j)
|
||
+ {
|
||
+ len |= one << j;
|
||
+ do_test (0, i, len - i, BIG_CHAR);
|
||
+ do_test (0, i, len + i, BIG_CHAR);
|
||
+ do_test (0, i, len - buf_addr - i, BIG_CHAR);
|
||
+ do_test (0, i, len - buf_addr + i, BIG_CHAR);
|
||
+
|
||
+ do_test (0, i, ~len - i, BIG_CHAR);
|
||
+ do_test (0, i, ~len + i, BIG_CHAR);
|
||
+ do_test (0, i, ~len - buf_addr - i, BIG_CHAR);
|
||
+ do_test (0, i, ~len - buf_addr + i, BIG_CHAR);
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
|
||
static void
|
||
do_random_tests (void)
|
||
{
|
||
@@ -153,7 +187,7 @@ do_page_tests (void)
|
||
size_t last_offset = (page_size / sizeof (CHAR)) - 1;
|
||
|
||
CHAR *s = (CHAR *) buf2;
|
||
- memset (s, 65, (last_offset - 1));
|
||
+ MEMSET (s, 65, (last_offset - 1));
|
||
s[last_offset] = 0;
|
||
|
||
/* Place short strings ending at page boundary. */
|
||
@@ -196,6 +230,35 @@ do_page_tests (void)
|
||
}
|
||
}
|
||
|
||
+/* Tests meant to expose failures in implementations that access bytes
|
||
+ beyond the maximum length. */
|
||
+
|
||
+static void
|
||
+do_page_2_tests (void)
|
||
+{
|
||
+ size_t i, exp_len, offset;
|
||
+ size_t last_offset = page_size / sizeof (CHAR);
|
||
+
|
||
+ CHAR *s = (CHAR *) buf2;
|
||
+ MEMSET (s, 65, last_offset);
|
||
+
|
||
+ /* Place short strings ending at page boundary without the null
|
||
+ byte. */
|
||
+ offset = last_offset;
|
||
+ for (i = 0; i < 128; i++)
|
||
+ {
|
||
+ /* Decrease offset to stress several sizes and alignments. */
|
||
+ offset--;
|
||
+ exp_len = last_offset - offset;
|
||
+ FOR_EACH_IMPL (impl, 0)
|
||
+ {
|
||
+ /* If an implementation goes beyond EXP_LEN, it will trigger
|
||
+ the segfault. */
|
||
+ do_one_test (impl, (CHAR *) (s + offset), exp_len, exp_len);
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
|
||
int
|
||
test_main (void)
|
||
{
|
||
@@ -242,6 +305,8 @@ test_main (void)
|
||
|
||
do_random_tests ();
|
||
do_page_tests ();
|
||
+ do_page_2_tests ();
|
||
+ do_overflow_tests ();
|
||
return ret;
|
||
}
|
||
|
||
diff --git a/support/Makefile b/support/Makefile
|
||
index 93faafddf9..3d3aff5ff9 100644
|
||
--- a/support/Makefile
|
||
+++ b/support/Makefile
|
||
@@ -35,6 +35,8 @@ libsupport-routines = \
|
||
ignore_stderr \
|
||
next_to_fault \
|
||
oom_error \
|
||
+ resolv_response_context_duplicate \
|
||
+ resolv_response_context_free \
|
||
resolv_test \
|
||
set_fortify_handler \
|
||
support-xfstat \
|
||
@@ -133,6 +135,7 @@ libsupport-routines = \
|
||
xpthread_join \
|
||
xpthread_key_create \
|
||
xpthread_key_delete \
|
||
+ xpthread_kill \
|
||
xpthread_mutex_consistent \
|
||
xpthread_mutex_destroy \
|
||
xpthread_mutex_init \
|
||
diff --git a/support/capture_subprocess.h b/support/capture_subprocess.h
|
||
index 9808750f80..421f657678 100644
|
||
--- a/support/capture_subprocess.h
|
||
+++ b/support/capture_subprocess.h
|
||
@@ -41,6 +41,12 @@ struct support_capture_subprocess support_capture_subprocess
|
||
struct support_capture_subprocess support_capture_subprogram
|
||
(const char *file, char *const argv[]);
|
||
|
||
+/* Copy the running program into a setgid binary and run it with CHILD_ID
|
||
+ argument. If execution is successful, return the exit status of the child
|
||
+ program, otherwise return a non-zero failure exit code. */
|
||
+int support_capture_subprogram_self_sgid
|
||
+ (char *child_id);
|
||
+
|
||
/* Deallocate the subprocess data captured by
|
||
support_capture_subprocess. */
|
||
void support_capture_subprocess_free (struct support_capture_subprocess *);
|
||
diff --git a/support/resolv_response_context_duplicate.c b/support/resolv_response_context_duplicate.c
|
||
new file mode 100644
|
||
index 0000000000..f9c5c3462a
|
||
--- /dev/null
|
||
+++ b/support/resolv_response_context_duplicate.c
|
||
@@ -0,0 +1,37 @@
|
||
+/* Duplicate a response context used in DNS resolver tests.
|
||
+ Copyright (C) 2020 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <string.h>
|
||
+#include <support/resolv_test.h>
|
||
+#include <support/support.h>
|
||
+
|
||
+struct resolv_response_context *
|
||
+resolv_response_context_duplicate (const struct resolv_response_context *ctx)
|
||
+{
|
||
+ struct resolv_response_context *result = xmalloc (sizeof (*result));
|
||
+ memcpy (result, ctx, sizeof (*result));
|
||
+ if (result->client_address != NULL)
|
||
+ {
|
||
+ result->client_address = xmalloc (result->client_address_length);
|
||
+ memcpy (result->client_address, ctx->client_address,
|
||
+ result->client_address_length);
|
||
+ }
|
||
+ result->query_buffer = xmalloc (result->query_length);
|
||
+ memcpy (result->query_buffer, ctx->query_buffer, result->query_length);
|
||
+ return result;
|
||
+}
|
||
diff --git a/support/resolv_response_context_free.c b/support/resolv_response_context_free.c
|
||
new file mode 100644
|
||
index 0000000000..b88c05ffd4
|
||
--- /dev/null
|
||
+++ b/support/resolv_response_context_free.c
|
||
@@ -0,0 +1,28 @@
|
||
+/* Free a response context used in DNS resolver tests.
|
||
+ Copyright (C) 2020 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <stdlib.h>
|
||
+#include <support/resolv_test.h>
|
||
+
|
||
+void
|
||
+resolv_response_context_free (struct resolv_response_context *ctx)
|
||
+{
|
||
+ free (ctx->query_buffer);
|
||
+ free (ctx->client_address);
|
||
+ free (ctx);
|
||
+}
|
||
diff --git a/support/resolv_test.c b/support/resolv_test.c
|
||
index 53b7fc41ab..9878a040a3 100644
|
||
--- a/support/resolv_test.c
|
||
+++ b/support/resolv_test.c
|
||
@@ -181,7 +181,9 @@ resolv_response_init (struct resolv_response_builder *b,
|
||
b->buffer[2] |= b->query_buffer[2] & 0x01; /* Copy the RD bit. */
|
||
if (flags.tc)
|
||
b->buffer[2] |= 0x02;
|
||
- b->buffer[3] = 0x80 | flags.rcode; /* Always set RA. */
|
||
+ b->buffer[3] = flags.rcode;
|
||
+ if (!flags.clear_ra)
|
||
+ b->buffer[3] |= 0x80;
|
||
if (flags.ad)
|
||
b->buffer[3] |= 0x20;
|
||
|
||
@@ -434,9 +436,9 @@ resolv_response_buffer (const struct resolv_response_builder *b)
|
||
return result;
|
||
}
|
||
|
||
-static struct resolv_response_builder *
|
||
-response_builder_allocate
|
||
- (const unsigned char *query_buffer, size_t query_length)
|
||
+struct resolv_response_builder *
|
||
+resolv_response_builder_allocate (const unsigned char *query_buffer,
|
||
+ size_t query_length)
|
||
{
|
||
struct resolv_response_builder *b = xmalloc (sizeof (*b));
|
||
memset (b, 0, offsetof (struct resolv_response_builder, buffer));
|
||
@@ -445,8 +447,8 @@ response_builder_allocate
|
||
return b;
|
||
}
|
||
|
||
-static void
|
||
-response_builder_free (struct resolv_response_builder *b)
|
||
+void
|
||
+resolv_response_builder_free (struct resolv_response_builder *b)
|
||
{
|
||
tdestroy (b->compression_offsets, free);
|
||
free (b);
|
||
@@ -661,13 +663,17 @@ server_thread_udp_process_one (struct resolv_test *obj, int server_index)
|
||
|
||
struct resolv_response_context ctx =
|
||
{
|
||
+ .test = obj,
|
||
+ .client_address = &peer,
|
||
+ .client_address_length = peerlen,
|
||
.query_buffer = query,
|
||
.query_length = length,
|
||
.server_index = server_index,
|
||
.tcp = false,
|
||
.edns = qinfo.edns,
|
||
};
|
||
- struct resolv_response_builder *b = response_builder_allocate (query, length);
|
||
+ struct resolv_response_builder *b
|
||
+ = resolv_response_builder_allocate (query, length);
|
||
obj->config.response_callback
|
||
(&ctx, b, qinfo.qname, qinfo.qclass, qinfo.qtype);
|
||
|
||
@@ -684,7 +690,7 @@ server_thread_udp_process_one (struct resolv_test *obj, int server_index)
|
||
if (b->offset >= 12)
|
||
printf ("info: UDP server %d: sending response:"
|
||
" %zu bytes, RCODE %d (for %s/%u/%u)\n",
|
||
- server_index, b->offset, b->buffer[3] & 0x0f,
|
||
+ ctx.server_index, b->offset, b->buffer[3] & 0x0f,
|
||
qinfo.qname, qinfo.qclass, qinfo.qtype);
|
||
else
|
||
printf ("info: UDP server %d: sending response: %zu bytes"
|
||
@@ -694,23 +700,31 @@ server_thread_udp_process_one (struct resolv_test *obj, int server_index)
|
||
if (b->truncate_bytes > 0)
|
||
printf ("info: truncated by %u bytes\n", b->truncate_bytes);
|
||
}
|
||
- size_t to_send = b->offset;
|
||
- if (to_send < b->truncate_bytes)
|
||
- to_send = 0;
|
||
- else
|
||
- to_send -= b->truncate_bytes;
|
||
-
|
||
- /* Ignore most errors here because the other end may have closed
|
||
- the socket. */
|
||
- if (sendto (obj->servers[server_index].socket_udp,
|
||
- b->buffer, to_send, 0,
|
||
- (struct sockaddr *) &peer, peerlen) < 0)
|
||
- TEST_VERIFY_EXIT (errno != EBADF);
|
||
+ resolv_response_send_udp (&ctx, b);
|
||
}
|
||
- response_builder_free (b);
|
||
+ resolv_response_builder_free (b);
|
||
return true;
|
||
}
|
||
|
||
+void
|
||
+resolv_response_send_udp (const struct resolv_response_context *ctx,
|
||
+ struct resolv_response_builder *b)
|
||
+{
|
||
+ TEST_VERIFY_EXIT (!ctx->tcp);
|
||
+ size_t to_send = b->offset;
|
||
+ if (to_send < b->truncate_bytes)
|
||
+ to_send = 0;
|
||
+ else
|
||
+ to_send -= b->truncate_bytes;
|
||
+
|
||
+ /* Ignore most errors here because the other end may have closed
|
||
+ the socket. */
|
||
+ if (sendto (ctx->test->servers[ctx->server_index].socket_udp,
|
||
+ b->buffer, to_send, 0,
|
||
+ ctx->client_address, ctx->client_address_length) < 0)
|
||
+ TEST_VERIFY_EXIT (errno != EBADF);
|
||
+}
|
||
+
|
||
/* UDP thread_callback function. Variant for one thread per
|
||
server. */
|
||
static void
|
||
@@ -897,14 +911,15 @@ server_thread_tcp_client (void *arg)
|
||
|
||
struct resolv_response_context ctx =
|
||
{
|
||
+ .test = closure->obj,
|
||
.query_buffer = query_buffer,
|
||
.query_length = query_length,
|
||
.server_index = closure->server_index,
|
||
.tcp = true,
|
||
.edns = qinfo.edns,
|
||
};
|
||
- struct resolv_response_builder *b = response_builder_allocate
|
||
- (query_buffer, query_length);
|
||
+ struct resolv_response_builder *b
|
||
+ = resolv_response_builder_allocate (query_buffer, query_length);
|
||
closure->obj->config.response_callback
|
||
(&ctx, b, qinfo.qname, qinfo.qclass, qinfo.qtype);
|
||
|
||
@@ -936,7 +951,7 @@ server_thread_tcp_client (void *arg)
|
||
writev_fully (closure->client_socket, buffers, 2);
|
||
}
|
||
bool close_flag = b->close;
|
||
- response_builder_free (b);
|
||
+ resolv_response_builder_free (b);
|
||
free (query_buffer);
|
||
if (close_flag)
|
||
break;
|
||
diff --git a/support/resolv_test.h b/support/resolv_test.h
|
||
index 67819469a0..31a5c1c3e7 100644
|
||
--- a/support/resolv_test.h
|
||
+++ b/support/resolv_test.h
|
||
@@ -35,25 +35,36 @@ struct resolv_edns_info
|
||
uint16_t payload_size;
|
||
};
|
||
|
||
+/* This opaque struct collects information about the resolver testing
|
||
+ currently in progress. */
|
||
+struct resolv_test;
|
||
+
|
||
/* This struct provides context information when the response callback
|
||
specified in struct resolv_redirect_config is invoked. */
|
||
struct resolv_response_context
|
||
{
|
||
- const unsigned char *query_buffer;
|
||
+ struct resolv_test *test;
|
||
+ void *client_address;
|
||
+ size_t client_address_length;
|
||
+ unsigned char *query_buffer;
|
||
size_t query_length;
|
||
int server_index;
|
||
bool tcp;
|
||
struct resolv_edns_info edns;
|
||
};
|
||
|
||
+/* Produces a deep copy of the context. */
|
||
+struct resolv_response_context *
|
||
+ resolv_response_context_duplicate (const struct resolv_response_context *);
|
||
+
|
||
+/* Frees the copy. For the context passed to the response function,
|
||
+ this happens implicitly. */
|
||
+void resolv_response_context_free (struct resolv_response_context *);
|
||
+
|
||
/* This opaque struct is used to construct responses from within the
|
||
response callback function. */
|
||
struct resolv_response_builder;
|
||
|
||
-/* This opaque struct collects information about the resolver testing
|
||
- currently in progress. */
|
||
-struct resolv_test;
|
||
-
|
||
enum
|
||
{
|
||
/* Maximum number of test servers supported by the framework. */
|
||
@@ -137,6 +148,10 @@ struct resolv_response_flags
|
||
/* If true, the AD (authenticated data) flag will be set. */
|
||
bool ad;
|
||
|
||
+ /* If true, do not set the RA (recursion available) flag in the
|
||
+ response. */
|
||
+ bool clear_ra;
|
||
+
|
||
/* Initial section count values. Can be used to artificially
|
||
increase the counts, for malformed packet testing.*/
|
||
unsigned short qdcount;
|
||
@@ -188,6 +203,22 @@ void resolv_response_close (struct resolv_response_builder *);
|
||
/* The size of the response packet built so far. */
|
||
size_t resolv_response_length (const struct resolv_response_builder *);
|
||
|
||
+/* Allocates a response builder tied to a specific query packet,
|
||
+ starting at QUERY_BUFFER, containing QUERY_LENGTH bytes. */
|
||
+struct resolv_response_builder *
|
||
+ resolv_response_builder_allocate (const unsigned char *query_buffer,
|
||
+ size_t query_length);
|
||
+
|
||
+/* Deallocates a response builder. */
|
||
+void resolv_response_builder_free (struct resolv_response_builder *);
|
||
+
|
||
+/* Sends a UDP response using a specific context. This can be used to
|
||
+ reorder or duplicate responses, along with
|
||
+ resolv_response_context_duplicate and
|
||
+ resolv_response_builder_allocate. */
|
||
+void resolv_response_send_udp (const struct resolv_response_context *,
|
||
+ struct resolv_response_builder *);
|
||
+
|
||
__END_DECLS
|
||
|
||
#endif /* SUPPORT_RESOLV_TEST_H */
|
||
diff --git a/support/subprocess.h b/support/subprocess.h
|
||
index 8b442fd5c0..34ffd02e8e 100644
|
||
--- a/support/subprocess.h
|
||
+++ b/support/subprocess.h
|
||
@@ -38,6 +38,11 @@ struct support_subprocess support_subprocess
|
||
struct support_subprocess support_subprogram
|
||
(const char *file, char *const argv[]);
|
||
|
||
+/* Invoke program FILE with ARGV arguments by using posix_spawn and wait for it
|
||
+ to complete. Return program exit status. */
|
||
+int support_subprogram_wait
|
||
+ (const char *file, char *const argv[]);
|
||
+
|
||
/* Wait for the subprocess indicated by PROC::PID. Return the status
|
||
indicate by waitpid call. */
|
||
int support_process_wait (struct support_subprocess *proc);
|
||
diff --git a/support/support_capture_subprocess.c b/support/support_capture_subprocess.c
|
||
index eeed676e3d..28a37df67f 100644
|
||
--- a/support/support_capture_subprocess.c
|
||
+++ b/support/support_capture_subprocess.c
|
||
@@ -20,11 +20,14 @@
|
||
#include <support/capture_subprocess.h>
|
||
|
||
#include <errno.h>
|
||
+#include <fcntl.h>
|
||
#include <stdlib.h>
|
||
#include <support/check.h>
|
||
#include <support/xunistd.h>
|
||
#include <support/xsocket.h>
|
||
#include <support/xspawn.h>
|
||
+#include <support/support.h>
|
||
+#include <support/test-driver.h>
|
||
|
||
static void
|
||
transfer (const char *what, struct pollfd *pfd, struct xmemstream *stream)
|
||
@@ -36,7 +39,7 @@ transfer (const char *what, struct pollfd *pfd, struct xmemstream *stream)
|
||
if (ret < 0)
|
||
{
|
||
support_record_failure ();
|
||
- printf ("error: reading from subprocess %s: %m", what);
|
||
+ printf ("error: reading from subprocess %s: %m\n", what);
|
||
pfd->events = 0;
|
||
pfd->revents = 0;
|
||
}
|
||
@@ -102,6 +105,129 @@ support_capture_subprogram (const char *file, char *const argv[])
|
||
return result;
|
||
}
|
||
|
||
+/* Copies the executable into a restricted directory, so that we can
|
||
+ safely make it SGID with the group ID GID. Then runs the
|
||
+ executable. */
|
||
+static int
|
||
+copy_and_spawn_sgid (char *child_id, gid_t gid)
|
||
+{
|
||
+ char *dirname = xasprintf ("%s/tst-tunables-setuid.%jd",
|
||
+ test_dir, (intmax_t) getpid ());
|
||
+ char *execname = xasprintf ("%s/bin", dirname);
|
||
+ int infd = -1;
|
||
+ int outfd = -1;
|
||
+ int ret = 1, status = 1;
|
||
+
|
||
+ TEST_VERIFY (mkdir (dirname, 0700) == 0);
|
||
+ if (support_record_failure_is_failed ())
|
||
+ goto err;
|
||
+
|
||
+ infd = open ("/proc/self/exe", O_RDONLY);
|
||
+ if (infd < 0)
|
||
+ FAIL_UNSUPPORTED ("unsupported: Cannot read binary from procfs\n");
|
||
+
|
||
+ outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700);
|
||
+ TEST_VERIFY (outfd >= 0);
|
||
+ if (support_record_failure_is_failed ())
|
||
+ goto err;
|
||
+
|
||
+ char buf[4096];
|
||
+ for (;;)
|
||
+ {
|
||
+ ssize_t rdcount = read (infd, buf, sizeof (buf));
|
||
+ TEST_VERIFY (rdcount >= 0);
|
||
+ if (support_record_failure_is_failed ())
|
||
+ goto err;
|
||
+ if (rdcount == 0)
|
||
+ break;
|
||
+ char *p = buf;
|
||
+ char *end = buf + rdcount;
|
||
+ while (p != end)
|
||
+ {
|
||
+ ssize_t wrcount = write (outfd, p, end - p);
|
||
+ if (wrcount == 0)
|
||
+ errno = ENOSPC;
|
||
+ TEST_VERIFY (wrcount > 0);
|
||
+ if (support_record_failure_is_failed ())
|
||
+ goto err;
|
||
+ p += wrcount;
|
||
+ }
|
||
+ }
|
||
+ TEST_VERIFY (fchown (outfd, getuid (), gid) == 0);
|
||
+ if (support_record_failure_is_failed ())
|
||
+ goto err;
|
||
+ TEST_VERIFY (fchmod (outfd, 02750) == 0);
|
||
+ if (support_record_failure_is_failed ())
|
||
+ goto err;
|
||
+ TEST_VERIFY (close (outfd) == 0);
|
||
+ if (support_record_failure_is_failed ())
|
||
+ goto err;
|
||
+ TEST_VERIFY (close (infd) == 0);
|
||
+ if (support_record_failure_is_failed ())
|
||
+ goto err;
|
||
+
|
||
+ /* We have the binary, now spawn the subprocess. Avoid using
|
||
+ support_subprogram because we only want the program exit status, not the
|
||
+ contents. */
|
||
+ ret = 0;
|
||
+
|
||
+ char * const args[] = {execname, child_id, NULL};
|
||
+
|
||
+ status = support_subprogram_wait (args[0], args);
|
||
+
|
||
+err:
|
||
+ if (outfd >= 0)
|
||
+ close (outfd);
|
||
+ if (infd >= 0)
|
||
+ close (infd);
|
||
+ if (execname != NULL)
|
||
+ {
|
||
+ unlink (execname);
|
||
+ free (execname);
|
||
+ }
|
||
+ if (dirname != NULL)
|
||
+ {
|
||
+ rmdir (dirname);
|
||
+ free (dirname);
|
||
+ }
|
||
+
|
||
+ if (ret != 0)
|
||
+ FAIL_EXIT1("Failed to make sgid executable for test\n");
|
||
+
|
||
+ return status;
|
||
+}
|
||
+
|
||
+int
|
||
+support_capture_subprogram_self_sgid (char *child_id)
|
||
+{
|
||
+ gid_t target = 0;
|
||
+ const int count = 64;
|
||
+ gid_t groups[count];
|
||
+
|
||
+ /* Get a GID which is not our current GID, but is present in the
|
||
+ supplementary group list. */
|
||
+ int ret = getgroups (count, groups);
|
||
+ if (ret < 0)
|
||
+ FAIL_UNSUPPORTED("Could not get group list for user %jd\n",
|
||
+ (intmax_t) getuid ());
|
||
+
|
||
+ gid_t current = getgid ();
|
||
+ for (int i = 0; i < ret; ++i)
|
||
+ {
|
||
+ if (groups[i] != current)
|
||
+ {
|
||
+ target = groups[i];
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (target == 0)
|
||
+ FAIL_UNSUPPORTED("Could not find a suitable GID for user %jd\n",
|
||
+ (intmax_t) getuid ());
|
||
+
|
||
+ return copy_and_spawn_sgid (child_id, target);
|
||
+}
|
||
+
|
||
void
|
||
support_capture_subprocess_free (struct support_capture_subprocess *p)
|
||
{
|
||
diff --git a/support/support_subprocess.c b/support/support_subprocess.c
|
||
index 36e3a77af2..4a25828111 100644
|
||
--- a/support/support_subprocess.c
|
||
+++ b/support/support_subprocess.c
|
||
@@ -27,7 +27,7 @@
|
||
#include <support/subprocess.h>
|
||
|
||
static struct support_subprocess
|
||
-support_suprocess_init (void)
|
||
+support_subprocess_init (void)
|
||
{
|
||
struct support_subprocess result;
|
||
|
||
@@ -48,7 +48,7 @@ support_suprocess_init (void)
|
||
struct support_subprocess
|
||
support_subprocess (void (*callback) (void *), void *closure)
|
||
{
|
||
- struct support_subprocess result = support_suprocess_init ();
|
||
+ struct support_subprocess result = support_subprocess_init ();
|
||
|
||
result.pid = xfork ();
|
||
if (result.pid == 0)
|
||
@@ -71,7 +71,7 @@ support_subprocess (void (*callback) (void *), void *closure)
|
||
struct support_subprocess
|
||
support_subprogram (const char *file, char *const argv[])
|
||
{
|
||
- struct support_subprocess result = support_suprocess_init ();
|
||
+ struct support_subprocess result = support_subprocess_init ();
|
||
|
||
posix_spawn_file_actions_t fa;
|
||
/* posix_spawn_file_actions_init does not fail. */
|
||
@@ -84,7 +84,7 @@ support_subprogram (const char *file, char *const argv[])
|
||
xposix_spawn_file_actions_addclose (&fa, result.stdout_pipe[1]);
|
||
xposix_spawn_file_actions_addclose (&fa, result.stderr_pipe[1]);
|
||
|
||
- result.pid = xposix_spawn (file, &fa, NULL, argv, NULL);
|
||
+ result.pid = xposix_spawn (file, &fa, NULL, argv, environ);
|
||
|
||
xclose (result.stdout_pipe[1]);
|
||
xclose (result.stderr_pipe[1]);
|
||
@@ -92,6 +92,19 @@ support_subprogram (const char *file, char *const argv[])
|
||
return result;
|
||
}
|
||
|
||
+int
|
||
+support_subprogram_wait (const char *file, char *const argv[])
|
||
+{
|
||
+ posix_spawn_file_actions_t fa;
|
||
+
|
||
+ posix_spawn_file_actions_init (&fa);
|
||
+ struct support_subprocess res = support_subprocess_init ();
|
||
+
|
||
+ res.pid = xposix_spawn (file, &fa, NULL, argv, environ);
|
||
+
|
||
+ return support_process_wait (&res);
|
||
+}
|
||
+
|
||
int
|
||
support_process_wait (struct support_subprocess *proc)
|
||
{
|
||
diff --git a/support/xpthread_kill.c b/support/xpthread_kill.c
|
||
new file mode 100644
|
||
index 0000000000..111a75d85e
|
||
--- /dev/null
|
||
+++ b/support/xpthread_kill.c
|
||
@@ -0,0 +1,26 @@
|
||
+/* pthread_kill with error checking.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <signal.h>
|
||
+#include <support/xthread.h>
|
||
+
|
||
+void
|
||
+xpthread_kill (pthread_t thr, int signo)
|
||
+{
|
||
+ xpthread_check_return ("pthread_kill", pthread_kill (thr, signo));
|
||
+}
|
||
diff --git a/support/xthread.h b/support/xthread.h
|
||
index 05f8d4a7d9..cb1fc30da0 100644
|
||
--- a/support/xthread.h
|
||
+++ b/support/xthread.h
|
||
@@ -75,6 +75,8 @@ void xpthread_attr_setstacksize (pthread_attr_t *attr,
|
||
void xpthread_attr_setguardsize (pthread_attr_t *attr,
|
||
size_t guardsize);
|
||
|
||
+void xpthread_kill (pthread_t thr, int signo);
|
||
+
|
||
/* Set the stack size in ATTR to a small value, but still large enough
|
||
to cover most internal glibc stack usage. */
|
||
void support_set_small_thread_stack_size (pthread_attr_t *attr);
|
||
diff --git a/sysdeps/aarch64/dl-bti.c b/sysdeps/aarch64/dl-bti.c
|
||
index 196e462520..cf7624aaa2 100644
|
||
--- a/sysdeps/aarch64/dl-bti.c
|
||
+++ b/sysdeps/aarch64/dl-bti.c
|
||
@@ -19,43 +19,76 @@
|
||
#include <errno.h>
|
||
#include <libintl.h>
|
||
#include <ldsodefs.h>
|
||
+#include <sys/mman.h>
|
||
|
||
-static int
|
||
-enable_bti (struct link_map *map, const char *program)
|
||
+/* See elf/dl-load.h. */
|
||
+#ifndef MAP_COPY
|
||
+# define MAP_COPY (MAP_PRIVATE | MAP_DENYWRITE)
|
||
+#endif
|
||
+
|
||
+/* Enable BTI protection for MAP. */
|
||
+
|
||
+void
|
||
+_dl_bti_protect (struct link_map *map, int fd)
|
||
{
|
||
+ const size_t pagesz = GLRO(dl_pagesize);
|
||
const ElfW(Phdr) *phdr;
|
||
- unsigned prot;
|
||
|
||
for (phdr = map->l_phdr; phdr < &map->l_phdr[map->l_phnum]; ++phdr)
|
||
if (phdr->p_type == PT_LOAD && (phdr->p_flags & PF_X))
|
||
{
|
||
- void *start = (void *) (phdr->p_vaddr + map->l_addr);
|
||
- size_t len = phdr->p_memsz;
|
||
+ size_t vstart = ALIGN_DOWN (phdr->p_vaddr, pagesz);
|
||
+ size_t vend = ALIGN_UP (phdr->p_vaddr + phdr->p_filesz, pagesz);
|
||
+ off_t off = ALIGN_DOWN (phdr->p_offset, pagesz);
|
||
+ void *start = (void *) (vstart + map->l_addr);
|
||
+ size_t len = vend - vstart;
|
||
|
||
- prot = PROT_EXEC | PROT_BTI;
|
||
+ unsigned prot = PROT_EXEC | PROT_BTI;
|
||
if (phdr->p_flags & PF_R)
|
||
prot |= PROT_READ;
|
||
if (phdr->p_flags & PF_W)
|
||
prot |= PROT_WRITE;
|
||
|
||
- if (__mprotect (start, len, prot) < 0)
|
||
- {
|
||
- if (program)
|
||
- _dl_fatal_printf ("%s: mprotect failed to turn on BTI\n",
|
||
- map->l_name);
|
||
- else
|
||
- _dl_signal_error (errno, map->l_name, "dlopen",
|
||
- N_("mprotect failed to turn on BTI"));
|
||
- }
|
||
+ if (fd == -1)
|
||
+ /* Ignore failures for kernel mapped binaries. */
|
||
+ __mprotect (start, len, prot);
|
||
+ else
|
||
+ map->l_mach.bti_fail = __mmap (start, len, prot,
|
||
+ MAP_FIXED|MAP_COPY|MAP_FILE,
|
||
+ fd, off) == MAP_FAILED;
|
||
}
|
||
- return 0;
|
||
}
|
||
|
||
-/* Enable BTI for L if required. */
|
||
+
|
||
+static void
|
||
+bti_failed (struct link_map *l, const char *program)
|
||
+{
|
||
+ if (program)
|
||
+ _dl_fatal_printf ("%s: %s: failed to turn on BTI protection\n",
|
||
+ program, l->l_name);
|
||
+ else
|
||
+ /* Note: the errno value is not available any more. */
|
||
+ _dl_signal_error (0, l->l_name, "dlopen",
|
||
+ N_("failed to turn on BTI protection"));
|
||
+}
|
||
+
|
||
+
|
||
+/* Enable BTI for L and its dependencies. */
|
||
|
||
void
|
||
_dl_bti_check (struct link_map *l, const char *program)
|
||
{
|
||
- if (GLRO(dl_aarch64_cpu_features).bti && l->l_mach.bti)
|
||
- enable_bti (l, program);
|
||
+ if (!GLRO(dl_aarch64_cpu_features).bti)
|
||
+ return;
|
||
+
|
||
+ if (l->l_mach.bti_fail)
|
||
+ bti_failed (l, program);
|
||
+
|
||
+ unsigned int i = l->l_searchlist.r_nlist;
|
||
+ while (i-- > 0)
|
||
+ {
|
||
+ struct link_map *dep = l->l_initfini[i];
|
||
+ if (dep->l_mach.bti_fail)
|
||
+ bti_failed (dep, program);
|
||
+ }
|
||
}
|
||
diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h
|
||
index 70b9ed3925..fde7cfd9e2 100644
|
||
--- a/sysdeps/aarch64/dl-machine.h
|
||
+++ b/sysdeps/aarch64/dl-machine.h
|
||
@@ -395,13 +395,6 @@ elf_machine_lazy_rel (struct link_map *map,
|
||
/* Check for unexpected PLT reloc type. */
|
||
if (__builtin_expect (r_type == AARCH64_R(JUMP_SLOT), 1))
|
||
{
|
||
- if (map->l_mach.plt == 0)
|
||
- {
|
||
- /* Prelinking. */
|
||
- *reloc_addr += l_addr;
|
||
- return;
|
||
- }
|
||
-
|
||
if (__glibc_unlikely (map->l_info[DT_AARCH64 (VARIANT_PCS)] != NULL))
|
||
{
|
||
/* Check the symbol table for variant PCS symbols. */
|
||
@@ -425,7 +418,10 @@ elf_machine_lazy_rel (struct link_map *map,
|
||
}
|
||
}
|
||
|
||
- *reloc_addr = map->l_mach.plt;
|
||
+ if (map->l_mach.plt == 0)
|
||
+ *reloc_addr += l_addr;
|
||
+ else
|
||
+ *reloc_addr = map->l_mach.plt;
|
||
}
|
||
else if (__builtin_expect (r_type == AARCH64_R(TLSDESC), 1))
|
||
{
|
||
diff --git a/sysdeps/aarch64/dl-prop.h b/sysdeps/aarch64/dl-prop.h
|
||
index b0785bda83..e926e54984 100644
|
||
--- a/sysdeps/aarch64/dl-prop.h
|
||
+++ b/sysdeps/aarch64/dl-prop.h
|
||
@@ -19,6 +19,8 @@
|
||
#ifndef _DL_PROP_H
|
||
#define _DL_PROP_H
|
||
|
||
+extern void _dl_bti_protect (struct link_map *, int) attribute_hidden;
|
||
+
|
||
extern void _dl_bti_check (struct link_map *, const char *)
|
||
attribute_hidden;
|
||
|
||
@@ -35,14 +37,18 @@ _dl_open_check (struct link_map *m)
|
||
}
|
||
|
||
static inline void __attribute__ ((always_inline))
|
||
-_dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph)
|
||
+_dl_process_pt_note (struct link_map *l, int fd, const ElfW(Phdr) *ph)
|
||
{
|
||
}
|
||
|
||
static inline int
|
||
-_dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz,
|
||
- void *data)
|
||
+_dl_process_gnu_property (struct link_map *l, int fd, uint32_t type,
|
||
+ uint32_t datasz, void *data)
|
||
{
|
||
+ if (!GLRO(dl_aarch64_cpu_features).bti)
|
||
+ /* Skip note processing. */
|
||
+ return 0;
|
||
+
|
||
if (type == GNU_PROPERTY_AARCH64_FEATURE_1_AND)
|
||
{
|
||
/* Stop if the property note is ill-formed. */
|
||
@@ -51,7 +57,7 @@ _dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz,
|
||
|
||
unsigned int feature_1 = *(unsigned int *) data;
|
||
if (feature_1 & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)
|
||
- l->l_mach.bti = true;
|
||
+ _dl_bti_protect (l, fd);
|
||
|
||
/* Stop if we processed the property note. */
|
||
return 0;
|
||
diff --git a/sysdeps/aarch64/linkmap.h b/sysdeps/aarch64/linkmap.h
|
||
index 847a03ace2..b3f7663b07 100644
|
||
--- a/sysdeps/aarch64/linkmap.h
|
||
+++ b/sysdeps/aarch64/linkmap.h
|
||
@@ -22,5 +22,5 @@ struct link_map_machine
|
||
{
|
||
ElfW(Addr) plt; /* Address of .plt */
|
||
void *tlsdesc_table; /* Address of TLS descriptor hash table. */
|
||
- bool bti; /* Branch Target Identification is enabled. */
|
||
+ bool bti_fail; /* Failed to enable Branch Target Identification. */
|
||
};
|
||
diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
|
||
index 7cf5f033e8..799d60c98c 100644
|
||
--- a/sysdeps/aarch64/multiarch/memcpy.c
|
||
+++ b/sysdeps/aarch64/multiarch/memcpy.c
|
||
@@ -41,7 +41,8 @@ libc_ifunc (__libc_memcpy,
|
||
? __memcpy_falkor
|
||
: (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)
|
||
? __memcpy_thunderx2
|
||
- : (IS_NEOVERSE_N1 (midr)
|
||
+ : (IS_NEOVERSE_N1 (midr) || IS_NEOVERSE_N2 (midr)
|
||
+ || IS_NEOVERSE_V1 (midr)
|
||
? __memcpy_simd
|
||
: __memcpy_generic)))));
|
||
|
||
diff --git a/sysdeps/aarch64/multiarch/memcpy_advsimd.S b/sysdeps/aarch64/multiarch/memcpy_advsimd.S
|
||
index d4ba747777..48bb6d7ca4 100644
|
||
--- a/sysdeps/aarch64/multiarch/memcpy_advsimd.S
|
||
+++ b/sysdeps/aarch64/multiarch/memcpy_advsimd.S
|
||
@@ -223,12 +223,13 @@ L(copy_long_backwards):
|
||
b.ls L(copy64_from_start)
|
||
|
||
L(loop64_backwards):
|
||
- stp A_q, B_q, [dstend, -32]
|
||
+ str B_q, [dstend, -16]
|
||
+ str A_q, [dstend, -32]
|
||
ldp A_q, B_q, [srcend, -96]
|
||
- stp C_q, D_q, [dstend, -64]
|
||
+ str D_q, [dstend, -48]
|
||
+ str C_q, [dstend, -64]!
|
||
ldp C_q, D_q, [srcend, -128]
|
||
sub srcend, srcend, 64
|
||
- sub dstend, dstend, 64
|
||
subs count, count, 64
|
||
b.hi L(loop64_backwards)
|
||
|
||
diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
|
||
index ad10aa8ac6..46a4cb3a54 100644
|
||
--- a/sysdeps/aarch64/multiarch/memmove.c
|
||
+++ b/sysdeps/aarch64/multiarch/memmove.c
|
||
@@ -41,7 +41,8 @@ libc_ifunc (__libc_memmove,
|
||
? __memmove_falkor
|
||
: (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)
|
||
? __memmove_thunderx2
|
||
- : (IS_NEOVERSE_N1 (midr)
|
||
+ : (IS_NEOVERSE_N1 (midr) || IS_NEOVERSE_N2 (midr)
|
||
+ || IS_NEOVERSE_V1 (midr)
|
||
? __memmove_simd
|
||
: __memmove_generic)))));
|
||
|
||
diff --git a/sysdeps/aarch64/start.S b/sysdeps/aarch64/start.S
|
||
index 75393e1c18..1998ea95d4 100644
|
||
--- a/sysdeps/aarch64/start.S
|
||
+++ b/sysdeps/aarch64/start.S
|
||
@@ -43,11 +43,9 @@
|
||
*/
|
||
|
||
.text
|
||
- .globl _start
|
||
- .type _start,#function
|
||
-_start:
|
||
- BTI_C
|
||
+ENTRY(_start)
|
||
/* Create an initial frame with 0 LR and FP */
|
||
+ cfi_undefined (x30)
|
||
mov x29, #0
|
||
mov x30, #0
|
||
|
||
@@ -101,8 +99,10 @@ _start:
|
||
because crt1.o and rcrt1.o share code and the later must avoid the
|
||
use of GOT relocations before __libc_start_main is called. */
|
||
__wrap_main:
|
||
+ BTI_C
|
||
b main
|
||
#endif
|
||
+END(_start)
|
||
|
||
/* Define a symbol for the first piece of initialized data. */
|
||
.data
|
||
diff --git a/sysdeps/generic/dl-prop.h b/sysdeps/generic/dl-prop.h
|
||
index f1cf576fe3..df27ff8e6a 100644
|
||
--- a/sysdeps/generic/dl-prop.h
|
||
+++ b/sysdeps/generic/dl-prop.h
|
||
@@ -37,15 +37,15 @@ _dl_open_check (struct link_map *m)
|
||
}
|
||
|
||
static inline void __attribute__ ((always_inline))
|
||
-_dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph)
|
||
+_dl_process_pt_note (struct link_map *l, int fd, const ElfW(Phdr) *ph)
|
||
{
|
||
}
|
||
|
||
/* Called for each property in the NT_GNU_PROPERTY_TYPE_0 note of L,
|
||
processing of the properties continues until this returns 0. */
|
||
static inline int __attribute__ ((always_inline))
|
||
-_dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz,
|
||
- void *data)
|
||
+_dl_process_gnu_property (struct link_map *l, int fd, uint32_t type,
|
||
+ uint32_t datasz, void *data)
|
||
{
|
||
return 0;
|
||
}
|
||
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
|
||
index ba114ab4b1..62ac40d81b 100644
|
||
--- a/sysdeps/generic/ldsodefs.h
|
||
+++ b/sysdeps/generic/ldsodefs.h
|
||
@@ -919,8 +919,9 @@ extern void _dl_rtld_di_serinfo (struct link_map *loader,
|
||
Dl_serinfo *si, bool counting);
|
||
|
||
/* Process PT_GNU_PROPERTY program header PH in module L after
|
||
- PT_LOAD segments are mapped. */
|
||
-void _dl_process_pt_gnu_property (struct link_map *l, const ElfW(Phdr) *ph);
|
||
+ PT_LOAD segments are mapped from file FD. */
|
||
+void _dl_process_pt_gnu_property (struct link_map *l, int fd,
|
||
+ const ElfW(Phdr) *ph);
|
||
|
||
|
||
/* Search loaded objects' symbol tables for a definition of the symbol
|
||
diff --git a/sysdeps/generic/unwind.h b/sysdeps/generic/unwind.h
|
||
index b667a5b652..c229603af3 100644
|
||
--- a/sysdeps/generic/unwind.h
|
||
+++ b/sysdeps/generic/unwind.h
|
||
@@ -75,15 +75,21 @@ typedef void (*_Unwind_Exception_Cleanup_Fn) (_Unwind_Reason_Code,
|
||
|
||
struct _Unwind_Exception
|
||
{
|
||
- _Unwind_Exception_Class exception_class;
|
||
- _Unwind_Exception_Cleanup_Fn exception_cleanup;
|
||
- _Unwind_Word private_1;
|
||
- _Unwind_Word private_2;
|
||
-
|
||
- /* @@@ The IA-64 ABI says that this structure must be double-word aligned.
|
||
- Taking that literally does not make much sense generically. Instead we
|
||
- provide the maximum alignment required by any type for the machine. */
|
||
-} __attribute__((__aligned__));
|
||
+ union
|
||
+ {
|
||
+ struct
|
||
+ {
|
||
+ _Unwind_Exception_Class exception_class;
|
||
+ _Unwind_Exception_Cleanup_Fn exception_cleanup;
|
||
+ _Unwind_Word private_1;
|
||
+ _Unwind_Word private_2;
|
||
+ };
|
||
+
|
||
+ /* The IA-64 ABI says that this structure must be double-word aligned. */
|
||
+ _Unwind_Word unwind_exception_align[2]
|
||
+ __attribute__ ((__aligned__ (2 * sizeof (_Unwind_Word))));
|
||
+ };
|
||
+};
|
||
|
||
|
||
/* The ACTIONS argument to the personality routine is a bitwise OR of one
|
||
diff --git a/sysdeps/gnu/errlist.h b/sysdeps/gnu/errlist.h
|
||
index 5d11ed723d..6329e5f393 100644
|
||
--- a/sysdeps/gnu/errlist.h
|
||
+++ b/sysdeps/gnu/errlist.h
|
||
@@ -1,24 +1,21 @@
|
||
-#ifndef ERR_MAP
|
||
-#define ERR_MAP(value) value
|
||
-#endif
|
||
-_S(ERR_MAP(0), N_("Success"))
|
||
+_S(0, N_("Success"))
|
||
#ifdef EPERM
|
||
/*
|
||
TRANS Only the owner of the file (or other resource)
|
||
TRANS or processes with special privileges can perform the operation. */
|
||
-_S(ERR_MAP(EPERM), N_("Operation not permitted"))
|
||
+_S(EPERM, N_("Operation not permitted"))
|
||
#endif
|
||
#ifdef ENOENT
|
||
/*
|
||
TRANS This is a ``file doesn't exist'' error
|
||
TRANS for ordinary files that are referenced in contexts where they are
|
||
TRANS expected to already exist. */
|
||
-_S(ERR_MAP(ENOENT), N_("No such file or directory"))
|
||
+_S(ENOENT, N_("No such file or directory"))
|
||
#endif
|
||
#ifdef ESRCH
|
||
/*
|
||
TRANS No process matches the specified process ID. */
|
||
-_S(ERR_MAP(ESRCH), N_("No such process"))
|
||
+_S(ESRCH, N_("No such process"))
|
||
#endif
|
||
#ifdef EINTR
|
||
/*
|
||
@@ -29,12 +26,12 @@ TRANS
|
||
TRANS You can choose to have functions resume after a signal that is handled,
|
||
TRANS rather than failing with @code{EINTR}; see @ref{Interrupted
|
||
TRANS Primitives}. */
|
||
-_S(ERR_MAP(EINTR), N_("Interrupted system call"))
|
||
+_S(EINTR, N_("Interrupted system call"))
|
||
#endif
|
||
#ifdef EIO
|
||
/*
|
||
TRANS Usually used for physical read or write errors. */
|
||
-_S(ERR_MAP(EIO), N_("Input/output error"))
|
||
+_S(EIO, N_("Input/output error"))
|
||
#endif
|
||
#ifdef ENXIO
|
||
/*
|
||
@@ -43,7 +40,7 @@ TRANS represented by a file you specified, and it couldn't find the device.
|
||
TRANS This can mean that the device file was installed incorrectly, or that
|
||
TRANS the physical device is missing or not correctly attached to the
|
||
TRANS computer. */
|
||
-_S(ERR_MAP(ENXIO), N_("No such device or address"))
|
||
+_S(ENXIO, N_("No such device or address"))
|
||
#endif
|
||
#ifdef E2BIG
|
||
/*
|
||
@@ -51,27 +48,27 @@ TRANS Used when the arguments passed to a new program
|
||
TRANS being executed with one of the @code{exec} functions (@pxref{Executing a
|
||
TRANS File}) occupy too much memory space. This condition never arises on
|
||
TRANS @gnuhurdsystems{}. */
|
||
-_S(ERR_MAP(E2BIG), N_("Argument list too long"))
|
||
+_S(E2BIG, N_("Argument list too long"))
|
||
#endif
|
||
#ifdef ENOEXEC
|
||
/*
|
||
TRANS Invalid executable file format. This condition is detected by the
|
||
TRANS @code{exec} functions; see @ref{Executing a File}. */
|
||
-_S(ERR_MAP(ENOEXEC), N_("Exec format error"))
|
||
+_S(ENOEXEC, N_("Exec format error"))
|
||
#endif
|
||
#ifdef EBADF
|
||
/*
|
||
TRANS For example, I/O on a descriptor that has been
|
||
TRANS closed or reading from a descriptor open only for writing (or vice
|
||
TRANS versa). */
|
||
-_S(ERR_MAP(EBADF), N_("Bad file descriptor"))
|
||
+_S(EBADF, N_("Bad file descriptor"))
|
||
#endif
|
||
#ifdef ECHILD
|
||
/*
|
||
TRANS This error happens on operations that are
|
||
TRANS supposed to manipulate child processes, when there aren't any processes
|
||
TRANS to manipulate. */
|
||
-_S(ERR_MAP(ECHILD), N_("No child processes"))
|
||
+_S(ECHILD, N_("No child processes"))
|
||
#endif
|
||
#ifdef EDEADLK
|
||
/*
|
||
@@ -79,74 +76,74 @@ TRANS Allocating a system resource would have resulted in a
|
||
TRANS deadlock situation. The system does not guarantee that it will notice
|
||
TRANS all such situations. This error means you got lucky and the system
|
||
TRANS noticed; it might just hang. @xref{File Locks}, for an example. */
|
||
-_S(ERR_MAP(EDEADLK), N_("Resource deadlock avoided"))
|
||
+_S(EDEADLK, N_("Resource deadlock avoided"))
|
||
#endif
|
||
#ifdef ENOMEM
|
||
/*
|
||
TRANS The system cannot allocate more virtual memory
|
||
TRANS because its capacity is full. */
|
||
-_S(ERR_MAP(ENOMEM), N_("Cannot allocate memory"))
|
||
+_S(ENOMEM, N_("Cannot allocate memory"))
|
||
#endif
|
||
#ifdef EACCES
|
||
/*
|
||
TRANS The file permissions do not allow the attempted operation. */
|
||
-_S(ERR_MAP(EACCES), N_("Permission denied"))
|
||
+_S(EACCES, N_("Permission denied"))
|
||
#endif
|
||
#ifdef EFAULT
|
||
/*
|
||
TRANS An invalid pointer was detected.
|
||
TRANS On @gnuhurdsystems{}, this error never happens; you get a signal instead. */
|
||
-_S(ERR_MAP(EFAULT), N_("Bad address"))
|
||
+_S(EFAULT, N_("Bad address"))
|
||
#endif
|
||
#ifdef ENOTBLK
|
||
/*
|
||
TRANS A file that isn't a block special file was given in a situation that
|
||
TRANS requires one. For example, trying to mount an ordinary file as a file
|
||
TRANS system in Unix gives this error. */
|
||
-_S(ERR_MAP(ENOTBLK), N_("Block device required"))
|
||
+_S(ENOTBLK, N_("Block device required"))
|
||
#endif
|
||
#ifdef EBUSY
|
||
/*
|
||
TRANS A system resource that can't be shared is already in use.
|
||
TRANS For example, if you try to delete a file that is the root of a currently
|
||
TRANS mounted filesystem, you get this error. */
|
||
-_S(ERR_MAP(EBUSY), N_("Device or resource busy"))
|
||
+_S(EBUSY, N_("Device or resource busy"))
|
||
#endif
|
||
#ifdef EEXIST
|
||
/*
|
||
TRANS An existing file was specified in a context where it only
|
||
TRANS makes sense to specify a new file. */
|
||
-_S(ERR_MAP(EEXIST), N_("File exists"))
|
||
+_S(EEXIST, N_("File exists"))
|
||
#endif
|
||
#ifdef EXDEV
|
||
/*
|
||
TRANS An attempt to make an improper link across file systems was detected.
|
||
TRANS This happens not only when you use @code{link} (@pxref{Hard Links}) but
|
||
TRANS also when you rename a file with @code{rename} (@pxref{Renaming Files}). */
|
||
-_S(ERR_MAP(EXDEV), N_("Invalid cross-device link"))
|
||
+_S(EXDEV, N_("Invalid cross-device link"))
|
||
#endif
|
||
#ifdef ENODEV
|
||
/*
|
||
TRANS The wrong type of device was given to a function that expects a
|
||
TRANS particular sort of device. */
|
||
-_S(ERR_MAP(ENODEV), N_("No such device"))
|
||
+_S(ENODEV, N_("No such device"))
|
||
#endif
|
||
#ifdef ENOTDIR
|
||
/*
|
||
TRANS A file that isn't a directory was specified when a directory is required. */
|
||
-_S(ERR_MAP(ENOTDIR), N_("Not a directory"))
|
||
+_S(ENOTDIR, N_("Not a directory"))
|
||
#endif
|
||
#ifdef EISDIR
|
||
/*
|
||
TRANS You cannot open a directory for writing,
|
||
TRANS or create or remove hard links to it. */
|
||
-_S(ERR_MAP(EISDIR), N_("Is a directory"))
|
||
+_S(EISDIR, N_("Is a directory"))
|
||
#endif
|
||
#ifdef EINVAL
|
||
/*
|
||
TRANS This is used to indicate various kinds of problems
|
||
TRANS with passing the wrong argument to a library function. */
|
||
-_S(ERR_MAP(EINVAL), N_("Invalid argument"))
|
||
+_S(EINVAL, N_("Invalid argument"))
|
||
#endif
|
||
#ifdef EMFILE
|
||
/*
|
||
@@ -157,20 +154,20 @@ TRANS In BSD and GNU, the number of open files is controlled by a resource
|
||
TRANS limit that can usually be increased. If you get this error, you might
|
||
TRANS want to increase the @code{RLIMIT_NOFILE} limit or make it unlimited;
|
||
TRANS @pxref{Limits on Resources}. */
|
||
-_S(ERR_MAP(EMFILE), N_("Too many open files"))
|
||
+_S(EMFILE, N_("Too many open files"))
|
||
#endif
|
||
#ifdef ENFILE
|
||
/*
|
||
TRANS There are too many distinct file openings in the entire system. Note
|
||
TRANS that any number of linked channels count as just one file opening; see
|
||
TRANS @ref{Linked Channels}. This error never occurs on @gnuhurdsystems{}. */
|
||
-_S(ERR_MAP(ENFILE), N_("Too many open files in system"))
|
||
+_S(ENFILE, N_("Too many open files in system"))
|
||
#endif
|
||
#ifdef ENOTTY
|
||
/*
|
||
TRANS Inappropriate I/O control operation, such as trying to set terminal
|
||
TRANS modes on an ordinary file. */
|
||
-_S(ERR_MAP(ENOTTY), N_("Inappropriate ioctl for device"))
|
||
+_S(ENOTTY, N_("Inappropriate ioctl for device"))
|
||
#endif
|
||
#ifdef ETXTBSY
|
||
/*
|
||
@@ -179,35 +176,35 @@ TRANS write to a file that is currently being executed. Often using a
|
||
TRANS debugger to run a program is considered having it open for writing and
|
||
TRANS will cause this error. (The name stands for ``text file busy''.) This
|
||
TRANS is not an error on @gnuhurdsystems{}; the text is copied as necessary. */
|
||
-_S(ERR_MAP(ETXTBSY), N_("Text file busy"))
|
||
+_S(ETXTBSY, N_("Text file busy"))
|
||
#endif
|
||
#ifdef EFBIG
|
||
/*
|
||
TRANS The size of a file would be larger than allowed by the system. */
|
||
-_S(ERR_MAP(EFBIG), N_("File too large"))
|
||
+_S(EFBIG, N_("File too large"))
|
||
#endif
|
||
#ifdef ENOSPC
|
||
/*
|
||
TRANS Write operation on a file failed because the
|
||
TRANS disk is full. */
|
||
-_S(ERR_MAP(ENOSPC), N_("No space left on device"))
|
||
+_S(ENOSPC, N_("No space left on device"))
|
||
#endif
|
||
#ifdef ESPIPE
|
||
/*
|
||
TRANS Invalid seek operation (such as on a pipe). */
|
||
-_S(ERR_MAP(ESPIPE), N_("Illegal seek"))
|
||
+_S(ESPIPE, N_("Illegal seek"))
|
||
#endif
|
||
#ifdef EROFS
|
||
/*
|
||
TRANS An attempt was made to modify something on a read-only file system. */
|
||
-_S(ERR_MAP(EROFS), N_("Read-only file system"))
|
||
+_S(EROFS, N_("Read-only file system"))
|
||
#endif
|
||
#ifdef EMLINK
|
||
/*
|
||
TRANS The link count of a single file would become too large.
|
||
TRANS @code{rename} can cause this error if the file being renamed already has
|
||
TRANS as many links as it can take (@pxref{Renaming Files}). */
|
||
-_S(ERR_MAP(EMLINK), N_("Too many links"))
|
||
+_S(EMLINK, N_("Too many links"))
|
||
#endif
|
||
#ifdef EPIPE
|
||
/*
|
||
@@ -216,19 +213,19 @@ TRANS Every library function that returns this error code also generates a
|
||
TRANS @code{SIGPIPE} signal; this signal terminates the program if not handled
|
||
TRANS or blocked. Thus, your program will never actually see @code{EPIPE}
|
||
TRANS unless it has handled or blocked @code{SIGPIPE}. */
|
||
-_S(ERR_MAP(EPIPE), N_("Broken pipe"))
|
||
+_S(EPIPE, N_("Broken pipe"))
|
||
#endif
|
||
#ifdef EDOM
|
||
/*
|
||
TRANS Used by mathematical functions when an argument value does
|
||
TRANS not fall into the domain over which the function is defined. */
|
||
-_S(ERR_MAP(EDOM), N_("Numerical argument out of domain"))
|
||
+_S(EDOM, N_("Numerical argument out of domain"))
|
||
#endif
|
||
#ifdef ERANGE
|
||
/*
|
||
TRANS Used by mathematical functions when the result value is
|
||
TRANS not representable because of overflow or underflow. */
|
||
-_S(ERR_MAP(ERANGE), N_("Numerical result out of range"))
|
||
+_S(ERANGE, N_("Numerical result out of range"))
|
||
#endif
|
||
#ifdef EAGAIN
|
||
/*
|
||
@@ -261,7 +258,7 @@ TRANS Such shortages are usually fairly serious and affect the whole system,
|
||
TRANS so usually an interactive program should report the error to the user
|
||
TRANS and return to its command loop.
|
||
TRANS @end itemize */
|
||
-_S(ERR_MAP(EAGAIN), N_("Resource temporarily unavailable"))
|
||
+_S(EAGAIN, N_("Resource temporarily unavailable"))
|
||
#endif
|
||
#ifdef EINPROGRESS
|
||
/*
|
||
@@ -273,47 +270,47 @@ TRANS the operation has begun and will take some time. Attempts to manipulate
|
||
TRANS the object before the call completes return @code{EALREADY}. You can
|
||
TRANS use the @code{select} function to find out when the pending operation
|
||
TRANS has completed; @pxref{Waiting for I/O}. */
|
||
-_S(ERR_MAP(EINPROGRESS), N_("Operation now in progress"))
|
||
+_S(EINPROGRESS, N_("Operation now in progress"))
|
||
#endif
|
||
#ifdef EALREADY
|
||
/*
|
||
TRANS An operation is already in progress on an object that has non-blocking
|
||
TRANS mode selected. */
|
||
-_S(ERR_MAP(EALREADY), N_("Operation already in progress"))
|
||
+_S(EALREADY, N_("Operation already in progress"))
|
||
#endif
|
||
#ifdef ENOTSOCK
|
||
/*
|
||
TRANS A file that isn't a socket was specified when a socket is required. */
|
||
-_S(ERR_MAP(ENOTSOCK), N_("Socket operation on non-socket"))
|
||
+_S(ENOTSOCK, N_("Socket operation on non-socket"))
|
||
#endif
|
||
#ifdef EMSGSIZE
|
||
/*
|
||
TRANS The size of a message sent on a socket was larger than the supported
|
||
TRANS maximum size. */
|
||
-_S(ERR_MAP(EMSGSIZE), N_("Message too long"))
|
||
+_S(EMSGSIZE, N_("Message too long"))
|
||
#endif
|
||
#ifdef EPROTOTYPE
|
||
/*
|
||
TRANS The socket type does not support the requested communications protocol. */
|
||
-_S(ERR_MAP(EPROTOTYPE), N_("Protocol wrong type for socket"))
|
||
+_S(EPROTOTYPE, N_("Protocol wrong type for socket"))
|
||
#endif
|
||
#ifdef ENOPROTOOPT
|
||
/*
|
||
TRANS You specified a socket option that doesn't make sense for the
|
||
TRANS particular protocol being used by the socket. @xref{Socket Options}. */
|
||
-_S(ERR_MAP(ENOPROTOOPT), N_("Protocol not available"))
|
||
+_S(ENOPROTOOPT, N_("Protocol not available"))
|
||
#endif
|
||
#ifdef EPROTONOSUPPORT
|
||
/*
|
||
TRANS The socket domain does not support the requested communications protocol
|
||
TRANS (perhaps because the requested protocol is completely invalid).
|
||
TRANS @xref{Creating a Socket}. */
|
||
-_S(ERR_MAP(EPROTONOSUPPORT), N_("Protocol not supported"))
|
||
+_S(EPROTONOSUPPORT, N_("Protocol not supported"))
|
||
#endif
|
||
#ifdef ESOCKTNOSUPPORT
|
||
/*
|
||
TRANS The socket type is not supported. */
|
||
-_S(ERR_MAP(ESOCKTNOSUPPORT), N_("Socket type not supported"))
|
||
+_S(ESOCKTNOSUPPORT, N_("Socket type not supported"))
|
||
#endif
|
||
#ifdef EOPNOTSUPP
|
||
/*
|
||
@@ -323,71 +320,71 @@ TRANS implemented for all communications protocols. On @gnuhurdsystems{}, this
|
||
TRANS error can happen for many calls when the object does not support the
|
||
TRANS particular operation; it is a generic indication that the server knows
|
||
TRANS nothing to do for that call. */
|
||
-_S(ERR_MAP(EOPNOTSUPP), N_("Operation not supported"))
|
||
+_S(EOPNOTSUPP, N_("Operation not supported"))
|
||
#endif
|
||
#ifdef EPFNOSUPPORT
|
||
/*
|
||
TRANS The socket communications protocol family you requested is not supported. */
|
||
-_S(ERR_MAP(EPFNOSUPPORT), N_("Protocol family not supported"))
|
||
+_S(EPFNOSUPPORT, N_("Protocol family not supported"))
|
||
#endif
|
||
#ifdef EAFNOSUPPORT
|
||
/*
|
||
TRANS The address family specified for a socket is not supported; it is
|
||
TRANS inconsistent with the protocol being used on the socket. @xref{Sockets}. */
|
||
-_S(ERR_MAP(EAFNOSUPPORT), N_("Address family not supported by protocol"))
|
||
+_S(EAFNOSUPPORT, N_("Address family not supported by protocol"))
|
||
#endif
|
||
#ifdef EADDRINUSE
|
||
/*
|
||
TRANS The requested socket address is already in use. @xref{Socket Addresses}. */
|
||
-_S(ERR_MAP(EADDRINUSE), N_("Address already in use"))
|
||
+_S(EADDRINUSE, N_("Address already in use"))
|
||
#endif
|
||
#ifdef EADDRNOTAVAIL
|
||
/*
|
||
TRANS The requested socket address is not available; for example, you tried
|
||
TRANS to give a socket a name that doesn't match the local host name.
|
||
TRANS @xref{Socket Addresses}. */
|
||
-_S(ERR_MAP(EADDRNOTAVAIL), N_("Cannot assign requested address"))
|
||
+_S(EADDRNOTAVAIL, N_("Cannot assign requested address"))
|
||
#endif
|
||
#ifdef ENETDOWN
|
||
/*
|
||
TRANS A socket operation failed because the network was down. */
|
||
-_S(ERR_MAP(ENETDOWN), N_("Network is down"))
|
||
+_S(ENETDOWN, N_("Network is down"))
|
||
#endif
|
||
#ifdef ENETUNREACH
|
||
/*
|
||
TRANS A socket operation failed because the subnet containing the remote host
|
||
TRANS was unreachable. */
|
||
-_S(ERR_MAP(ENETUNREACH), N_("Network is unreachable"))
|
||
+_S(ENETUNREACH, N_("Network is unreachable"))
|
||
#endif
|
||
#ifdef ENETRESET
|
||
/*
|
||
TRANS A network connection was reset because the remote host crashed. */
|
||
-_S(ERR_MAP(ENETRESET), N_("Network dropped connection on reset"))
|
||
+_S(ENETRESET, N_("Network dropped connection on reset"))
|
||
#endif
|
||
#ifdef ECONNABORTED
|
||
/*
|
||
TRANS A network connection was aborted locally. */
|
||
-_S(ERR_MAP(ECONNABORTED), N_("Software caused connection abort"))
|
||
+_S(ECONNABORTED, N_("Software caused connection abort"))
|
||
#endif
|
||
#ifdef ECONNRESET
|
||
/*
|
||
TRANS A network connection was closed for reasons outside the control of the
|
||
TRANS local host, such as by the remote machine rebooting or an unrecoverable
|
||
TRANS protocol violation. */
|
||
-_S(ERR_MAP(ECONNRESET), N_("Connection reset by peer"))
|
||
+_S(ECONNRESET, N_("Connection reset by peer"))
|
||
#endif
|
||
#ifdef ENOBUFS
|
||
/*
|
||
TRANS The kernel's buffers for I/O operations are all in use. In GNU, this
|
||
TRANS error is always synonymous with @code{ENOMEM}; you may get one or the
|
||
TRANS other from network operations. */
|
||
-_S(ERR_MAP(ENOBUFS), N_("No buffer space available"))
|
||
+_S(ENOBUFS, N_("No buffer space available"))
|
||
#endif
|
||
#ifdef EISCONN
|
||
/*
|
||
TRANS You tried to connect a socket that is already connected.
|
||
TRANS @xref{Connecting}. */
|
||
-_S(ERR_MAP(EISCONN), N_("Transport endpoint is already connected"))
|
||
+_S(EISCONN, N_("Transport endpoint is already connected"))
|
||
#endif
|
||
#ifdef ENOTCONN
|
||
/*
|
||
@@ -395,74 +392,74 @@ TRANS The socket is not connected to anything. You get this error when you
|
||
TRANS try to transmit data over a socket, without first specifying a
|
||
TRANS destination for the data. For a connectionless socket (for datagram
|
||
TRANS protocols, such as UDP), you get @code{EDESTADDRREQ} instead. */
|
||
-_S(ERR_MAP(ENOTCONN), N_("Transport endpoint is not connected"))
|
||
+_S(ENOTCONN, N_("Transport endpoint is not connected"))
|
||
#endif
|
||
#ifdef EDESTADDRREQ
|
||
/*
|
||
TRANS No default destination address was set for the socket. You get this
|
||
TRANS error when you try to transmit data over a connectionless socket,
|
||
TRANS without first specifying a destination for the data with @code{connect}. */
|
||
-_S(ERR_MAP(EDESTADDRREQ), N_("Destination address required"))
|
||
+_S(EDESTADDRREQ, N_("Destination address required"))
|
||
#endif
|
||
#ifdef ESHUTDOWN
|
||
/*
|
||
TRANS The socket has already been shut down. */
|
||
-_S(ERR_MAP(ESHUTDOWN), N_("Cannot send after transport endpoint shutdown"))
|
||
+_S(ESHUTDOWN, N_("Cannot send after transport endpoint shutdown"))
|
||
#endif
|
||
#ifdef ETOOMANYREFS
|
||
-_S(ERR_MAP(ETOOMANYREFS), N_("Too many references: cannot splice"))
|
||
+_S(ETOOMANYREFS, N_("Too many references: cannot splice"))
|
||
#endif
|
||
#ifdef ETIMEDOUT
|
||
/*
|
||
TRANS A socket operation with a specified timeout received no response during
|
||
TRANS the timeout period. */
|
||
-_S(ERR_MAP(ETIMEDOUT), N_("Connection timed out"))
|
||
+_S(ETIMEDOUT, N_("Connection timed out"))
|
||
#endif
|
||
#ifdef ECONNREFUSED
|
||
/*
|
||
TRANS A remote host refused to allow the network connection (typically because
|
||
TRANS it is not running the requested service). */
|
||
-_S(ERR_MAP(ECONNREFUSED), N_("Connection refused"))
|
||
+_S(ECONNREFUSED, N_("Connection refused"))
|
||
#endif
|
||
#ifdef ELOOP
|
||
/*
|
||
TRANS Too many levels of symbolic links were encountered in looking up a file name.
|
||
TRANS This often indicates a cycle of symbolic links. */
|
||
-_S(ERR_MAP(ELOOP), N_("Too many levels of symbolic links"))
|
||
+_S(ELOOP, N_("Too many levels of symbolic links"))
|
||
#endif
|
||
#ifdef ENAMETOOLONG
|
||
/*
|
||
TRANS Filename too long (longer than @code{PATH_MAX}; @pxref{Limits for
|
||
TRANS Files}) or host name too long (in @code{gethostname} or
|
||
TRANS @code{sethostname}; @pxref{Host Identification}). */
|
||
-_S(ERR_MAP(ENAMETOOLONG), N_("File name too long"))
|
||
+_S(ENAMETOOLONG, N_("File name too long"))
|
||
#endif
|
||
#ifdef EHOSTDOWN
|
||
/*
|
||
TRANS The remote host for a requested network connection is down. */
|
||
-_S(ERR_MAP(EHOSTDOWN), N_("Host is down"))
|
||
+_S(EHOSTDOWN, N_("Host is down"))
|
||
#endif
|
||
/*
|
||
TRANS The remote host for a requested network connection is not reachable. */
|
||
#ifdef EHOSTUNREACH
|
||
-_S(ERR_MAP(EHOSTUNREACH), N_("No route to host"))
|
||
+_S(EHOSTUNREACH, N_("No route to host"))
|
||
#endif
|
||
#ifdef ENOTEMPTY
|
||
/*
|
||
TRANS Directory not empty, where an empty directory was expected. Typically,
|
||
TRANS this error occurs when you are trying to delete a directory. */
|
||
-_S(ERR_MAP(ENOTEMPTY), N_("Directory not empty"))
|
||
+_S(ENOTEMPTY, N_("Directory not empty"))
|
||
#endif
|
||
#ifdef EUSERS
|
||
/*
|
||
TRANS The file quota system is confused because there are too many users.
|
||
TRANS @c This can probably happen in a GNU system when using NFS. */
|
||
-_S(ERR_MAP(EUSERS), N_("Too many users"))
|
||
+_S(EUSERS, N_("Too many users"))
|
||
#endif
|
||
#ifdef EDQUOT
|
||
/*
|
||
TRANS The user's disk quota was exceeded. */
|
||
-_S(ERR_MAP(EDQUOT), N_("Disk quota exceeded"))
|
||
+_S(EDQUOT, N_("Disk quota exceeded"))
|
||
#endif
|
||
#ifdef ESTALE
|
||
/*
|
||
@@ -471,7 +468,7 @@ TRANS file system which is due to file system rearrangements on the server host
|
||
TRANS for NFS file systems or corruption in other file systems.
|
||
TRANS Repairing this condition usually requires unmounting, possibly repairing
|
||
TRANS and remounting the file system. */
|
||
-_S(ERR_MAP(ESTALE), N_("Stale file handle"))
|
||
+_S(ESTALE, N_("Stale file handle"))
|
||
#endif
|
||
#ifdef EREMOTE
|
||
/*
|
||
@@ -479,7 +476,7 @@ TRANS An attempt was made to NFS-mount a remote file system with a file name tha
|
||
TRANS already specifies an NFS-mounted file.
|
||
TRANS (This is an error on some operating systems, but we expect it to work
|
||
TRANS properly on @gnuhurdsystems{}, making this error code impossible.) */
|
||
-_S(ERR_MAP(EREMOTE), N_("Object is remote"))
|
||
+_S(EREMOTE, N_("Object is remote"))
|
||
#endif
|
||
#ifdef ENOLCK
|
||
/*
|
||
@@ -487,7 +484,7 @@ TRANS This is used by the file locking facilities; see
|
||
TRANS @ref{File Locks}. This error is never generated by @gnuhurdsystems{}, but
|
||
TRANS it can result from an operation to an NFS server running another
|
||
TRANS operating system. */
|
||
-_S(ERR_MAP(ENOLCK), N_("No locks available"))
|
||
+_S(ENOLCK, N_("No locks available"))
|
||
#endif
|
||
#ifdef ENOSYS
|
||
/*
|
||
@@ -496,46 +493,46 @@ TRANS not implemented at all, either in the C library itself or in the
|
||
TRANS operating system. When you get this error, you can be sure that this
|
||
TRANS particular function will always fail with @code{ENOSYS} unless you
|
||
TRANS install a new version of the C library or the operating system. */
|
||
-_S(ERR_MAP(ENOSYS), N_("Function not implemented"))
|
||
+_S(ENOSYS, N_("Function not implemented"))
|
||
#endif
|
||
#ifdef EILSEQ
|
||
/*
|
||
TRANS While decoding a multibyte character the function came along an invalid
|
||
TRANS or an incomplete sequence of bytes or the given wide character is invalid. */
|
||
-_S(ERR_MAP(EILSEQ), N_("Invalid or incomplete multibyte or wide character"))
|
||
+_S(EILSEQ, N_("Invalid or incomplete multibyte or wide character"))
|
||
#endif
|
||
#ifdef EBADMSG
|
||
-_S(ERR_MAP(EBADMSG), N_("Bad message"))
|
||
+_S(EBADMSG, N_("Bad message"))
|
||
#endif
|
||
#ifdef EIDRM
|
||
-_S(ERR_MAP(EIDRM), N_("Identifier removed"))
|
||
+_S(EIDRM, N_("Identifier removed"))
|
||
#endif
|
||
#ifdef EMULTIHOP
|
||
-_S(ERR_MAP(EMULTIHOP), N_("Multihop attempted"))
|
||
+_S(EMULTIHOP, N_("Multihop attempted"))
|
||
#endif
|
||
#ifdef ENODATA
|
||
-_S(ERR_MAP(ENODATA), N_("No data available"))
|
||
+_S(ENODATA, N_("No data available"))
|
||
#endif
|
||
#ifdef ENOLINK
|
||
-_S(ERR_MAP(ENOLINK), N_("Link has been severed"))
|
||
+_S(ENOLINK, N_("Link has been severed"))
|
||
#endif
|
||
#ifdef ENOMSG
|
||
-_S(ERR_MAP(ENOMSG), N_("No message of desired type"))
|
||
+_S(ENOMSG, N_("No message of desired type"))
|
||
#endif
|
||
#ifdef ENOSR
|
||
-_S(ERR_MAP(ENOSR), N_("Out of streams resources"))
|
||
+_S(ENOSR, N_("Out of streams resources"))
|
||
#endif
|
||
#ifdef ENOSTR
|
||
-_S(ERR_MAP(ENOSTR), N_("Device not a stream"))
|
||
+_S(ENOSTR, N_("Device not a stream"))
|
||
#endif
|
||
#ifdef EOVERFLOW
|
||
-_S(ERR_MAP(EOVERFLOW), N_("Value too large for defined data type"))
|
||
+_S(EOVERFLOW, N_("Value too large for defined data type"))
|
||
#endif
|
||
#ifdef EPROTO
|
||
-_S(ERR_MAP(EPROTO), N_("Protocol error"))
|
||
+_S(EPROTO, N_("Protocol error"))
|
||
#endif
|
||
#ifdef ETIME
|
||
-_S(ERR_MAP(ETIME), N_("Timer expired"))
|
||
+_S(ETIME, N_("Timer expired"))
|
||
#endif
|
||
#ifdef ECANCELED
|
||
/*
|
||
@@ -543,148 +540,148 @@ TRANS An asynchronous operation was canceled before it
|
||
TRANS completed. @xref{Asynchronous I/O}. When you call @code{aio_cancel},
|
||
TRANS the normal result is for the operations affected to complete with this
|
||
TRANS error; @pxref{Cancel AIO Operations}. */
|
||
-_S(ERR_MAP(ECANCELED), N_("Operation canceled"))
|
||
+_S(ECANCELED, N_("Operation canceled"))
|
||
#endif
|
||
#ifdef EOWNERDEAD
|
||
-_S(ERR_MAP(EOWNERDEAD), N_("Owner died"))
|
||
+_S(EOWNERDEAD, N_("Owner died"))
|
||
#endif
|
||
#ifdef ENOTRECOVERABLE
|
||
-_S(ERR_MAP(ENOTRECOVERABLE), N_("State not recoverable"))
|
||
+_S(ENOTRECOVERABLE, N_("State not recoverable"))
|
||
#endif
|
||
#ifdef ERESTART
|
||
-_S(ERR_MAP(ERESTART), N_("Interrupted system call should be restarted"))
|
||
+_S(ERESTART, N_("Interrupted system call should be restarted"))
|
||
#endif
|
||
#ifdef ECHRNG
|
||
-_S(ERR_MAP(ECHRNG), N_("Channel number out of range"))
|
||
+_S(ECHRNG, N_("Channel number out of range"))
|
||
#endif
|
||
#ifdef EL2NSYNC
|
||
-_S(ERR_MAP(EL2NSYNC), N_("Level 2 not synchronized"))
|
||
+_S(EL2NSYNC, N_("Level 2 not synchronized"))
|
||
#endif
|
||
#ifdef EL3HLT
|
||
-_S(ERR_MAP(EL3HLT), N_("Level 3 halted"))
|
||
+_S(EL3HLT, N_("Level 3 halted"))
|
||
#endif
|
||
#ifdef EL3RST
|
||
-_S(ERR_MAP(EL3RST), N_("Level 3 reset"))
|
||
+_S(EL3RST, N_("Level 3 reset"))
|
||
#endif
|
||
#ifdef ELNRNG
|
||
-_S(ERR_MAP(ELNRNG), N_("Link number out of range"))
|
||
+_S(ELNRNG, N_("Link number out of range"))
|
||
#endif
|
||
#ifdef EUNATCH
|
||
-_S(ERR_MAP(EUNATCH), N_("Protocol driver not attached"))
|
||
+_S(EUNATCH, N_("Protocol driver not attached"))
|
||
#endif
|
||
#ifdef ENOCSI
|
||
-_S(ERR_MAP(ENOCSI), N_("No CSI structure available"))
|
||
+_S(ENOCSI, N_("No CSI structure available"))
|
||
#endif
|
||
#ifdef EL2HLT
|
||
-_S(ERR_MAP(EL2HLT), N_("Level 2 halted"))
|
||
+_S(EL2HLT, N_("Level 2 halted"))
|
||
#endif
|
||
#ifdef EBADE
|
||
-_S(ERR_MAP(EBADE), N_("Invalid exchange"))
|
||
+_S(EBADE, N_("Invalid exchange"))
|
||
#endif
|
||
#ifdef EBADR
|
||
-_S(ERR_MAP(EBADR), N_("Invalid request descriptor"))
|
||
+_S(EBADR, N_("Invalid request descriptor"))
|
||
#endif
|
||
#ifdef EXFULL
|
||
-_S(ERR_MAP(EXFULL), N_("Exchange full"))
|
||
+_S(EXFULL, N_("Exchange full"))
|
||
#endif
|
||
#ifdef ENOANO
|
||
-_S(ERR_MAP(ENOANO), N_("No anode"))
|
||
+_S(ENOANO, N_("No anode"))
|
||
#endif
|
||
#ifdef EBADRQC
|
||
-_S(ERR_MAP(EBADRQC), N_("Invalid request code"))
|
||
+_S(EBADRQC, N_("Invalid request code"))
|
||
#endif
|
||
#ifdef EBADSLT
|
||
-_S(ERR_MAP(EBADSLT), N_("Invalid slot"))
|
||
+_S(EBADSLT, N_("Invalid slot"))
|
||
#endif
|
||
#ifdef EBFONT
|
||
-_S(ERR_MAP(EBFONT), N_("Bad font file format"))
|
||
+_S(EBFONT, N_("Bad font file format"))
|
||
#endif
|
||
#ifdef ENONET
|
||
-_S(ERR_MAP(ENONET), N_("Machine is not on the network"))
|
||
+_S(ENONET, N_("Machine is not on the network"))
|
||
#endif
|
||
#ifdef ENOPKG
|
||
-_S(ERR_MAP(ENOPKG), N_("Package not installed"))
|
||
+_S(ENOPKG, N_("Package not installed"))
|
||
#endif
|
||
#ifdef EADV
|
||
-_S(ERR_MAP(EADV), N_("Advertise error"))
|
||
+_S(EADV, N_("Advertise error"))
|
||
#endif
|
||
#ifdef ESRMNT
|
||
-_S(ERR_MAP(ESRMNT), N_("Srmount error"))
|
||
+_S(ESRMNT, N_("Srmount error"))
|
||
#endif
|
||
#ifdef ECOMM
|
||
-_S(ERR_MAP(ECOMM), N_("Communication error on send"))
|
||
+_S(ECOMM, N_("Communication error on send"))
|
||
#endif
|
||
#ifdef EDOTDOT
|
||
-_S(ERR_MAP(EDOTDOT), N_("RFS specific error"))
|
||
+_S(EDOTDOT, N_("RFS specific error"))
|
||
#endif
|
||
#ifdef ENOTUNIQ
|
||
-_S(ERR_MAP(ENOTUNIQ), N_("Name not unique on network"))
|
||
+_S(ENOTUNIQ, N_("Name not unique on network"))
|
||
#endif
|
||
#ifdef EBADFD
|
||
-_S(ERR_MAP(EBADFD), N_("File descriptor in bad state"))
|
||
+_S(EBADFD, N_("File descriptor in bad state"))
|
||
#endif
|
||
#ifdef EREMCHG
|
||
-_S(ERR_MAP(EREMCHG), N_("Remote address changed"))
|
||
+_S(EREMCHG, N_("Remote address changed"))
|
||
#endif
|
||
#ifdef ELIBACC
|
||
-_S(ERR_MAP(ELIBACC), N_("Can not access a needed shared library"))
|
||
+_S(ELIBACC, N_("Can not access a needed shared library"))
|
||
#endif
|
||
#ifdef ELIBBAD
|
||
-_S(ERR_MAP(ELIBBAD), N_("Accessing a corrupted shared library"))
|
||
+_S(ELIBBAD, N_("Accessing a corrupted shared library"))
|
||
#endif
|
||
#ifdef ELIBSCN
|
||
-_S(ERR_MAP(ELIBSCN), N_(".lib section in a.out corrupted"))
|
||
+_S(ELIBSCN, N_(".lib section in a.out corrupted"))
|
||
#endif
|
||
#ifdef ELIBMAX
|
||
-_S(ERR_MAP(ELIBMAX), N_("Attempting to link in too many shared libraries"))
|
||
+_S(ELIBMAX, N_("Attempting to link in too many shared libraries"))
|
||
#endif
|
||
#ifdef ELIBEXEC
|
||
-_S(ERR_MAP(ELIBEXEC), N_("Cannot exec a shared library directly"))
|
||
+_S(ELIBEXEC, N_("Cannot exec a shared library directly"))
|
||
#endif
|
||
#ifdef ESTRPIPE
|
||
-_S(ERR_MAP(ESTRPIPE), N_("Streams pipe error"))
|
||
+_S(ESTRPIPE, N_("Streams pipe error"))
|
||
#endif
|
||
#ifdef EUCLEAN
|
||
-_S(ERR_MAP(EUCLEAN), N_("Structure needs cleaning"))
|
||
+_S(EUCLEAN, N_("Structure needs cleaning"))
|
||
#endif
|
||
#ifdef ENOTNAM
|
||
-_S(ERR_MAP(ENOTNAM), N_("Not a XENIX named type file"))
|
||
+_S(ENOTNAM, N_("Not a XENIX named type file"))
|
||
#endif
|
||
#ifdef ENAVAIL
|
||
-_S(ERR_MAP(ENAVAIL), N_("No XENIX semaphores available"))
|
||
+_S(ENAVAIL, N_("No XENIX semaphores available"))
|
||
#endif
|
||
#ifdef EISNAM
|
||
-_S(ERR_MAP(EISNAM), N_("Is a named type file"))
|
||
+_S(EISNAM, N_("Is a named type file"))
|
||
#endif
|
||
#ifdef EREMOTEIO
|
||
-_S(ERR_MAP(EREMOTEIO), N_("Remote I/O error"))
|
||
+_S(EREMOTEIO, N_("Remote I/O error"))
|
||
#endif
|
||
#ifdef ENOMEDIUM
|
||
-_S(ERR_MAP(ENOMEDIUM), N_("No medium found"))
|
||
+_S(ENOMEDIUM, N_("No medium found"))
|
||
#endif
|
||
#ifdef EMEDIUMTYPE
|
||
-_S(ERR_MAP(EMEDIUMTYPE), N_("Wrong medium type"))
|
||
+_S(EMEDIUMTYPE, N_("Wrong medium type"))
|
||
#endif
|
||
#ifdef ENOKEY
|
||
-_S(ERR_MAP(ENOKEY), N_("Required key not available"))
|
||
+_S(ENOKEY, N_("Required key not available"))
|
||
#endif
|
||
#ifdef EKEYEXPIRED
|
||
-_S(ERR_MAP(EKEYEXPIRED), N_("Key has expired"))
|
||
+_S(EKEYEXPIRED, N_("Key has expired"))
|
||
#endif
|
||
#ifdef EKEYREVOKED
|
||
-_S(ERR_MAP(EKEYREVOKED), N_("Key has been revoked"))
|
||
+_S(EKEYREVOKED, N_("Key has been revoked"))
|
||
#endif
|
||
#ifdef EKEYREJECTED
|
||
-_S(ERR_MAP(EKEYREJECTED), N_("Key was rejected by service"))
|
||
+_S(EKEYREJECTED, N_("Key was rejected by service"))
|
||
#endif
|
||
#ifdef ERFKILL
|
||
-_S(ERR_MAP(ERFKILL), N_("Operation not possible due to RF-kill"))
|
||
+_S(ERFKILL, N_("Operation not possible due to RF-kill"))
|
||
#endif
|
||
#ifdef EHWPOISON
|
||
-_S(ERR_MAP(EHWPOISON), N_("Memory page has hardware error"))
|
||
+_S(EHWPOISON, N_("Memory page has hardware error"))
|
||
#endif
|
||
#ifdef EBADRPC
|
||
-_S(ERR_MAP(EBADRPC), N_("RPC struct is bad"))
|
||
+_S(EBADRPC, N_("RPC struct is bad"))
|
||
#endif
|
||
#ifdef EFTYPE
|
||
/*
|
||
@@ -693,40 +690,40 @@ TRANS operation, or a data file had the wrong format.
|
||
TRANS
|
||
TRANS On some systems @code{chmod} returns this error if you try to set the
|
||
TRANS sticky bit on a non-directory file; @pxref{Setting Permissions}. */
|
||
-_S(ERR_MAP(EFTYPE), N_("Inappropriate file type or format"))
|
||
+_S(EFTYPE, N_("Inappropriate file type or format"))
|
||
#endif
|
||
#ifdef EPROCUNAVAIL
|
||
-_S(ERR_MAP(EPROCUNAVAIL), N_("RPC bad procedure for program"))
|
||
+_S(EPROCUNAVAIL, N_("RPC bad procedure for program"))
|
||
#endif
|
||
#ifdef EAUTH
|
||
-_S(ERR_MAP(EAUTH), N_("Authentication error"))
|
||
+_S(EAUTH, N_("Authentication error"))
|
||
#endif
|
||
#ifdef EDIED
|
||
/*
|
||
TRANS On @gnuhurdsystems{}, opening a file returns this error when the file is
|
||
TRANS translated by a program and the translator program dies while starting
|
||
TRANS up, before it has connected to the file. */
|
||
-_S(ERR_MAP(EDIED), N_("Translator died"))
|
||
+_S(EDIED, N_("Translator died"))
|
||
#endif
|
||
#ifdef ERPCMISMATCH
|
||
-_S(ERR_MAP(ERPCMISMATCH), N_("RPC version wrong"))
|
||
+_S(ERPCMISMATCH, N_("RPC version wrong"))
|
||
#endif
|
||
#ifdef EGREGIOUS
|
||
/*
|
||
TRANS You did @strong{what}? */
|
||
-_S(ERR_MAP(EGREGIOUS), N_("You really blew it this time"))
|
||
+_S(EGREGIOUS, N_("You really blew it this time"))
|
||
#endif
|
||
#ifdef EPROCLIM
|
||
/*
|
||
TRANS This means that the per-user limit on new process would be exceeded by
|
||
TRANS an attempted @code{fork}. @xref{Limits on Resources}, for details on
|
||
TRANS the @code{RLIMIT_NPROC} limit. */
|
||
-_S(ERR_MAP(EPROCLIM), N_("Too many processes"))
|
||
+_S(EPROCLIM, N_("Too many processes"))
|
||
#endif
|
||
#ifdef EGRATUITOUS
|
||
/*
|
||
TRANS This error code has no purpose. */
|
||
-_S(ERR_MAP(EGRATUITOUS), N_("Gratuitous error"))
|
||
+_S(EGRATUITOUS, N_("Gratuitous error"))
|
||
#endif
|
||
#if defined (ENOTSUP) && ENOTSUP != EOPNOTSUPP
|
||
/*
|
||
@@ -742,10 +739,10 @@ TRANS values.
|
||
TRANS
|
||
TRANS If the entire function is not available at all in the implementation,
|
||
TRANS it returns @code{ENOSYS} instead. */
|
||
-_S(ERR_MAP(ENOTSUP), N_("Not supported"))
|
||
+_S(ENOTSUP, N_("Not supported"))
|
||
#endif
|
||
#ifdef EPROGMISMATCH
|
||
-_S(ERR_MAP(EPROGMISMATCH), N_("RPC program version wrong"))
|
||
+_S(EPROGMISMATCH, N_("RPC program version wrong"))
|
||
#endif
|
||
#ifdef EBACKGROUND
|
||
/*
|
||
@@ -755,7 +752,7 @@ TRANS foreground process group of the terminal. Users do not usually see this
|
||
TRANS error because functions such as @code{read} and @code{write} translate
|
||
TRANS it into a @code{SIGTTIN} or @code{SIGTTOU} signal. @xref{Job Control},
|
||
TRANS for information on process groups and these signals. */
|
||
-_S(ERR_MAP(EBACKGROUND), N_("Inappropriate operation for background process"))
|
||
+_S(EBACKGROUND, N_("Inappropriate operation for background process"))
|
||
#endif
|
||
#ifdef EIEIO
|
||
/*
|
||
@@ -773,7 +770,7 @@ TRANS @c "bought the farm" means "died". -jtobey
|
||
TRANS @c
|
||
TRANS @c Translators, please do not translate this litteraly, translate it into
|
||
TRANS @c an idiomatic funny way of saying that the computer died. */
|
||
-_S(ERR_MAP(EIEIO), N_("Computer bought the farm"))
|
||
+_S(EIEIO, N_("Computer bought the farm"))
|
||
#endif
|
||
#if defined (EWOULDBLOCK) && EWOULDBLOCK != EAGAIN
|
||
/*
|
||
@@ -782,18 +779,18 @@ TRANS The values are always the same, on every operating system.
|
||
TRANS
|
||
TRANS C libraries in many older Unix systems have @code{EWOULDBLOCK} as a
|
||
TRANS separate error code. */
|
||
-_S(ERR_MAP(EWOULDBLOCK), N_("Operation would block"))
|
||
+_S(EWOULDBLOCK, N_("Operation would block"))
|
||
#endif
|
||
#ifdef ENEEDAUTH
|
||
-_S(ERR_MAP(ENEEDAUTH), N_("Need authenticator"))
|
||
+_S(ENEEDAUTH, N_("Need authenticator"))
|
||
#endif
|
||
#ifdef ED
|
||
/*
|
||
TRANS The experienced user will know what is wrong.
|
||
TRANS @c This error code is a joke. Its perror text is part of the joke.
|
||
TRANS @c Don't change it. */
|
||
-_S(ERR_MAP(ED), N_("?"))
|
||
+_S(ED, N_("?"))
|
||
#endif
|
||
#ifdef EPROGUNAVAIL
|
||
-_S(ERR_MAP(EPROGUNAVAIL), N_("RPC program not available"))
|
||
+_S(EPROGUNAVAIL, N_("RPC program not available"))
|
||
#endif
|
||
diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
|
||
index 0f08079e48..672d8f27ce 100644
|
||
--- a/sysdeps/i386/dl-machine.h
|
||
+++ b/sysdeps/i386/dl-machine.h
|
||
@@ -338,16 +338,22 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc,
|
||
{
|
||
# ifndef RTLD_BOOTSTRAP
|
||
if (sym_map != map
|
||
- && sym_map->l_type != lt_executable
|
||
&& !sym_map->l_relocated)
|
||
{
|
||
const char *strtab
|
||
= (const char *) D_PTR (map, l_info[DT_STRTAB]);
|
||
- _dl_error_printf ("\
|
||
+ if (sym_map->l_type == lt_executable)
|
||
+ _dl_fatal_printf ("\
|
||
+%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \
|
||
+and creates an unsatisfiable circular dependency.\n",
|
||
+ RTLD_PROGNAME, strtab + refsym->st_name,
|
||
+ map->l_name);
|
||
+ else
|
||
+ _dl_error_printf ("\
|
||
%s: Relink `%s' with `%s' for IFUNC symbol `%s'\n",
|
||
- RTLD_PROGNAME, map->l_name,
|
||
- sym_map->l_name,
|
||
- strtab + refsym->st_name);
|
||
+ RTLD_PROGNAME, map->l_name,
|
||
+ sym_map->l_name,
|
||
+ strtab + refsym->st_name);
|
||
}
|
||
# endif
|
||
value = ((Elf32_Addr (*) (void)) value) ();
|
||
diff --git a/sysdeps/powerpc/powerpc64/backtrace.c b/sysdeps/powerpc/powerpc64/backtrace.c
|
||
index 8a53a1088f..362a2b713c 100644
|
||
--- a/sysdeps/powerpc/powerpc64/backtrace.c
|
||
+++ b/sysdeps/powerpc/powerpc64/backtrace.c
|
||
@@ -54,11 +54,22 @@ struct signal_frame_64 {
|
||
/* We don't care about the rest, since the IP value is at 'uc' field. */
|
||
};
|
||
|
||
+/* Test if the address is inside the trampoline code.
|
||
+ Up to and including kernel 5.8, returning from an interrupt or syscall to a
|
||
+ signal handler starts execution directly at the handler's entry point, with
|
||
+ LR set to address of the sigreturn trampoline (the vDSO symbol).
|
||
+ Newer kernels will branch to signal handler from the trampoline instead, so
|
||
+ checking the stacktrace against the vDSO entrypoint does not work in such
|
||
+ case.
|
||
+ The vDSO branches with a 'bctrl' instruction, so checking both the
|
||
+ vDSO address itself and the next instruction should cover all kernel
|
||
+ versions. */
|
||
static inline bool
|
||
is_sigtramp_address (void *nip)
|
||
{
|
||
#ifdef HAVE_SIGTRAMP_RT64
|
||
- if (nip == GLRO (dl_vdso_sigtramp_rt64))
|
||
+ if (nip == GLRO (dl_vdso_sigtramp_rt64) ||
|
||
+ nip == GLRO (dl_vdso_sigtramp_rt64) + 4)
|
||
return true;
|
||
#endif
|
||
return false;
|
||
diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile
|
||
index 920d875420..bf9b7f7223 100644
|
||
--- a/sysdeps/pthread/Makefile
|
||
+++ b/sysdeps/pthread/Makefile
|
||
@@ -107,6 +107,7 @@ tests += tst-cnd-basic tst-mtx-trylock tst-cnd-broadcast \
|
||
tst-unload \
|
||
tst-unwind-thread \
|
||
tst-pt-vfork1 tst-pt-vfork2 tst-vfork1x tst-vfork2x \
|
||
+ tst-pthread-exit-signal \
|
||
|
||
|
||
# Files which must not be linked with libpthread.
|
||
diff --git a/sysdeps/pthread/tst-pthread-exit-signal.c b/sysdeps/pthread/tst-pthread-exit-signal.c
|
||
new file mode 100644
|
||
index 0000000000..b4526fe663
|
||
--- /dev/null
|
||
+++ b/sysdeps/pthread/tst-pthread-exit-signal.c
|
||
@@ -0,0 +1,45 @@
|
||
+/* Test that pending signals are not delivered on thread exit (bug 28607).
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+/* Due to bug 28607, pthread_kill (or pthread_cancel) restored the
|
||
+ signal mask during thread exit, triggering the delivery of a
|
||
+ blocked pending signal (SIGUSR1 in this test). */
|
||
+
|
||
+#include <support/xthread.h>
|
||
+#include <support/xsignal.h>
|
||
+
|
||
+static void *
|
||
+threadfunc (void *closure)
|
||
+{
|
||
+ sigset_t sigmask;
|
||
+ sigfillset (&sigmask);
|
||
+ xpthread_sigmask (SIG_SETMASK, &sigmask, NULL);
|
||
+ xpthread_kill (pthread_self (), SIGUSR1);
|
||
+ pthread_exit (NULL);
|
||
+ return NULL;
|
||
+}
|
||
+
|
||
+static int
|
||
+do_test (void)
|
||
+{
|
||
+ pthread_t thr = xpthread_create (NULL, threadfunc, NULL);
|
||
+ xpthread_join (thr);
|
||
+ return 0;
|
||
+}
|
||
+
|
||
+#include <support/test-driver.c>
|
||
diff --git a/sysdeps/s390/configure b/sysdeps/s390/configure
|
||
index fa46e9e351..e7f576338d 100644
|
||
--- a/sysdeps/s390/configure
|
||
+++ b/sysdeps/s390/configure
|
||
@@ -123,7 +123,9 @@ void testinsn (char *buf)
|
||
__asm__ (".machine \"arch13\" \n\t"
|
||
".machinemode \"zarch_nohighgprs\" \n\t"
|
||
"lghi %%r0,16 \n\t"
|
||
- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
|
||
+ "mvcrl 0(%0),32(%0) \n\t"
|
||
+ "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
|
||
+ : : "a" (buf) : "memory", "r0");
|
||
}
|
||
EOF
|
||
if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c
|
||
@@ -271,7 +273,9 @@ else
|
||
void testinsn (char *buf)
|
||
{
|
||
__asm__ ("lghi %%r0,16 \n\t"
|
||
- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
|
||
+ "mvcrl 0(%0),32(%0) \n\t"
|
||
+ "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
|
||
+ : : "a" (buf) : "memory", "r0");
|
||
}
|
||
EOF
|
||
if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c
|
||
diff --git a/sysdeps/s390/configure.ac b/sysdeps/s390/configure.ac
|
||
index 3ed5a8ef87..5c3479e8cf 100644
|
||
--- a/sysdeps/s390/configure.ac
|
||
+++ b/sysdeps/s390/configure.ac
|
||
@@ -88,7 +88,9 @@ void testinsn (char *buf)
|
||
__asm__ (".machine \"arch13\" \n\t"
|
||
".machinemode \"zarch_nohighgprs\" \n\t"
|
||
"lghi %%r0,16 \n\t"
|
||
- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
|
||
+ "mvcrl 0(%0),32(%0) \n\t"
|
||
+ "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
|
||
+ : : "a" (buf) : "memory", "r0");
|
||
}
|
||
EOF
|
||
dnl test, if assembler supports S390 arch13 instructions
|
||
@@ -195,7 +197,9 @@ cat > conftest.c <<\EOF
|
||
void testinsn (char *buf)
|
||
{
|
||
__asm__ ("lghi %%r0,16 \n\t"
|
||
- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
|
||
+ "mvcrl 0(%0),32(%0) \n\t"
|
||
+ "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
|
||
+ : : "a" (buf) : "memory", "r0");
|
||
}
|
||
EOF
|
||
dnl test, if assembler supports S390 arch13 zarch instructions as default
|
||
diff --git a/sysdeps/s390/memmove.c b/sysdeps/s390/memmove.c
|
||
index 5fc85e129f..ee59b5de14 100644
|
||
--- a/sysdeps/s390/memmove.c
|
||
+++ b/sysdeps/s390/memmove.c
|
||
@@ -43,7 +43,7 @@ extern __typeof (__redirect_memmove) MEMMOVE_ARCH13 attribute_hidden;
|
||
s390_libc_ifunc_expr (__redirect_memmove, memmove,
|
||
({
|
||
s390_libc_ifunc_expr_stfle_init ();
|
||
- (HAVE_MEMMOVE_ARCH13
|
||
+ (HAVE_MEMMOVE_ARCH13 && (hwcap & HWCAP_S390_VXRS_EXT2)
|
||
&& S390_IS_ARCH13_MIE3 (stfle_bits))
|
||
? MEMMOVE_ARCH13
|
||
: (HAVE_MEMMOVE_Z13 && (hwcap & HWCAP_S390_VX))
|
||
diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c
|
||
index e6195c6e26..17c0cc3952 100644
|
||
--- a/sysdeps/s390/multiarch/ifunc-impl-list.c
|
||
+++ b/sysdeps/s390/multiarch/ifunc-impl-list.c
|
||
@@ -171,7 +171,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL (i, name, memmove,
|
||
# if HAVE_MEMMOVE_ARCH13
|
||
IFUNC_IMPL_ADD (array, i, memmove,
|
||
- S390_IS_ARCH13_MIE3 (stfle_bits),
|
||
+ ((dl_hwcap & HWCAP_S390_VXRS_EXT2)
|
||
+ && S390_IS_ARCH13_MIE3 (stfle_bits)),
|
||
MEMMOVE_ARCH13)
|
||
# endif
|
||
# if HAVE_MEMMOVE_Z13
|
||
diff --git a/sysdeps/sh/be/sh4/fpu/Implies b/sysdeps/sh/be/sh4/fpu/Implies
|
||
new file mode 100644
|
||
index 0000000000..71b28ee1a4
|
||
--- /dev/null
|
||
+++ b/sysdeps/sh/be/sh4/fpu/Implies
|
||
@@ -0,0 +1 @@
|
||
+sh/sh4/fpu
|
||
diff --git a/sysdeps/sh/le/sh4/fpu/Implies b/sysdeps/sh/le/sh4/fpu/Implies
|
||
new file mode 100644
|
||
index 0000000000..71b28ee1a4
|
||
--- /dev/null
|
||
+++ b/sysdeps/sh/le/sh4/fpu/Implies
|
||
@@ -0,0 +1 @@
|
||
+sh/sh4/fpu
|
||
diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
|
||
index 9b2a253032..34748ffcd1 100644
|
||
--- a/sysdeps/unix/sysv/linux/Makefile
|
||
+++ b/sysdeps/unix/sysv/linux/Makefile
|
||
@@ -100,7 +100,7 @@ tests += tst-clone tst-clone2 tst-clone3 tst-fanotify tst-personality \
|
||
tst-quota tst-sync_file_range tst-sysconf-iov_max tst-ttyname \
|
||
test-errno-linux tst-memfd_create tst-mlock2 tst-pkey \
|
||
tst-rlimit-infinity tst-ofdlocks tst-gettid tst-gettid-kill \
|
||
- tst-tgkill
|
||
+ tst-tgkill tst-sysvsem-linux tst-sysvmsg-linux tst-sysvshm-linux
|
||
tests-internal += tst-ofdlocks-compat tst-sigcontext-get_pc
|
||
|
||
CFLAGS-tst-sigcontext-get_pc.c = -fasynchronous-unwind-tables
|
||
diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
|
||
index fc688450ee..00a4d0c8e7 100644
|
||
--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
|
||
+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
|
||
@@ -54,6 +54,10 @@
|
||
&& MIDR_PARTNUM(midr) == 0x000)
|
||
#define IS_NEOVERSE_N1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \
|
||
&& MIDR_PARTNUM(midr) == 0xd0c)
|
||
+#define IS_NEOVERSE_N2(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \
|
||
+ && MIDR_PARTNUM(midr) == 0xd49)
|
||
+#define IS_NEOVERSE_V1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \
|
||
+ && MIDR_PARTNUM(midr) == 0xd40)
|
||
|
||
#define IS_EMAG(midr) (MIDR_IMPLEMENTOR(midr) == 'P' \
|
||
&& MIDR_PARTNUM(midr) == 0x000)
|
||
diff --git a/sysdeps/unix/sysv/linux/mq_notify.c b/sysdeps/unix/sysv/linux/mq_notify.c
|
||
index 61bbb03b64..2bb98172c8 100644
|
||
--- a/sysdeps/unix/sysv/linux/mq_notify.c
|
||
+++ b/sysdeps/unix/sysv/linux/mq_notify.c
|
||
@@ -132,9 +132,12 @@ helper_thread (void *arg)
|
||
to wait until it is done with it. */
|
||
(void) __pthread_barrier_wait (&notify_barrier);
|
||
}
|
||
- else if (data.raw[NOTIFY_COOKIE_LEN - 1] == NOTIFY_REMOVED)
|
||
- /* The only state we keep is the copy of the thread attributes. */
|
||
- free (data.attr);
|
||
+ else if (data.raw[NOTIFY_COOKIE_LEN - 1] == NOTIFY_REMOVED && data.attr != NULL)
|
||
+ {
|
||
+ /* The only state we keep is the copy of the thread attributes. */
|
||
+ pthread_attr_destroy (data.attr);
|
||
+ free (data.attr);
|
||
+ }
|
||
}
|
||
return NULL;
|
||
}
|
||
@@ -255,8 +258,14 @@ mq_notify (mqd_t mqdes, const struct sigevent *notification)
|
||
if (data.attr == NULL)
|
||
return -1;
|
||
|
||
- memcpy (data.attr, notification->sigev_notify_attributes,
|
||
- sizeof (pthread_attr_t));
|
||
+ int ret = __pthread_attr_copy (data.attr,
|
||
+ notification->sigev_notify_attributes);
|
||
+ if (ret != 0)
|
||
+ {
|
||
+ free (data.attr);
|
||
+ __set_errno (ret);
|
||
+ return -1;
|
||
+ }
|
||
}
|
||
|
||
/* Construct the new request. */
|
||
@@ -269,8 +278,11 @@ mq_notify (mqd_t mqdes, const struct sigevent *notification)
|
||
int retval = INLINE_SYSCALL (mq_notify, 2, mqdes, &se);
|
||
|
||
/* If it failed, free the allocated memory. */
|
||
- if (__glibc_unlikely (retval != 0))
|
||
- free (data.attr);
|
||
+ if (retval != 0 && data.attr != NULL)
|
||
+ {
|
||
+ pthread_attr_destroy (data.attr);
|
||
+ free (data.attr);
|
||
+ }
|
||
|
||
return retval;
|
||
}
|
||
diff --git a/sysdeps/unix/sysv/linux/msgctl.c b/sysdeps/unix/sysv/linux/msgctl.c
|
||
index 0776472d5e..a1f24ab242 100644
|
||
--- a/sysdeps/unix/sysv/linux/msgctl.c
|
||
+++ b/sysdeps/unix/sysv/linux/msgctl.c
|
||
@@ -90,8 +90,15 @@ __msgctl64 (int msqid, int cmd, struct __msqid64_ds *buf)
|
||
struct kernel_msqid64_ds ksemid, *arg = NULL;
|
||
if (buf != NULL)
|
||
{
|
||
- msqid64_to_kmsqid64 (buf, &ksemid);
|
||
- arg = &ksemid;
|
||
+ /* This is a Linux extension where kernel returns a 'struct msginfo'
|
||
+ instead. */
|
||
+ if (cmd == IPC_INFO || cmd == MSG_INFO)
|
||
+ arg = (struct kernel_msqid64_ds *) buf;
|
||
+ else
|
||
+ {
|
||
+ msqid64_to_kmsqid64 (buf, &ksemid);
|
||
+ arg = &ksemid;
|
||
+ }
|
||
}
|
||
# ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T
|
||
if (cmd == IPC_SET)
|
||
@@ -169,8 +176,15 @@ __msgctl (int msqid, int cmd, struct msqid_ds *buf)
|
||
struct __msqid64_ds msqid64, *buf64 = NULL;
|
||
if (buf != NULL)
|
||
{
|
||
- msqid_to_msqid64 (&msqid64, buf);
|
||
- buf64 = &msqid64;
|
||
+ /* This is a Linux extension where kernel returns a 'struct msginfo'
|
||
+ instead. */
|
||
+ if (cmd == IPC_INFO || cmd == MSG_INFO)
|
||
+ buf64 = (struct __msqid64_ds *) buf;
|
||
+ else
|
||
+ {
|
||
+ msqid_to_msqid64 (&msqid64, buf);
|
||
+ buf64 = &msqid64;
|
||
+ }
|
||
}
|
||
|
||
int ret = __msgctl64 (msqid, cmd, buf64);
|
||
diff --git a/sysdeps/unix/sysv/linux/semctl.c b/sysdeps/unix/sysv/linux/semctl.c
|
||
index f131a26fc7..1cdabde8f2 100644
|
||
--- a/sysdeps/unix/sysv/linux/semctl.c
|
||
+++ b/sysdeps/unix/sysv/linux/semctl.c
|
||
@@ -102,6 +102,7 @@ semun64_to_ksemun64 (int cmd, union semun64 semun64,
|
||
r.array = semun64.array;
|
||
break;
|
||
case SEM_STAT:
|
||
+ case SEM_STAT_ANY:
|
||
case IPC_STAT:
|
||
case IPC_SET:
|
||
r.buf = buf;
|
||
@@ -150,6 +151,7 @@ __semctl64 (int semid, int semnum, int cmd, ...)
|
||
case IPC_STAT: /* arg.buf */
|
||
case IPC_SET:
|
||
case SEM_STAT:
|
||
+ case SEM_STAT_ANY:
|
||
case IPC_INFO: /* arg.__buf */
|
||
case SEM_INFO:
|
||
va_start (ap, cmd);
|
||
@@ -238,6 +240,7 @@ semun_to_semun64 (int cmd, union semun semun, struct __semid64_ds *semid64)
|
||
r.array = semun.array;
|
||
break;
|
||
case SEM_STAT:
|
||
+ case SEM_STAT_ANY:
|
||
case IPC_STAT:
|
||
case IPC_SET:
|
||
r.buf = semid64;
|
||
@@ -267,6 +270,7 @@ __semctl (int semid, int semnum, int cmd, ...)
|
||
case IPC_STAT: /* arg.buf */
|
||
case IPC_SET:
|
||
case SEM_STAT:
|
||
+ case SEM_STAT_ANY:
|
||
case IPC_INFO: /* arg.__buf */
|
||
case SEM_INFO:
|
||
va_start (ap, cmd);
|
||
@@ -321,6 +325,7 @@ __semctl_mode16 (int semid, int semnum, int cmd, ...)
|
||
case IPC_STAT: /* arg.buf */
|
||
case IPC_SET:
|
||
case SEM_STAT:
|
||
+ case SEM_STAT_ANY:
|
||
case IPC_INFO: /* arg.__buf */
|
||
case SEM_INFO:
|
||
va_start (ap, cmd);
|
||
@@ -354,6 +359,7 @@ __old_semctl (int semid, int semnum, int cmd, ...)
|
||
case IPC_STAT: /* arg.buf */
|
||
case IPC_SET:
|
||
case SEM_STAT:
|
||
+ case SEM_STAT_ANY:
|
||
case IPC_INFO: /* arg.__buf */
|
||
case SEM_INFO:
|
||
va_start (ap, cmd);
|
||
diff --git a/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies b/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies
|
||
new file mode 100644
|
||
index 0000000000..7eeaf15a5a
|
||
--- /dev/null
|
||
+++ b/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies
|
||
@@ -0,0 +1 @@
|
||
+unix/sysv/linux/sh/sh4/fpu
|
||
diff --git a/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies b/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies
|
||
new file mode 100644
|
||
index 0000000000..7eeaf15a5a
|
||
--- /dev/null
|
||
+++ b/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies
|
||
@@ -0,0 +1 @@
|
||
+unix/sysv/linux/sh/sh4/fpu
|
||
diff --git a/sysdeps/unix/sysv/linux/shmctl.c b/sysdeps/unix/sysv/linux/shmctl.c
|
||
index 76d88441f1..1d19a798b1 100644
|
||
--- a/sysdeps/unix/sysv/linux/shmctl.c
|
||
+++ b/sysdeps/unix/sysv/linux/shmctl.c
|
||
@@ -90,8 +90,15 @@ __shmctl64 (int shmid, int cmd, struct __shmid64_ds *buf)
|
||
struct kernel_shmid64_ds kshmid, *arg = NULL;
|
||
if (buf != NULL)
|
||
{
|
||
- shmid64_to_kshmid64 (buf, &kshmid);
|
||
- arg = &kshmid;
|
||
+ /* This is a Linux extension where kernel expects either a
|
||
+ 'struct shminfo' (IPC_INFO) or 'struct shm_info' (SHM_INFO). */
|
||
+ if (cmd == IPC_INFO || cmd == SHM_INFO)
|
||
+ arg = (struct kernel_shmid64_ds *) buf;
|
||
+ else
|
||
+ {
|
||
+ shmid64_to_kshmid64 (buf, &kshmid);
|
||
+ arg = &kshmid;
|
||
+ }
|
||
}
|
||
# ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T
|
||
if (cmd == IPC_SET)
|
||
@@ -107,7 +114,6 @@ __shmctl64 (int shmid, int cmd, struct __shmid64_ds *buf)
|
||
|
||
switch (cmd)
|
||
{
|
||
- case IPC_INFO:
|
||
case IPC_STAT:
|
||
case SHM_STAT:
|
||
case SHM_STAT_ANY:
|
||
@@ -168,8 +174,15 @@ __shmctl (int shmid, int cmd, struct shmid_ds *buf)
|
||
struct __shmid64_ds shmid64, *buf64 = NULL;
|
||
if (buf != NULL)
|
||
{
|
||
- shmid_to_shmid64 (&shmid64, buf);
|
||
- buf64 = &shmid64;
|
||
+ /* This is a Linux extension where kernel expects either a
|
||
+ 'struct shminfo' (IPC_INFO) or 'struct shm_info' (SHM_INFO). */
|
||
+ if (cmd == IPC_INFO || cmd == SHM_INFO)
|
||
+ buf64 = (struct __shmid64_ds *) buf;
|
||
+ else
|
||
+ {
|
||
+ shmid_to_shmid64 (&shmid64, buf);
|
||
+ buf64 = &shmid64;
|
||
+ }
|
||
}
|
||
|
||
int ret = __shmctl64 (shmid, cmd, buf64);
|
||
@@ -178,7 +191,6 @@ __shmctl (int shmid, int cmd, struct shmid_ds *buf)
|
||
|
||
switch (cmd)
|
||
{
|
||
- case IPC_INFO:
|
||
case IPC_STAT:
|
||
case SHM_STAT:
|
||
case SHM_STAT_ANY:
|
||
diff --git a/sysdeps/unix/sysv/linux/tst-sysvmsg-linux.c b/sysdeps/unix/sysv/linux/tst-sysvmsg-linux.c
|
||
new file mode 100644
|
||
index 0000000000..630f4f792c
|
||
--- /dev/null
|
||
+++ b/sysdeps/unix/sysv/linux/tst-sysvmsg-linux.c
|
||
@@ -0,0 +1,177 @@
|
||
+/* Basic tests for Linux SYSV message queue extensions.
|
||
+ Copyright (C) 2020 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <sys/ipc.h>
|
||
+#include <sys/msg.h>
|
||
+#include <errno.h>
|
||
+#include <stdlib.h>
|
||
+#include <stdbool.h>
|
||
+#include <stdio.h>
|
||
+
|
||
+#include <support/check.h>
|
||
+#include <support/temp_file.h>
|
||
+
|
||
+#define MSGQ_MODE 0644
|
||
+
|
||
+/* These are for the temporary file we generate. */
|
||
+static char *name;
|
||
+static int msqid;
|
||
+
|
||
+static void
|
||
+remove_msq (void)
|
||
+{
|
||
+ /* Enforce message queue removal in case of early test failure.
|
||
+ Ignore error since the msg may already have been removed. */
|
||
+ msgctl (msqid, IPC_RMID, NULL);
|
||
+}
|
||
+
|
||
+static void
|
||
+do_prepare (int argc, char *argv[])
|
||
+{
|
||
+ TEST_VERIFY_EXIT (create_temp_file ("tst-sysvmsg.", &name) != -1);
|
||
+}
|
||
+
|
||
+#define PREPARE do_prepare
|
||
+
|
||
+struct test_msginfo
|
||
+{
|
||
+ int msgmax;
|
||
+ int msgmnb;
|
||
+ int msgmni;
|
||
+};
|
||
+
|
||
+/* It tries to obtain some system-wide SysV message queue information from
|
||
+ /proc to check against IPC_INFO/MSG_INFO. The /proc only returns the
|
||
+ tunables value of MSGMAX, MSGMNB, and MSGMNI.
|
||
+
|
||
+ The kernel also returns constant value for MSGSSZ, MSGSEG and also MSGMAP,
|
||
+ MSGPOOL, and MSGTQL (for IPC_INFO). They are not checked since they might
|
||
+ change over kernel releases. */
|
||
+
|
||
+static int
|
||
+read_proc_file (const char *file)
|
||
+{
|
||
+ FILE *f = fopen (file, "r");
|
||
+ if (f == NULL)
|
||
+ FAIL_UNSUPPORTED ("/proc is not mounted or %s is not available", file);
|
||
+
|
||
+ int v;
|
||
+ int r = fscanf (f, "%d", & v);
|
||
+ TEST_VERIFY_EXIT (r == 1);
|
||
+
|
||
+ fclose (f);
|
||
+ return v;
|
||
+}
|
||
+
|
||
+
|
||
+/* Check if the message queue with IDX (index into the kernel's internal
|
||
+ array) matches the one with KEY. The CMD is either MSG_STAT or
|
||
+ MSG_STAT_ANY. */
|
||
+
|
||
+static bool
|
||
+check_msginfo (int idx, key_t key, int cmd)
|
||
+{
|
||
+ struct msqid_ds msginfo;
|
||
+ int mid = msgctl (idx, cmd, &msginfo);
|
||
+ /* Ignore unused array slot returned by the kernel or information from
|
||
+ unknown message queue. */
|
||
+ if ((mid == -1 && errno == EINVAL) || mid != msqid)
|
||
+ return false;
|
||
+
|
||
+ if (mid == -1)
|
||
+ FAIL_EXIT1 ("msgctl with %s failed: %m",
|
||
+ cmd == MSG_STAT ? "MSG_STAT" : "MSG_STAT_ANY");
|
||
+
|
||
+ TEST_COMPARE (msginfo.msg_perm.__key, key);
|
||
+ TEST_COMPARE (msginfo.msg_perm.mode, MSGQ_MODE);
|
||
+ TEST_COMPARE (msginfo.msg_qnum, 0);
|
||
+
|
||
+ return true;
|
||
+}
|
||
+
|
||
+static int
|
||
+do_test (void)
|
||
+{
|
||
+ atexit (remove_msq);
|
||
+
|
||
+ key_t key = ftok (name, 'G');
|
||
+ if (key == -1)
|
||
+ FAIL_EXIT1 ("ftok failed: %m");
|
||
+
|
||
+ msqid = msgget (key, MSGQ_MODE | IPC_CREAT);
|
||
+ if (msqid == -1)
|
||
+ FAIL_EXIT1 ("msgget failed: %m");
|
||
+
|
||
+ struct test_msginfo tipcinfo;
|
||
+ tipcinfo.msgmax = read_proc_file ("/proc/sys/kernel/msgmax");
|
||
+ tipcinfo.msgmnb = read_proc_file ("/proc/sys/kernel/msgmnb");
|
||
+ tipcinfo.msgmni = read_proc_file ("/proc/sys/kernel/msgmni");
|
||
+
|
||
+ int msqidx;
|
||
+
|
||
+ {
|
||
+ struct msginfo ipcinfo;
|
||
+ msqidx = msgctl (msqid, IPC_INFO, (struct msqid_ds *) &ipcinfo);
|
||
+ if (msqidx == -1)
|
||
+ FAIL_EXIT1 ("msgctl with IPC_INFO failed: %m");
|
||
+
|
||
+ TEST_COMPARE (ipcinfo.msgmax, tipcinfo.msgmax);
|
||
+ TEST_COMPARE (ipcinfo.msgmnb, tipcinfo.msgmnb);
|
||
+ TEST_COMPARE (ipcinfo.msgmni, tipcinfo.msgmni);
|
||
+ }
|
||
+
|
||
+ /* Same as before but with MSG_INFO. */
|
||
+ {
|
||
+ struct msginfo ipcinfo;
|
||
+ msqidx = msgctl (msqid, MSG_INFO, (struct msqid_ds *) &ipcinfo);
|
||
+ if (msqidx == -1)
|
||
+ FAIL_EXIT1 ("msgctl with IPC_INFO failed: %m");
|
||
+
|
||
+ TEST_COMPARE (ipcinfo.msgmax, tipcinfo.msgmax);
|
||
+ TEST_COMPARE (ipcinfo.msgmnb, tipcinfo.msgmnb);
|
||
+ TEST_COMPARE (ipcinfo.msgmni, tipcinfo.msgmni);
|
||
+ }
|
||
+
|
||
+ /* We check if the created message queue shows in global list. */
|
||
+ bool found = false;
|
||
+ for (int i = 0; i <= msqidx; i++)
|
||
+ {
|
||
+ /* We can't tell apart if MSG_STAT_ANY is not supported (kernel older
|
||
+ than 4.17) or if the index used is invalid. So it just checks if the
|
||
+ value returned from a valid call matches the created message
|
||
+ queue. */
|
||
+ check_msginfo (i, key, MSG_STAT_ANY);
|
||
+
|
||
+ if (check_msginfo (i, key, MSG_STAT))
|
||
+ {
|
||
+ found = true;
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (!found)
|
||
+ FAIL_EXIT1 ("msgctl with MSG_STAT/MSG_STAT_ANY could not find the "
|
||
+ "created message queue");
|
||
+
|
||
+ if (msgctl (msqid, IPC_RMID, NULL) == -1)
|
||
+ FAIL_EXIT1 ("msgctl failed");
|
||
+
|
||
+ return 0;
|
||
+}
|
||
+
|
||
+#include <support/test-driver.c>
|
||
diff --git a/sysdeps/unix/sysv/linux/tst-sysvsem-linux.c b/sysdeps/unix/sysv/linux/tst-sysvsem-linux.c
|
||
new file mode 100644
|
||
index 0000000000..45f19e2d37
|
||
--- /dev/null
|
||
+++ b/sysdeps/unix/sysv/linux/tst-sysvsem-linux.c
|
||
@@ -0,0 +1,184 @@
|
||
+/* Basic tests for Linux SYSV semaphore extensions.
|
||
+ Copyright (C) 2020 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <sys/ipc.h>
|
||
+#include <sys/sem.h>
|
||
+#include <errno.h>
|
||
+#include <stdlib.h>
|
||
+#include <stdbool.h>
|
||
+#include <stdio.h>
|
||
+
|
||
+#include <support/check.h>
|
||
+#include <support/temp_file.h>
|
||
+
|
||
+/* These are for the temporary file we generate. */
|
||
+static char *name;
|
||
+static int semid;
|
||
+
|
||
+static void
|
||
+remove_sem (void)
|
||
+{
|
||
+ /* Enforce semaphore removal in case of early test failure.
|
||
+ Ignore error since the sem may already have been removed. */
|
||
+ semctl (semid, 0, IPC_RMID, 0);
|
||
+}
|
||
+
|
||
+static void
|
||
+do_prepare (int argc, char *argv[])
|
||
+{
|
||
+ TEST_VERIFY_EXIT (create_temp_file ("tst-sysvsem.", &name) != -1);
|
||
+}
|
||
+
|
||
+#define PREPARE do_prepare
|
||
+
|
||
+#define SEM_MODE 0644
|
||
+
|
||
+union semun
|
||
+{
|
||
+ int val;
|
||
+ struct semid_ds *buf;
|
||
+ unsigned short *array;
|
||
+ struct seminfo *__buf;
|
||
+};
|
||
+
|
||
+struct test_seminfo
|
||
+{
|
||
+ int semmsl;
|
||
+ int semmns;
|
||
+ int semopm;
|
||
+ int semmni;
|
||
+};
|
||
+
|
||
+/* It tries to obtain some system-wide SysV semaphore information from /proc
|
||
+ to check against IPC_INFO/SEM_INFO. The /proc only returns the tunables
|
||
+ value of SEMMSL, SEMMNS, SEMOPM, and SEMMNI.
|
||
+
|
||
+ The kernel also returns constant value for SEMVMX, SEMMNU, SEMMAP, SEMUME,
|
||
+ and also SEMUSZ and SEMAEM (for IPC_INFO). The issue to check them is they
|
||
+ might change over kernel releases. */
|
||
+
|
||
+static void
|
||
+read_sem_stat (struct test_seminfo *tseminfo)
|
||
+{
|
||
+ FILE *f = fopen ("/proc/sys/kernel/sem", "r");
|
||
+ if (f == NULL)
|
||
+ FAIL_UNSUPPORTED ("/proc is not mounted or /proc/sys/kernel/sem is not "
|
||
+ "available");
|
||
+
|
||
+ int r = fscanf (f, "%d %d %d %d",
|
||
+ &tseminfo->semmsl, &tseminfo->semmns, &tseminfo->semopm,
|
||
+ &tseminfo->semmni);
|
||
+ TEST_VERIFY_EXIT (r == 4);
|
||
+
|
||
+ fclose (f);
|
||
+}
|
||
+
|
||
+
|
||
+/* Check if the semaphore with IDX (index into the kernel's internal array)
|
||
+ matches the one with KEY. The CMD is either SEM_STAT or SEM_STAT_ANY. */
|
||
+
|
||
+static bool
|
||
+check_seminfo (int idx, key_t key, int cmd)
|
||
+{
|
||
+ struct semid_ds seminfo;
|
||
+ int sid = semctl (idx, 0, cmd, (union semun) { .buf = &seminfo });
|
||
+ /* Ignore unused array slot returned by the kernel or information from
|
||
+ unknown semaphores. */
|
||
+ if ((sid == -1 && errno == EINVAL) || sid != semid)
|
||
+ return false;
|
||
+
|
||
+ if (sid == -1)
|
||
+ FAIL_EXIT1 ("semctl with SEM_STAT failed (errno=%d)", errno);
|
||
+
|
||
+ TEST_COMPARE (seminfo.sem_perm.__key, key);
|
||
+ TEST_COMPARE (seminfo.sem_perm.mode, SEM_MODE);
|
||
+ TEST_COMPARE (seminfo.sem_nsems, 1);
|
||
+
|
||
+ return true;
|
||
+}
|
||
+
|
||
+static int
|
||
+do_test (void)
|
||
+{
|
||
+ atexit (remove_sem);
|
||
+
|
||
+ key_t key = ftok (name, 'G');
|
||
+ if (key == -1)
|
||
+ FAIL_EXIT1 ("ftok failed: %m");
|
||
+
|
||
+ semid = semget (key, 1, IPC_CREAT | IPC_EXCL | SEM_MODE);
|
||
+ if (semid == -1)
|
||
+ FAIL_EXIT1 ("semget failed: %m");
|
||
+
|
||
+ struct test_seminfo tipcinfo;
|
||
+ read_sem_stat (&tipcinfo);
|
||
+
|
||
+ int semidx;
|
||
+
|
||
+ {
|
||
+ struct seminfo ipcinfo;
|
||
+ semidx = semctl (semid, 0, IPC_INFO, (union semun) { .__buf = &ipcinfo });
|
||
+ if (semidx == -1)
|
||
+ FAIL_EXIT1 ("semctl with IPC_INFO failed: %m");
|
||
+
|
||
+ TEST_COMPARE (ipcinfo.semmsl, tipcinfo.semmsl);
|
||
+ TEST_COMPARE (ipcinfo.semmns, tipcinfo.semmns);
|
||
+ TEST_COMPARE (ipcinfo.semopm, tipcinfo.semopm);
|
||
+ TEST_COMPARE (ipcinfo.semmni, tipcinfo.semmni);
|
||
+ }
|
||
+
|
||
+ /* Same as before but with SEM_INFO. */
|
||
+ {
|
||
+ struct seminfo ipcinfo;
|
||
+ semidx = semctl (semid, 0, SEM_INFO, (union semun) { .__buf = &ipcinfo });
|
||
+ if (semidx == -1)
|
||
+ FAIL_EXIT1 ("semctl with SEM_INFO failed: %m");
|
||
+
|
||
+ TEST_COMPARE (ipcinfo.semmsl, tipcinfo.semmsl);
|
||
+ TEST_COMPARE (ipcinfo.semmns, tipcinfo.semmns);
|
||
+ TEST_COMPARE (ipcinfo.semopm, tipcinfo.semopm);
|
||
+ TEST_COMPARE (ipcinfo.semmni, tipcinfo.semmni);
|
||
+ }
|
||
+
|
||
+ /* We check if the created semaphore shows in the system-wide status. */
|
||
+ bool found = false;
|
||
+ for (int i = 0; i <= semidx; i++)
|
||
+ {
|
||
+ /* We can't tell apart if SEM_STAT_ANY is not supported (kernel older
|
||
+ than 4.17) or if the index used is invalid. So just check if the
|
||
+ value returned from a valid call matches the created semaphore. */
|
||
+ check_seminfo (i, key, SEM_STAT_ANY);
|
||
+
|
||
+ if (check_seminfo (i, key, SEM_STAT))
|
||
+ {
|
||
+ found = true;
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (!found)
|
||
+ FAIL_EXIT1 ("semctl with SEM_STAT/SEM_STAT_ANY could not find the "
|
||
+ "created semaphore");
|
||
+
|
||
+ if (semctl (semid, 0, IPC_RMID, 0) == -1)
|
||
+ FAIL_EXIT1 ("semctl failed: %m");
|
||
+
|
||
+ return 0;
|
||
+}
|
||
+
|
||
+#include <support/test-driver.c>
|
||
diff --git a/sysdeps/unix/sysv/linux/tst-sysvshm-linux.c b/sysdeps/unix/sysv/linux/tst-sysvshm-linux.c
|
||
new file mode 100644
|
||
index 0000000000..bb154592a6
|
||
--- /dev/null
|
||
+++ b/sysdeps/unix/sysv/linux/tst-sysvshm-linux.c
|
||
@@ -0,0 +1,188 @@
|
||
+/* Basic tests for Linux SYSV shared memory extensions.
|
||
+ Copyright (C) 2020 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <sys/ipc.h>
|
||
+#include <sys/shm.h>
|
||
+#include <errno.h>
|
||
+#include <stdlib.h>
|
||
+#include <stdbool.h>
|
||
+#include <stdio.h>
|
||
+#include <unistd.h>
|
||
+#include <inttypes.h>
|
||
+#include <limits.h>
|
||
+
|
||
+#include <support/check.h>
|
||
+#include <support/temp_file.h>
|
||
+
|
||
+#define SHM_MODE 0644
|
||
+
|
||
+/* These are for the temporary file we generate. */
|
||
+static char *name;
|
||
+static int shmid;
|
||
+static long int pgsz;
|
||
+
|
||
+static void
|
||
+remove_shm (void)
|
||
+{
|
||
+ /* Enforce shared memory removal in case of early test failure.
|
||
+ Ignore error since the shm may already have been removed. */
|
||
+ shmctl (shmid, IPC_RMID, NULL);
|
||
+}
|
||
+
|
||
+static void
|
||
+do_prepare (int argc, char *argv[])
|
||
+{
|
||
+ TEST_VERIFY_EXIT (create_temp_file ("tst-sysvshm.", &name) != -1);
|
||
+}
|
||
+
|
||
+#define PREPARE do_prepare
|
||
+
|
||
+struct test_shminfo
|
||
+{
|
||
+ __syscall_ulong_t shmall;
|
||
+ __syscall_ulong_t shmmax;
|
||
+ __syscall_ulong_t shmmni;
|
||
+};
|
||
+
|
||
+/* It tries to obtain some system-wide SysV shared memory information from
|
||
+ /proc to check against IPC_INFO/SHM_INFO. The /proc only returns the
|
||
+ tunables value of SHMALL, SHMMAX, and SHMMNI. */
|
||
+
|
||
+static uint64_t
|
||
+read_proc_file (const char *file)
|
||
+{
|
||
+ FILE *f = fopen (file, "r");
|
||
+ if (f == NULL)
|
||
+ FAIL_UNSUPPORTED ("/proc is not mounted or %s is not available", file);
|
||
+
|
||
+ /* Handle 32-bit binaries running on 64-bit kernels. */
|
||
+ uint64_t v;
|
||
+ int r = fscanf (f, "%" SCNu64, &v);
|
||
+ TEST_VERIFY_EXIT (r == 1);
|
||
+
|
||
+ fclose (f);
|
||
+ return v;
|
||
+}
|
||
+
|
||
+
|
||
+/* Check if the shared memory segment with IDX (index into the kernel's internal
|
||
+ array) matches the one with KEY. The CMD is either SHM_STAT or
|
||
+ SHM_STAT_ANY. */
|
||
+
|
||
+static bool
|
||
+check_shminfo (int idx, key_t key, int cmd)
|
||
+{
|
||
+ struct shmid_ds shminfo;
|
||
+ int sid = shmctl (idx, cmd, &shminfo);
|
||
+ /* Ignore unused array slot returned by the kernel or information from
|
||
+ unknown shared memory segments. */
|
||
+ if ((sid == -1 && errno == EINVAL) || sid != shmid)
|
||
+ return false;
|
||
+
|
||
+ if (sid == -1)
|
||
+ FAIL_EXIT1 ("shmctl with %s failed: %m",
|
||
+ cmd == SHM_STAT ? "SHM_STAT" : "SHM_STAT_ANY");
|
||
+
|
||
+ TEST_COMPARE (shminfo.shm_perm.__key, key);
|
||
+ TEST_COMPARE (shminfo.shm_perm.mode, SHM_MODE);
|
||
+ TEST_COMPARE (shminfo.shm_segsz, pgsz);
|
||
+
|
||
+ return true;
|
||
+}
|
||
+
|
||
+static int
|
||
+do_test (void)
|
||
+{
|
||
+ atexit (remove_shm);
|
||
+
|
||
+ pgsz = sysconf (_SC_PAGESIZE);
|
||
+ if (pgsz == -1)
|
||
+ FAIL_EXIT1 ("sysconf (_SC_PAGESIZE) failed: %m");
|
||
+
|
||
+ key_t key = ftok (name, 'G');
|
||
+ if (key == -1)
|
||
+ FAIL_EXIT1 ("ftok failed: %m");
|
||
+
|
||
+ shmid = shmget (key, pgsz, IPC_CREAT | IPC_EXCL | SHM_MODE);
|
||
+ if (shmid == -1)
|
||
+ FAIL_EXIT1 ("shmget failed: %m");
|
||
+
|
||
+ /* It does not check shmmax because the kernel clamps its value to INT_MAX for:
|
||
+
|
||
+ 1. Compat symbols with IPC_64, i.e., 32-bit binaries running on 64-bit
|
||
+ kernels.
|
||
+
|
||
+ 2. Default symbol without IPC_64 (defined as IPC_OLD within Linux) and
|
||
+ glibc always uses IPC_64 for 32-bit ABIs (to support 64-bit time_t).
|
||
+ It means that 32-bit binaries running on 32-bit kernels will not see
|
||
+ shmmax being clamped.
|
||
+
|
||
+ And finding out whether the compat symbol is used would require checking
|
||
+ the underlying kernel against the current ABI. The shmall and shmmni
|
||
+ already provide enough coverage. */
|
||
+
|
||
+ struct test_shminfo tipcinfo;
|
||
+ tipcinfo.shmall = read_proc_file ("/proc/sys/kernel/shmall");
|
||
+ tipcinfo.shmmni = read_proc_file ("/proc/sys/kernel/shmmni");
|
||
+
|
||
+ int shmidx;
|
||
+
|
||
+ /* Note: SHM_INFO does not return a shminfo, but rather a 'struct shm_info'.
|
||
+ It is tricky to verify its values since the syscall returns system wide
|
||
+ resources consumed by shared memory. The shmctl implementation handles
|
||
+ SHM_INFO as IPC_INFO, so the IPC_INFO test should validate SHM_INFO as
|
||
+ well. */
|
||
+
|
||
+ {
|
||
+ struct shminfo ipcinfo;
|
||
+ shmidx = shmctl (shmid, IPC_INFO, (struct shmid_ds *) &ipcinfo);
|
||
+ if (shmidx == -1)
|
||
+ FAIL_EXIT1 ("shmctl with IPC_INFO failed: %m");
|
||
+
|
||
+ TEST_COMPARE (ipcinfo.shmall, tipcinfo.shmall);
|
||
+ TEST_COMPARE (ipcinfo.shmmni, tipcinfo.shmmni);
|
||
+ }
|
||
+
|
||
+ /* We check if the created shared memory shows in the global list. */
|
||
+ bool found = false;
|
||
+ for (int i = 0; i <= shmidx; i++)
|
||
+ {
|
||
+ /* We can't tell apart if SHM_STAT_ANY is not supported (kernel older
|
||
+ than 4.17) or if the index used is invalid. So just check if the
|
||
+ value returned from a valid call matches the created shared
|
||
+ memory segment. */
|
||
+ check_shminfo (i, key, SHM_STAT_ANY);
|
||
+
|
||
+ if (check_shminfo (i, key, SHM_STAT))
|
||
+ {
|
||
+ found = true;
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (!found)
|
||
+ FAIL_EXIT1 ("shmctl with SHM_STAT/SHM_STAT_ANY could not find the "
|
||
+ "created shared memory");
|
||
+
|
||
+ if (shmctl (shmid, IPC_RMID, NULL) == -1)
|
||
+ FAIL_EXIT1 ("shmctl failed");
|
||
+
|
||
+ return 0;
|
||
+}
|
||
+
|
||
+#include <support/test-driver.c>
|
||
diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
|
||
index a6736aef25..e821d95fa3 100644
|
||
--- a/sysdeps/x86/Makefile
|
||
+++ b/sysdeps/x86/Makefile
|
||
@@ -12,10 +12,39 @@ endif
|
||
ifeq ($(subdir),setjmp)
|
||
gen-as-const-headers += jmp_buf-ssp.sym
|
||
sysdep_routines += __longjmp_cancel
|
||
+ifneq ($(enable-cet),no)
|
||
+ifneq ($(have-tunables),no)
|
||
+tests += tst-setjmp-cet
|
||
+tst-setjmp-cet-ENV = GLIBC_TUNABLES=glibc.cpu.x86_ibt=on:glibc.cpu.x86_shstk=on
|
||
+endif
|
||
+endif
|
||
endif
|
||
|
||
ifeq ($(subdir),string)
|
||
sysdep_routines += cacheinfo
|
||
+
|
||
+tests += \
|
||
+ tst-memchr-rtm \
|
||
+ tst-memcmp-rtm \
|
||
+ tst-memmove-rtm \
|
||
+ tst-memrchr-rtm \
|
||
+ tst-memset-rtm \
|
||
+ tst-strchr-rtm \
|
||
+ tst-strcpy-rtm \
|
||
+ tst-strlen-rtm \
|
||
+ tst-strncmp-rtm \
|
||
+ tst-strrchr-rtm
|
||
+
|
||
+CFLAGS-tst-memchr-rtm.c += -mrtm
|
||
+CFLAGS-tst-memcmp-rtm.c += -mrtm
|
||
+CFLAGS-tst-memmove-rtm.c += -mrtm
|
||
+CFLAGS-tst-memrchr-rtm.c += -mrtm
|
||
+CFLAGS-tst-memset-rtm.c += -mrtm
|
||
+CFLAGS-tst-strchr-rtm.c += -mrtm
|
||
+CFLAGS-tst-strcpy-rtm.c += -mrtm
|
||
+CFLAGS-tst-strlen-rtm.c += -mrtm
|
||
+CFLAGS-tst-strncmp-rtm.c += -mrtm
|
||
+CFLAGS-tst-strrchr-rtm.c += -mrtm
|
||
endif
|
||
|
||
ifneq ($(enable-cet),no)
|
||
diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
|
||
index 217c21c34f..3fb4a028d8 100644
|
||
--- a/sysdeps/x86/cacheinfo.c
|
||
+++ b/sysdeps/x86/cacheinfo.c
|
||
@@ -808,7 +808,7 @@ init_cacheinfo (void)
|
||
threads = 1 << ((ecx >> 12) & 0x0f);
|
||
}
|
||
|
||
- if (threads == 0)
|
||
+ if (threads == 0 || cpu_features->basic.family >= 0x17)
|
||
{
|
||
/* If APIC ID width is not available, use logical
|
||
processor count. */
|
||
@@ -823,8 +823,22 @@ init_cacheinfo (void)
|
||
if (threads > 0)
|
||
shared /= threads;
|
||
|
||
- /* Account for exclusive L2 and L3 caches. */
|
||
- shared += core;
|
||
+ /* Get shared cache per ccx for Zen architectures. */
|
||
+ if (cpu_features->basic.family >= 0x17)
|
||
+ {
|
||
+ unsigned int eax;
|
||
+
|
||
+ /* Get the number of threads sharing the L3 cache in the CCX. */
|
||
+ __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx);
|
||
+
|
||
+ unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1;
|
||
+ shared *= threads_per_ccx;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ /* Account for exclusive L2 and L3 caches. */
|
||
+ shared += core;
|
||
+ }
|
||
}
|
||
}
|
||
|
||
@@ -854,14 +868,20 @@ init_cacheinfo (void)
|
||
__x86_shared_cache_size = shared;
|
||
}
|
||
|
||
- /* The large memcpy micro benchmark in glibc shows that 6 times of
|
||
- shared cache size is the approximate value above which non-temporal
|
||
- store becomes faster on a 8-core processor. This is the 3/4 of the
|
||
- total shared cache size. */
|
||
+ /* The default setting for the non_temporal threshold is 3/4 of one
|
||
+ thread's share of the chip's cache. For most Intel and AMD processors
|
||
+ with an initial release date between 2017 and 2020, a thread's typical
|
||
+ share of the cache is from 500 KBytes to 2 MBytes. Using the 3/4
|
||
+ threshold leaves 125 KBytes to 500 KBytes of the thread's data
|
||
+ in cache after a maximum temporal copy, which will maintain
|
||
+ in cache a reasonable portion of the thread's stack and other
|
||
+ active data. If the threshold is set higher than one thread's
|
||
+ share of the cache, it has a substantial risk of negatively
|
||
+ impacting the performance of other threads running on the chip. */
|
||
__x86_shared_non_temporal_threshold
|
||
= (cpu_features->non_temporal_threshold != 0
|
||
? cpu_features->non_temporal_threshold
|
||
- : __x86_shared_cache_size * threads * 3 / 4);
|
||
+ : __x86_shared_cache_size * 3 / 4);
|
||
|
||
/* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8. */
|
||
unsigned int minimum_rep_movsb_threshold;
|
||
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
|
||
index 4c24ba7c31..484efe7a0f 100644
|
||
--- a/sysdeps/x86/cpu-features.c
|
||
+++ b/sysdeps/x86/cpu-features.c
|
||
@@ -71,7 +71,6 @@ update_usable (struct cpu_features *cpu_features)
|
||
CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_6);
|
||
CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_7);
|
||
CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_9);
|
||
- CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_11);
|
||
CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_12);
|
||
CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_13);
|
||
CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_17);
|
||
@@ -318,6 +317,9 @@ update_usable (struct cpu_features *cpu_features)
|
||
/* Determine if PKU is usable. */
|
||
if (CPU_FEATURES_CPU_P (cpu_features, OSPKE))
|
||
CPU_FEATURE_SET (cpu_features, PKU);
|
||
+
|
||
+ if (CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT))
|
||
+ CPU_FEATURE_UNSET (cpu_features, RTM);
|
||
}
|
||
|
||
static void
|
||
@@ -516,11 +518,39 @@ init_cpu_features (struct cpu_features *cpu_features)
|
||
break;
|
||
}
|
||
|
||
- /* Disable TSX on some Haswell processors to avoid TSX on kernels that
|
||
- weren't updated with the latest microcode package (which disables
|
||
- broken feature by default). */
|
||
+ /* Disable TSX on some processors to avoid TSX on kernels that
|
||
+ weren't updated with the latest microcode package (which
|
||
+ disables broken feature by default). */
|
||
switch (model)
|
||
{
|
||
+ case 0x55:
|
||
+ if (stepping <= 5)
|
||
+ goto disable_tsx;
|
||
+ break;
|
||
+ case 0x8e:
|
||
+ /* NB: Although the errata documents that for model == 0x8e,
|
||
+ only 0xb stepping or lower are impacted, the intention of
|
||
+ the errata was to disable TSX on all client processors on
|
||
+ all steppings. Include 0xc stepping which is an Intel
|
||
+ Core i7-8665U, a client mobile processor. */
|
||
+ case 0x9e:
|
||
+ if (stepping > 0xc)
|
||
+ break;
|
||
+ /* Fall through. */
|
||
+ case 0x4e:
|
||
+ case 0x5e:
|
||
+ {
|
||
+ /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
|
||
+ processors listed in:
|
||
+
|
||
+https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
|
||
+ */
|
||
+disable_tsx:
|
||
+ CPU_FEATURE_UNSET (cpu_features, HLE);
|
||
+ CPU_FEATURE_UNSET (cpu_features, RTM);
|
||
+ CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
|
||
+ }
|
||
+ break;
|
||
case 0x3f:
|
||
/* Xeon E7 v3 with stepping >= 4 has working TSX. */
|
||
if (stepping >= 4)
|
||
@@ -546,8 +576,24 @@ init_cpu_features (struct cpu_features *cpu_features)
|
||
cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
|
||
|= bit_arch_Prefer_No_VZEROUPPER;
|
||
else
|
||
- cpu_features->preferred[index_arch_Prefer_No_AVX512]
|
||
- |= bit_arch_Prefer_No_AVX512;
|
||
+ {
|
||
+ cpu_features->preferred[index_arch_Prefer_No_AVX512]
|
||
+ |= bit_arch_Prefer_No_AVX512;
|
||
+
|
||
+ /* Avoid RTM abort triggered by VZEROUPPER inside a
|
||
+ transactionally executing RTM region. */
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
||
+ cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
|
||
+ |= bit_arch_Prefer_No_VZEROUPPER;
|
||
+
|
||
+ /* Since to compare 2 32-byte strings, 256-bit EVEX strcmp
|
||
+ requires 2 loads, 3 VPCMPs and 2 KORDs while AVX2 strcmp
|
||
+ requires 1 load, 2 VPCMPEQs, 1 VPMINU and 1 VPMOVMSKB,
|
||
+ AVX2 strcmp is faster than EVEX strcmp. */
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2))
|
||
+ cpu_features->preferred[index_arch_Prefer_AVX2_STRCMP]
|
||
+ |= bit_arch_Prefer_AVX2_STRCMP;
|
||
+ }
|
||
}
|
||
/* This spells out "AuthenticAMD" or "HygonGenuine". */
|
||
else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
|
||
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
|
||
index a0b9b9177c..8995a15f09 100644
|
||
--- a/sysdeps/x86/cpu-features.h
|
||
+++ b/sysdeps/x86/cpu-features.h
|
||
@@ -295,7 +295,7 @@ extern const struct cpu_features *__get_cpu_features (void)
|
||
#define bit_cpu_AVX512_VP2INTERSECT (1u << 8)
|
||
#define bit_cpu_INDEX_7_EDX_9 (1u << 9)
|
||
#define bit_cpu_MD_CLEAR (1u << 10)
|
||
-#define bit_cpu_INDEX_7_EDX_11 (1u << 11)
|
||
+#define bit_cpu_RTM_ALWAYS_ABORT (1u << 11)
|
||
#define bit_cpu_INDEX_7_EDX_12 (1u << 12)
|
||
#define bit_cpu_INDEX_7_EDX_13 (1u << 13)
|
||
#define bit_cpu_SERIALIZE (1u << 14)
|
||
@@ -508,7 +508,7 @@ extern const struct cpu_features *__get_cpu_features (void)
|
||
#define index_cpu_AVX512_VP2INTERSECT COMMON_CPUID_INDEX_7
|
||
#define index_cpu_INDEX_7_EDX_9 COMMON_CPUID_INDEX_7
|
||
#define index_cpu_MD_CLEAR COMMON_CPUID_INDEX_7
|
||
-#define index_cpu_INDEX_7_EDX_11 COMMON_CPUID_INDEX_7
|
||
+#define index_cpu_RTM_ALWAYS_ABORT COMMON_CPUID_INDEX_7
|
||
#define index_cpu_INDEX_7_EDX_12 COMMON_CPUID_INDEX_7
|
||
#define index_cpu_INDEX_7_EDX_13 COMMON_CPUID_INDEX_7
|
||
#define index_cpu_SERIALIZE COMMON_CPUID_INDEX_7
|
||
@@ -721,7 +721,7 @@ extern const struct cpu_features *__get_cpu_features (void)
|
||
#define reg_AVX512_VP2INTERSECT edx
|
||
#define reg_INDEX_7_EDX_9 edx
|
||
#define reg_MD_CLEAR edx
|
||
-#define reg_INDEX_7_EDX_11 edx
|
||
+#define reg_RTM_ALWAYS_ABORT edx
|
||
#define reg_INDEX_7_EDX_12 edx
|
||
#define reg_INDEX_7_EDX_13 edx
|
||
#define reg_SERIALIZE edx
|
||
@@ -804,6 +804,7 @@ extern const struct cpu_features *__get_cpu_features (void)
|
||
#define bit_arch_Prefer_FSRM (1u << 13)
|
||
#define bit_arch_Prefer_No_AVX512 (1u << 14)
|
||
#define bit_arch_MathVec_Prefer_No_AVX512 (1u << 15)
|
||
+#define bit_arch_Prefer_AVX2_STRCMP (1u << 16)
|
||
|
||
#define index_arch_Fast_Rep_String PREFERRED_FEATURE_INDEX_1
|
||
#define index_arch_Fast_Copy_Backward PREFERRED_FEATURE_INDEX_1
|
||
@@ -821,6 +822,7 @@ extern const struct cpu_features *__get_cpu_features (void)
|
||
#define index_arch_Prefer_No_AVX512 PREFERRED_FEATURE_INDEX_1
|
||
#define index_arch_MathVec_Prefer_No_AVX512 PREFERRED_FEATURE_INDEX_1
|
||
#define index_arch_Prefer_FSRM PREFERRED_FEATURE_INDEX_1
|
||
+#define index_arch_Prefer_AVX2_STRCMP PREFERRED_FEATURE_INDEX_1
|
||
|
||
/* XCR0 Feature flags. */
|
||
#define bit_XMM_state (1u << 1)
|
||
diff --git a/sysdeps/x86/cpu-tunables.c b/sysdeps/x86/cpu-tunables.c
|
||
index 588bbf9448..b251a91af3 100644
|
||
--- a/sysdeps/x86/cpu-tunables.c
|
||
+++ b/sysdeps/x86/cpu-tunables.c
|
||
@@ -238,6 +238,8 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
|
||
CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features,
|
||
Fast_Copy_Backward,
|
||
disable, 18);
|
||
+ CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH
|
||
+ (n, cpu_features, Prefer_AVX2_STRCMP, AVX2, disable, 18);
|
||
}
|
||
break;
|
||
case 19:
|
||
diff --git a/sysdeps/x86/dl-cet.c b/sysdeps/x86/dl-cet.c
|
||
index 03572f7af6..3cc54a8d53 100644
|
||
--- a/sysdeps/x86/dl-cet.c
|
||
+++ b/sysdeps/x86/dl-cet.c
|
||
@@ -47,7 +47,10 @@ dl_cet_check (struct link_map *m, const char *program)
|
||
/* No legacy object check if both IBT and SHSTK are always on. */
|
||
if (enable_ibt_type == cet_always_on
|
||
&& enable_shstk_type == cet_always_on)
|
||
- return;
|
||
+ {
|
||
+ THREAD_SETMEM (THREAD_SELF, header.feature_1, GL(dl_x86_feature_1));
|
||
+ return;
|
||
+ }
|
||
|
||
/* Check if IBT is enabled by kernel. */
|
||
bool ibt_enabled
|
||
diff --git a/sysdeps/x86/dl-prop.h b/sysdeps/x86/dl-prop.h
|
||
index 89911e19e2..4eb3b85a7b 100644
|
||
--- a/sysdeps/x86/dl-prop.h
|
||
+++ b/sysdeps/x86/dl-prop.h
|
||
@@ -145,15 +145,15 @@ _dl_process_cet_property_note (struct link_map *l,
|
||
}
|
||
|
||
static inline void __attribute__ ((unused))
|
||
-_dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph)
|
||
+_dl_process_pt_note (struct link_map *l, int fd, const ElfW(Phdr) *ph)
|
||
{
|
||
const ElfW(Nhdr) *note = (const void *) (ph->p_vaddr + l->l_addr);
|
||
_dl_process_cet_property_note (l, note, ph->p_memsz, ph->p_align);
|
||
}
|
||
|
||
static inline int __attribute__ ((always_inline))
|
||
-_dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz,
|
||
- void *data)
|
||
+_dl_process_gnu_property (struct link_map *l, int fd, uint32_t type,
|
||
+ uint32_t datasz, void *data)
|
||
{
|
||
return 0;
|
||
}
|
||
diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c
|
||
index 080c58e70b..527de3b5d9 100644
|
||
--- a/sysdeps/x86/tst-get-cpu-features.c
|
||
+++ b/sysdeps/x86/tst-get-cpu-features.c
|
||
@@ -183,6 +183,7 @@ do_test (void)
|
||
CHECK_CPU_FEATURE (FSRM);
|
||
CHECK_CPU_FEATURE (AVX512_VP2INTERSECT);
|
||
CHECK_CPU_FEATURE (MD_CLEAR);
|
||
+ CHECK_CPU_FEATURE (RTM_ALWAYS_ABORT);
|
||
CHECK_CPU_FEATURE (SERIALIZE);
|
||
CHECK_CPU_FEATURE (HYBRID);
|
||
CHECK_CPU_FEATURE (TSXLDTRK);
|
||
@@ -336,6 +337,7 @@ do_test (void)
|
||
CHECK_CPU_FEATURE_USABLE (FSRM);
|
||
CHECK_CPU_FEATURE_USABLE (AVX512_VP2INTERSECT);
|
||
CHECK_CPU_FEATURE_USABLE (MD_CLEAR);
|
||
+ CHECK_CPU_FEATURE_USABLE (RTM_ALWAYS_ABORT);
|
||
CHECK_CPU_FEATURE_USABLE (SERIALIZE);
|
||
CHECK_CPU_FEATURE_USABLE (HYBRID);
|
||
CHECK_CPU_FEATURE_USABLE (TSXLDTRK);
|
||
diff --git a/sysdeps/x86/tst-memchr-rtm.c b/sysdeps/x86/tst-memchr-rtm.c
|
||
new file mode 100644
|
||
index 0000000000..e47494011e
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86/tst-memchr-rtm.c
|
||
@@ -0,0 +1,54 @@
|
||
+/* Test case for memchr inside a transactionally executing RTM region.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <tst-string-rtm.h>
|
||
+
|
||
+#define LOOP 3000
|
||
+#define STRING_SIZE 1024
|
||
+char string1[STRING_SIZE];
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+prepare (void)
|
||
+{
|
||
+ memset (string1, 'a', STRING_SIZE);
|
||
+ string1[100] = 'c';
|
||
+ string1[STRING_SIZE - 100] = 'c';
|
||
+ char *p = memchr (string1, 'c', STRING_SIZE);
|
||
+ if (p == &string1[100])
|
||
+ return EXIT_SUCCESS;
|
||
+ else
|
||
+ return EXIT_FAILURE;
|
||
+}
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+function (void)
|
||
+{
|
||
+ char *p = memchr (string1, 'c', STRING_SIZE);
|
||
+ if (p == &string1[100])
|
||
+ return 0;
|
||
+ else
|
||
+ return 1;
|
||
+}
|
||
+
|
||
+static int
|
||
+do_test (void)
|
||
+{
|
||
+ return do_test_1 ("memchr", LOOP, prepare, function);
|
||
+}
|
||
diff --git a/sysdeps/x86/tst-memcmp-rtm.c b/sysdeps/x86/tst-memcmp-rtm.c
|
||
new file mode 100644
|
||
index 0000000000..e4c8a623bb
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86/tst-memcmp-rtm.c
|
||
@@ -0,0 +1,52 @@
|
||
+/* Test case for memcmp inside a transactionally executing RTM region.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <tst-string-rtm.h>
|
||
+
|
||
+#define LOOP 3000
|
||
+#define STRING_SIZE 1024
|
||
+char string1[STRING_SIZE];
|
||
+char string2[STRING_SIZE];
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+prepare (void)
|
||
+{
|
||
+ memset (string1, 'a', STRING_SIZE);
|
||
+ memset (string2, 'a', STRING_SIZE);
|
||
+ if (memcmp (string1, string2, STRING_SIZE) == 0)
|
||
+ return EXIT_SUCCESS;
|
||
+ else
|
||
+ return EXIT_FAILURE;
|
||
+}
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+function (void)
|
||
+{
|
||
+ if (memcmp (string1, string2, STRING_SIZE) == 0)
|
||
+ return 0;
|
||
+ else
|
||
+ return 1;
|
||
+}
|
||
+
|
||
+static int
|
||
+do_test (void)
|
||
+{
|
||
+ return do_test_1 ("memcmp", LOOP, prepare, function);
|
||
+}
|
||
diff --git a/sysdeps/x86/tst-memmove-rtm.c b/sysdeps/x86/tst-memmove-rtm.c
|
||
new file mode 100644
|
||
index 0000000000..4bf97ef1e3
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86/tst-memmove-rtm.c
|
||
@@ -0,0 +1,53 @@
|
||
+/* Test case for memmove inside a transactionally executing RTM region.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <tst-string-rtm.h>
|
||
+
|
||
+#define LOOP 3000
|
||
+#define STRING_SIZE 1024
|
||
+char string1[STRING_SIZE];
|
||
+char string2[STRING_SIZE];
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+prepare (void)
|
||
+{
|
||
+ memset (string1, 'a', STRING_SIZE);
|
||
+ if (memmove (string2, string1, STRING_SIZE) == string2
|
||
+ && memcmp (string2, string1, STRING_SIZE) == 0)
|
||
+ return EXIT_SUCCESS;
|
||
+ else
|
||
+ return EXIT_FAILURE;
|
||
+}
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+function (void)
|
||
+{
|
||
+ if (memmove (string2, string1, STRING_SIZE) == string2
|
||
+ && memcmp (string2, string1, STRING_SIZE) == 0)
|
||
+ return 0;
|
||
+ else
|
||
+ return 1;
|
||
+}
|
||
+
|
||
+static int
|
||
+do_test (void)
|
||
+{
|
||
+ return do_test_1 ("memmove", LOOP, prepare, function);
|
||
+}
|
||
diff --git a/sysdeps/x86/tst-memrchr-rtm.c b/sysdeps/x86/tst-memrchr-rtm.c
|
||
new file mode 100644
|
||
index 0000000000..a57a5a8eb9
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86/tst-memrchr-rtm.c
|
||
@@ -0,0 +1,54 @@
|
||
+/* Test case for memrchr inside a transactionally executing RTM region.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <tst-string-rtm.h>
|
||
+
|
||
+#define LOOP 3000
|
||
+#define STRING_SIZE 1024
|
||
+char string1[STRING_SIZE];
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+prepare (void)
|
||
+{
|
||
+ memset (string1, 'a', STRING_SIZE);
|
||
+ string1[100] = 'c';
|
||
+ string1[STRING_SIZE - 100] = 'c';
|
||
+ char *p = memrchr (string1, 'c', STRING_SIZE);
|
||
+ if (p == &string1[STRING_SIZE - 100])
|
||
+ return EXIT_SUCCESS;
|
||
+ else
|
||
+ return EXIT_FAILURE;
|
||
+}
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+function (void)
|
||
+{
|
||
+ char *p = memrchr (string1, 'c', STRING_SIZE);
|
||
+ if (p == &string1[STRING_SIZE - 100])
|
||
+ return 0;
|
||
+ else
|
||
+ return 1;
|
||
+}
|
||
+
|
||
+static int
|
||
+do_test (void)
|
||
+{
|
||
+ return do_test_1 ("memrchr", LOOP, prepare, function);
|
||
+}
|
||
diff --git a/sysdeps/x86/tst-memset-rtm.c b/sysdeps/x86/tst-memset-rtm.c
|
||
new file mode 100644
|
||
index 0000000000..bf343a4dad
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86/tst-memset-rtm.c
|
||
@@ -0,0 +1,45 @@
|
||
+/* Test case for memset inside a transactionally executing RTM region.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <tst-string-rtm.h>
|
||
+
|
||
+#define LOOP 3000
|
||
+#define STRING_SIZE 1024
|
||
+char string1[STRING_SIZE];
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+prepare (void)
|
||
+{
|
||
+ memset (string1, 'a', STRING_SIZE);
|
||
+ return EXIT_SUCCESS;
|
||
+}
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+function (void)
|
||
+{
|
||
+ memset (string1, 'a', STRING_SIZE);
|
||
+ return 0;
|
||
+}
|
||
+
|
||
+static int
|
||
+do_test (void)
|
||
+{
|
||
+ return do_test_1 ("memset", LOOP, prepare, function);
|
||
+}
|
||
diff --git a/sysdeps/x86/tst-setjmp-cet.c b/sysdeps/x86/tst-setjmp-cet.c
|
||
new file mode 100644
|
||
index 0000000000..42c795d2a8
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86/tst-setjmp-cet.c
|
||
@@ -0,0 +1 @@
|
||
+#include <setjmp/tst-setjmp.c>
|
||
diff --git a/sysdeps/x86/tst-strchr-rtm.c b/sysdeps/x86/tst-strchr-rtm.c
|
||
new file mode 100644
|
||
index 0000000000..a82e29c072
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86/tst-strchr-rtm.c
|
||
@@ -0,0 +1,54 @@
|
||
+/* Test case for strchr inside a transactionally executing RTM region.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <tst-string-rtm.h>
|
||
+
|
||
+#define LOOP 3000
|
||
+#define STRING_SIZE 1024
|
||
+char string1[STRING_SIZE];
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+prepare (void)
|
||
+{
|
||
+ memset (string1, 'a', STRING_SIZE - 1);
|
||
+ string1[100] = 'c';
|
||
+ string1[STRING_SIZE - 100] = 'c';
|
||
+ char *p = strchr (string1, 'c');
|
||
+ if (p == &string1[100])
|
||
+ return EXIT_SUCCESS;
|
||
+ else
|
||
+ return EXIT_FAILURE;
|
||
+}
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+function (void)
|
||
+{
|
||
+ char *p = strchr (string1, 'c');
|
||
+ if (p == &string1[100])
|
||
+ return 0;
|
||
+ else
|
||
+ return 1;
|
||
+}
|
||
+
|
||
+static int
|
||
+do_test (void)
|
||
+{
|
||
+ return do_test_1 ("strchr", LOOP, prepare, function);
|
||
+}
|
||
diff --git a/sysdeps/x86/tst-strcpy-rtm.c b/sysdeps/x86/tst-strcpy-rtm.c
|
||
new file mode 100644
|
||
index 0000000000..2b2a583fb4
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86/tst-strcpy-rtm.c
|
||
@@ -0,0 +1,53 @@
|
||
+/* Test case for strcpy inside a transactionally executing RTM region.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <tst-string-rtm.h>
|
||
+
|
||
+#define LOOP 3000
|
||
+#define STRING_SIZE 1024
|
||
+char string1[STRING_SIZE];
|
||
+char string2[STRING_SIZE];
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+prepare (void)
|
||
+{
|
||
+ memset (string1, 'a', STRING_SIZE - 1);
|
||
+ if (strcpy (string2, string1) == string2
|
||
+ && strcmp (string2, string1) == 0)
|
||
+ return EXIT_SUCCESS;
|
||
+ else
|
||
+ return EXIT_FAILURE;
|
||
+}
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+function (void)
|
||
+{
|
||
+ if (strcpy (string2, string1) == string2
|
||
+ && strcmp (string2, string1) == 0)
|
||
+ return 0;
|
||
+ else
|
||
+ return 1;
|
||
+}
|
||
+
|
||
+static int
|
||
+do_test (void)
|
||
+{
|
||
+ return do_test_1 ("strcpy", LOOP, prepare, function);
|
||
+}
|
||
diff --git a/sysdeps/x86/tst-string-rtm.h b/sysdeps/x86/tst-string-rtm.h
|
||
new file mode 100644
|
||
index 0000000000..6ed9eca017
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86/tst-string-rtm.h
|
||
@@ -0,0 +1,82 @@
|
||
+/* Test string function in a transactionally executing RTM region.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <string.h>
|
||
+#include <x86intrin.h>
|
||
+#include <cpu-features.h>
|
||
+#include <support/check.h>
|
||
+#include <support/test-driver.h>
|
||
+
|
||
+/* Exercise FUNCTION both outside and inside RTM transactions.
|
||
+
|
||
+ PREPARE is called once first; if it does not return EXIT_SUCCESS, its
|
||
+ status is returned unchanged. Each of the LOOP iterations then calls
|
||
+ FUNCTION once outside a transaction and once more after an _xbegin
|
||
+ attempt: inside the transaction if it started, otherwise on the
|
||
+ fallback path, which is counted as an abort. Returns EXIT_UNSUPPORTED
|
||
+ if RTM is not usable. Calls FAIL_EXIT1 if any FUNCTION call returned
|
||
+ non-zero (NAME identifies the function in that message) or if more
|
||
+ than 5% of the transactions aborted. Otherwise returns EXIT_SUCCESS. */
|
||
+static int
|
||
+do_test_1 (const char *name, unsigned int loop, int (*prepare) (void),
|
||
+ int (*function) (void))
|
||
+{
|
||
+ if (!CPU_FEATURE_USABLE (RTM))
|
||
+ return EXIT_UNSUPPORTED;
|
||
+
|
||
+ int status = prepare ();
|
||
+ if (status != EXIT_SUCCESS)
|
||
+ return status;
|
||
+
|
||
+ unsigned int i;
|
||
+ unsigned int naborts = 0;
|
||
+ unsigned int failed = 0;
|
||
+ for (i = 0; i < loop; i++)
|
||
+ {
|
||
+ failed |= function ();
|
||
+ if (_xbegin() == _XBEGIN_STARTED)
|
||
+ {
|
||
+ failed |= function ();
|
||
+ _xend();
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ failed |= function ();
|
||
+ ++naborts;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (failed)
|
||
+ FAIL_EXIT1 ("%s() failed", name);
|
||
+
|
||
+ if (naborts)
|
||
+ {
|
||
+ /* NB: Low single digit (<= 5%) noise-level aborts are normal for
|
||
+ TSX. */
|
||
+ double rate = 100 * ((double) naborts) / ((double) loop);
|
||
+ if (rate > 5)
|
||
+ FAIL_EXIT1 ("TSX abort rate: %.2f%% (%u out of %u)",
|
||
+ rate, naborts, loop);
|
||
+ }
|
||
+
|
||
+ return EXIT_SUCCESS;
|
||
+}
|
||
+
|
||
+static int do_test (void);
|
||
+
|
||
+#include <support/test-driver.c>
|
||
diff --git a/sysdeps/x86/tst-strlen-rtm.c b/sysdeps/x86/tst-strlen-rtm.c
|
||
new file mode 100644
|
||
index 0000000000..0dcf14db87
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86/tst-strlen-rtm.c
|
||
@@ -0,0 +1,53 @@
|
||
+/* Test case for strlen inside a transactionally executing RTM region.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <tst-string-rtm.h>
|
||
+
|
||
+#define LOOP 3000
|
||
+#define STRING_SIZE 1024
|
||
+char string1[STRING_SIZE];
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+prepare (void)
|
||
+{
|
||
+ memset (string1, 'a', STRING_SIZE - 1);
|
||
+ string1[STRING_SIZE - 100] = '\0';
|
||
+ size_t len = strlen (string1);
|
||
+ if (len == STRING_SIZE - 100)
|
||
+ return EXIT_SUCCESS;
|
||
+ else
|
||
+ return EXIT_FAILURE;
|
||
+}
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+function (void)
|
||
+{
|
||
+ size_t len = strlen (string1);
|
||
+ if (len == STRING_SIZE - 100)
|
||
+ return 0;
|
||
+ else
|
||
+ return 1;
|
||
+}
|
||
+
|
||
+static int
|
||
+do_test (void)
|
||
+{
|
||
+ return do_test_1 ("strlen", LOOP, prepare, function);
|
||
+}
|
||
diff --git a/sysdeps/x86/tst-strncmp-rtm.c b/sysdeps/x86/tst-strncmp-rtm.c
|
||
new file mode 100644
|
||
index 0000000000..236ad951b5
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86/tst-strncmp-rtm.c
|
||
@@ -0,0 +1,52 @@
|
||
+/* Test case for strncmp inside a transactionally executing RTM region.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <tst-string-rtm.h>
|
||
+
|
||
+#define LOOP 3000
|
||
+#define STRING_SIZE 1024
|
||
+char string1[STRING_SIZE];
|
||
+char string2[STRING_SIZE];
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+prepare (void)
|
||
+{
|
||
+ memset (string1, 'a', STRING_SIZE - 1);
|
||
+ memset (string2, 'a', STRING_SIZE - 1);
|
||
+ if (strncmp (string1, string2, STRING_SIZE) == 0)
|
||
+ return EXIT_SUCCESS;
|
||
+ else
|
||
+ return EXIT_FAILURE;
|
||
+}
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+function (void)
|
||
+{
|
||
+ if (strncmp (string1, string2, STRING_SIZE) == 0)
|
||
+ return 0;
|
||
+ else
|
||
+ return 1;
|
||
+}
|
||
+
|
||
+static int
|
||
+do_test (void)
|
||
+{
|
||
+ return do_test_1 ("strncmp", LOOP, prepare, function);
|
||
+}
|
||
diff --git a/sysdeps/x86/tst-strrchr-rtm.c b/sysdeps/x86/tst-strrchr-rtm.c
|
||
new file mode 100644
|
||
index 0000000000..e32bfaf5f5
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86/tst-strrchr-rtm.c
|
||
@@ -0,0 +1,53 @@
|
||
+/* Test case for strrchr inside a transactionally executing RTM region.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <tst-string-rtm.h>
|
||
+
|
||
+#define LOOP 3000
|
||
+#define STRING_SIZE 1024
|
||
+char string1[STRING_SIZE];
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+prepare (void)
|
||
+{
|
||
+ memset (string1, 'a', STRING_SIZE - 1);
|
||
+ string1[STRING_SIZE - 100] = 'c';
|
||
+ char *p = strrchr (string1, 'c');
|
||
+ if (p == &string1[STRING_SIZE - 100])
|
||
+ return EXIT_SUCCESS;
|
||
+ else
|
||
+ return EXIT_FAILURE;
|
||
+}
|
||
+
|
||
+__attribute__ ((noinline, noclone))
|
||
+static int
|
||
+function (void)
|
||
+{
|
||
+ char *p = strrchr (string1, 'c');
|
||
+ if (p == &string1[STRING_SIZE - 100])
|
||
+ return 0;
|
||
+ else
|
||
+ return 1;
|
||
+}
|
||
+
|
||
+static int
|
||
+do_test (void)
|
||
+{
|
||
+ return do_test_1 ("strrchr", LOOP, prepare, function);
|
||
+}
|
||
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
|
||
index 42b97c5cc7..020044da80 100644
|
||
--- a/sysdeps/x86_64/Makefile
|
||
+++ b/sysdeps/x86_64/Makefile
|
||
@@ -20,6 +20,8 @@ endif
|
||
ifeq ($(subdir),string)
|
||
sysdep_routines += strcasecmp_l-nonascii strncase_l-nonascii
|
||
gen-as-const-headers += locale-defines.sym
|
||
+tests += \
|
||
+ tst-rsi-strlen
|
||
endif
|
||
|
||
ifeq ($(subdir),elf)
|
||
@@ -150,6 +152,11 @@ ifeq ($(subdir),csu)
|
||
gen-as-const-headers += tlsdesc.sym rtld-offsets.sym
|
||
endif
|
||
|
||
+ifeq ($(subdir),wcsmbs)
|
||
+tests += \
|
||
+ tst-rsi-wcslen
|
||
+endif
|
||
+
|
||
$(objpfx)x86_64/tst-x86_64mod-1.os: $(objpfx)tst-x86_64mod-1.os
|
||
$(make-target-directory)
|
||
rm -f $@
|
||
diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
|
||
old mode 100644
|
||
new mode 100755
|
||
index 84f82c2406..fc1840e23f
|
||
--- a/sysdeps/x86_64/configure
|
||
+++ b/sysdeps/x86_64/configure
|
||
@@ -107,39 +107,6 @@ if test x"$build_mathvec" = xnotset; then
|
||
build_mathvec=yes
|
||
fi
|
||
|
||
-if test "$static_pie" = yes; then
|
||
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for linker static PIE support" >&5
|
||
-$as_echo_n "checking for linker static PIE support... " >&6; }
|
||
-if ${libc_cv_ld_static_pie+:} false; then :
|
||
- $as_echo_n "(cached) " >&6
|
||
-else
|
||
- cat > conftest.s <<\EOF
|
||
- .text
|
||
- .global _start
|
||
- .weak foo
|
||
-_start:
|
||
- leaq foo(%rip), %rax
|
||
-EOF
|
||
- libc_cv_pie_option="-Wl,-pie"
|
||
- if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&5'
|
||
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
||
- (eval $ac_try) 2>&5
|
||
- ac_status=$?
|
||
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||
- test $ac_status = 0; }; }; then
|
||
- libc_cv_ld_static_pie=yes
|
||
- else
|
||
- libc_cv_ld_static_pie=no
|
||
- fi
|
||
-rm -f conftest*
|
||
-fi
|
||
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ld_static_pie" >&5
|
||
-$as_echo "$libc_cv_ld_static_pie" >&6; }
|
||
- if test "$libc_cv_ld_static_pie" != yes; then
|
||
- as_fn_error $? "linker support for static PIE needed" "$LINENO" 5
|
||
- fi
|
||
-fi
|
||
-
|
||
$as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h
|
||
|
||
|
||
diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
|
||
index cdaba0c075..611a7d9ba3 100644
|
||
--- a/sysdeps/x86_64/configure.ac
|
||
+++ b/sysdeps/x86_64/configure.ac
|
||
@@ -53,31 +53,6 @@ if test x"$build_mathvec" = xnotset; then
|
||
build_mathvec=yes
|
||
fi
|
||
|
||
-dnl Check if linker supports static PIE with the fix for
|
||
-dnl
|
||
-dnl https://sourceware.org/bugzilla/show_bug.cgi?id=21782
|
||
-dnl
|
||
-if test "$static_pie" = yes; then
|
||
- AC_CACHE_CHECK(for linker static PIE support, libc_cv_ld_static_pie, [dnl
|
||
-cat > conftest.s <<\EOF
|
||
- .text
|
||
- .global _start
|
||
- .weak foo
|
||
-_start:
|
||
- leaq foo(%rip), %rax
|
||
-EOF
|
||
- libc_cv_pie_option="-Wl,-pie"
|
||
- if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&AS_MESSAGE_LOG_FD); then
|
||
- libc_cv_ld_static_pie=yes
|
||
- else
|
||
- libc_cv_ld_static_pie=no
|
||
- fi
|
||
-rm -f conftest*])
|
||
- if test "$libc_cv_ld_static_pie" != yes; then
|
||
- AC_MSG_ERROR([linker support for static PIE needed])
|
||
- fi
|
||
-fi
|
||
-
|
||
dnl It is always possible to access static and hidden symbols in an
|
||
dnl position independent way.
|
||
AC_DEFINE(PI_STATIC_AND_HIDDEN)
|
||
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
|
||
index ca73d8fef9..363a749cb2 100644
|
||
--- a/sysdeps/x86_64/dl-machine.h
|
||
+++ b/sysdeps/x86_64/dl-machine.h
|
||
@@ -315,16 +315,22 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
|
||
{
|
||
# ifndef RTLD_BOOTSTRAP
|
||
if (sym_map != map
|
||
- && sym_map->l_type != lt_executable
|
||
&& !sym_map->l_relocated)
|
||
{
|
||
const char *strtab
|
||
= (const char *) D_PTR (map, l_info[DT_STRTAB]);
|
||
- _dl_error_printf ("\
|
||
+ if (sym_map->l_type == lt_executable)
|
||
+ _dl_fatal_printf ("\
|
||
+%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \
|
||
+and creates an unsatisfiable circular dependency.\n",
|
||
+ RTLD_PROGNAME, strtab + refsym->st_name,
|
||
+ map->l_name);
|
||
+ else
|
||
+ _dl_error_printf ("\
|
||
%s: Relink `%s' with `%s' for IFUNC symbol `%s'\n",
|
||
- RTLD_PROGNAME, map->l_name,
|
||
- sym_map->l_name,
|
||
- strtab + refsym->st_name);
|
||
+ RTLD_PROGNAME, map->l_name,
|
||
+ sym_map->l_name,
|
||
+ strtab + refsym->st_name);
|
||
}
|
||
# endif
|
||
value = ((ElfW(Addr) (*) (void)) value) ();
|
||
diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h
|
||
index 7659758972..e5fd5ac9cb 100644
|
||
--- a/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h
|
||
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h
|
||
@@ -32,7 +32,7 @@ IFUNC_SELECTOR (void)
|
||
&& CPU_FEATURE_USABLE_P (cpu_features, AVX2))
|
||
return OPTIMIZE (fma);
|
||
|
||
- if (CPU_FEATURE_USABLE_P (cpu_features, FMA))
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, FMA4))
|
||
return OPTIMIZE (fma4);
|
||
|
||
return OPTIMIZE (sse2);
|
||
diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
|
||
index a5c879d2af..070e5ef90b 100644
|
||
--- a/sysdeps/x86_64/memchr.S
|
||
+++ b/sysdeps/x86_64/memchr.S
|
||
@@ -21,9 +21,11 @@
|
||
#ifdef USE_AS_WMEMCHR
|
||
# define MEMCHR wmemchr
|
||
# define PCMPEQ pcmpeqd
|
||
+# define CHAR_PER_VEC 4
|
||
#else
|
||
# define MEMCHR memchr
|
||
# define PCMPEQ pcmpeqb
|
||
+# define CHAR_PER_VEC 16
|
||
#endif
|
||
|
||
/* fast SSE2 version with using pmaxub and 64 byte loop */
|
||
@@ -33,15 +35,14 @@ ENTRY(MEMCHR)
|
||
movd %esi, %xmm1
|
||
mov %edi, %ecx
|
||
|
||
+#ifdef __ILP32__
|
||
+ /* Clear the upper 32 bits. */
|
||
+ movl %edx, %edx
|
||
+#endif
|
||
#ifdef USE_AS_WMEMCHR
|
||
test %RDX_LP, %RDX_LP
|
||
jz L(return_null)
|
||
- shl $2, %RDX_LP
|
||
#else
|
||
-# ifdef __ILP32__
|
||
- /* Clear the upper 32 bits. */
|
||
- movl %edx, %edx
|
||
-# endif
|
||
punpcklbw %xmm1, %xmm1
|
||
test %RDX_LP, %RDX_LP
|
||
jz L(return_null)
|
||
@@ -60,13 +61,16 @@ ENTRY(MEMCHR)
|
||
test %eax, %eax
|
||
|
||
jnz L(matches_1)
|
||
- sub $16, %rdx
|
||
+ sub $CHAR_PER_VEC, %rdx
|
||
jbe L(return_null)
|
||
add $16, %rdi
|
||
and $15, %ecx
|
||
and $-16, %rdi
|
||
+#ifdef USE_AS_WMEMCHR
|
||
+ shr $2, %ecx
|
||
+#endif
|
||
add %rcx, %rdx
|
||
- sub $64, %rdx
|
||
+ sub $(CHAR_PER_VEC * 4), %rdx
|
||
jbe L(exit_loop)
|
||
jmp L(loop_prolog)
|
||
|
||
@@ -77,16 +81,21 @@ L(crosscache):
|
||
movdqa (%rdi), %xmm0
|
||
|
||
PCMPEQ %xmm1, %xmm0
|
||
-/* Check if there is a match. */
|
||
+ /* Check if there is a match. */
|
||
pmovmskb %xmm0, %eax
|
||
-/* Remove the leading bytes. */
|
||
+ /* Remove the leading bytes. */
|
||
sar %cl, %eax
|
||
test %eax, %eax
|
||
je L(unaligned_no_match)
|
||
-/* Check which byte is a match. */
|
||
+ /* Check which byte is a match. */
|
||
bsf %eax, %eax
|
||
-
|
||
+#ifdef USE_AS_WMEMCHR
|
||
+ mov %eax, %esi
|
||
+ shr $2, %esi
|
||
+ sub %rsi, %rdx
|
||
+#else
|
||
sub %rax, %rdx
|
||
+#endif
|
||
jbe L(return_null)
|
||
add %rdi, %rax
|
||
add %rcx, %rax
|
||
@@ -94,15 +103,18 @@ L(crosscache):
|
||
|
||
.p2align 4
|
||
L(unaligned_no_match):
|
||
- /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using
|
||
+ /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using
|
||
"rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to void
|
||
possible addition overflow. */
|
||
neg %rcx
|
||
add $16, %rcx
|
||
+#ifdef USE_AS_WMEMCHR
|
||
+ shr $2, %ecx
|
||
+#endif
|
||
sub %rcx, %rdx
|
||
jbe L(return_null)
|
||
add $16, %rdi
|
||
- sub $64, %rdx
|
||
+ sub $(CHAR_PER_VEC * 4), %rdx
|
||
jbe L(exit_loop)
|
||
|
||
.p2align 4
|
||
@@ -135,7 +147,7 @@ L(loop_prolog):
|
||
test $0x3f, %rdi
|
||
jz L(align64_loop)
|
||
|
||
- sub $64, %rdx
|
||
+ sub $(CHAR_PER_VEC * 4), %rdx
|
||
jbe L(exit_loop)
|
||
|
||
movdqa (%rdi), %xmm0
|
||
@@ -167,11 +179,14 @@ L(loop_prolog):
|
||
mov %rdi, %rcx
|
||
and $-64, %rdi
|
||
and $63, %ecx
|
||
+#ifdef USE_AS_WMEMCHR
|
||
+ shr $2, %ecx
|
||
+#endif
|
||
add %rcx, %rdx
|
||
|
||
.p2align 4
|
||
L(align64_loop):
|
||
- sub $64, %rdx
|
||
+ sub $(CHAR_PER_VEC * 4), %rdx
|
||
jbe L(exit_loop)
|
||
movdqa (%rdi), %xmm0
|
||
movdqa 16(%rdi), %xmm2
|
||
@@ -218,7 +233,7 @@ L(align64_loop):
|
||
|
||
.p2align 4
|
||
L(exit_loop):
|
||
- add $32, %edx
|
||
+ add $(CHAR_PER_VEC * 2), %edx
|
||
jle L(exit_loop_32)
|
||
|
||
movdqa (%rdi), %xmm0
|
||
@@ -238,7 +253,7 @@ L(exit_loop):
|
||
pmovmskb %xmm3, %eax
|
||
test %eax, %eax
|
||
jnz L(matches32_1)
|
||
- sub $16, %edx
|
||
+ sub $CHAR_PER_VEC, %edx
|
||
jle L(return_null)
|
||
|
||
PCMPEQ 48(%rdi), %xmm1
|
||
@@ -250,13 +265,13 @@ L(exit_loop):
|
||
|
||
.p2align 4
|
||
L(exit_loop_32):
|
||
- add $32, %edx
|
||
+ add $(CHAR_PER_VEC * 2), %edx
|
||
movdqa (%rdi), %xmm0
|
||
PCMPEQ %xmm1, %xmm0
|
||
pmovmskb %xmm0, %eax
|
||
test %eax, %eax
|
||
jnz L(matches_1)
|
||
- sub $16, %edx
|
||
+ sub $CHAR_PER_VEC, %edx
|
||
jbe L(return_null)
|
||
|
||
PCMPEQ 16(%rdi), %xmm1
|
||
@@ -293,7 +308,13 @@ L(matches32):
|
||
.p2align 4
|
||
L(matches_1):
|
||
bsf %eax, %eax
|
||
+#ifdef USE_AS_WMEMCHR
|
||
+ mov %eax, %esi
|
||
+ shr $2, %esi
|
||
+ sub %rsi, %rdx
|
||
+#else
|
||
sub %rax, %rdx
|
||
+#endif
|
||
jbe L(return_null)
|
||
add %rdi, %rax
|
||
ret
|
||
@@ -301,7 +322,13 @@ L(matches_1):
|
||
.p2align 4
|
||
L(matches16_1):
|
||
bsf %eax, %eax
|
||
+#ifdef USE_AS_WMEMCHR
|
||
+ mov %eax, %esi
|
||
+ shr $2, %esi
|
||
+ sub %rsi, %rdx
|
||
+#else
|
||
sub %rax, %rdx
|
||
+#endif
|
||
jbe L(return_null)
|
||
lea 16(%rdi, %rax), %rax
|
||
ret
|
||
@@ -309,7 +336,13 @@ L(matches16_1):
|
||
.p2align 4
|
||
L(matches32_1):
|
||
bsf %eax, %eax
|
||
+#ifdef USE_AS_WMEMCHR
|
||
+ mov %eax, %esi
|
||
+ shr $2, %esi
|
||
+ sub %rsi, %rdx
|
||
+#else
|
||
sub %rax, %rdx
|
||
+#endif
|
||
jbe L(return_null)
|
||
lea 32(%rdi, %rax), %rax
|
||
ret
|
||
@@ -317,7 +350,13 @@ L(matches32_1):
|
||
.p2align 4
|
||
L(matches48_1):
|
||
bsf %eax, %eax
|
||
+#ifdef USE_AS_WMEMCHR
|
||
+ mov %eax, %esi
|
||
+ shr $2, %esi
|
||
+ sub %rsi, %rdx
|
||
+#else
|
||
sub %rax, %rdx
|
||
+#endif
|
||
jbe L(return_null)
|
||
lea 48(%rdi, %rax), %rax
|
||
ret
|
||
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
|
||
index 395e432c09..da1446d731 100644
|
||
--- a/sysdeps/x86_64/multiarch/Makefile
|
||
+++ b/sysdeps/x86_64/multiarch/Makefile
|
||
@@ -43,7 +43,45 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c \
|
||
memmove-avx512-unaligned-erms \
|
||
memset-sse2-unaligned-erms \
|
||
memset-avx2-unaligned-erms \
|
||
- memset-avx512-unaligned-erms
|
||
+ memset-avx512-unaligned-erms \
|
||
+ memchr-avx2-rtm \
|
||
+ memcmp-avx2-movbe-rtm \
|
||
+ memmove-avx-unaligned-erms-rtm \
|
||
+ memrchr-avx2-rtm \
|
||
+ memset-avx2-unaligned-erms-rtm \
|
||
+ rawmemchr-avx2-rtm \
|
||
+ strchr-avx2-rtm \
|
||
+ strcmp-avx2-rtm \
|
||
+ strchrnul-avx2-rtm \
|
||
+ stpcpy-avx2-rtm \
|
||
+ stpncpy-avx2-rtm \
|
||
+ strcat-avx2-rtm \
|
||
+ strcpy-avx2-rtm \
|
||
+ strlen-avx2-rtm \
|
||
+ strncat-avx2-rtm \
|
||
+ strncmp-avx2-rtm \
|
||
+ strncpy-avx2-rtm \
|
||
+ strnlen-avx2-rtm \
|
||
+ strrchr-avx2-rtm \
|
||
+ memchr-evex \
|
||
+ memcmp-evex-movbe \
|
||
+ memmove-evex-unaligned-erms \
|
||
+ memrchr-evex \
|
||
+ memset-evex-unaligned-erms \
|
||
+ rawmemchr-evex \
|
||
+ stpcpy-evex \
|
||
+ stpncpy-evex \
|
||
+ strcat-evex \
|
||
+ strchr-evex \
|
||
+ strchrnul-evex \
|
||
+ strcmp-evex \
|
||
+ strcpy-evex \
|
||
+ strlen-evex \
|
||
+ strncat-evex \
|
||
+ strncmp-evex \
|
||
+ strncpy-evex \
|
||
+ strnlen-evex \
|
||
+ strrchr-evex
|
||
CFLAGS-varshift.c += -msse4
|
||
CFLAGS-strcspn-c.c += -msse4
|
||
CFLAGS-strpbrk-c.c += -msse4
|
||
@@ -59,8 +97,24 @@ sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \
|
||
wcscpy-ssse3 wcscpy-c \
|
||
wcschr-sse2 wcschr-avx2 \
|
||
wcsrchr-sse2 wcsrchr-avx2 \
|
||
- wcsnlen-sse4_1 wcsnlen-c \
|
||
- wcslen-sse2 wcslen-avx2 wcsnlen-avx2
|
||
+ wcslen-sse2 wcslen-sse4_1 wcslen-avx2 \
|
||
+ wcsnlen-c wcsnlen-sse4_1 wcsnlen-avx2 \
|
||
+ wcschr-avx2-rtm \
|
||
+ wcscmp-avx2-rtm \
|
||
+ wcslen-avx2-rtm \
|
||
+ wcsncmp-avx2-rtm \
|
||
+ wcsnlen-avx2-rtm \
|
||
+ wcsrchr-avx2-rtm \
|
||
+ wmemchr-avx2-rtm \
|
||
+ wmemcmp-avx2-movbe-rtm \
|
||
+ wcschr-evex \
|
||
+ wcscmp-evex \
|
||
+ wcslen-evex \
|
||
+ wcsncmp-evex \
|
||
+ wcsnlen-evex \
|
||
+ wcsrchr-evex \
|
||
+ wmemchr-evex \
|
||
+ wmemcmp-evex-movbe
|
||
endif
|
||
|
||
ifeq ($(subdir),debug)
|
||
diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
|
||
index f4e311d470..f450c786f0 100644
|
||
--- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
|
||
+++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
|
||
@@ -21,16 +21,28 @@
|
||
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||
|
||
static inline void *
|
||
IFUNC_SELECTOR (void)
|
||
{
|
||
const struct cpu_features* cpu_features = __get_cpu_features ();
|
||
|
||
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
|
||
- && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
||
- return OPTIMIZE (avx2);
|
||
+ {
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
|
||
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
|
||
+ return OPTIMIZE (evex);
|
||
+
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
||
+ return OPTIMIZE (avx2_rtm);
|
||
+
|
||
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
||
+ return OPTIMIZE (avx2);
|
||
+ }
|
||
|
||
return OPTIMIZE (sse2);
|
||
}
|
||
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
index f93ec39d98..920e64241e 100644
|
||
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
||
@@ -43,6 +43,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, memchr,
|
||
CPU_FEATURE_USABLE (AVX2),
|
||
__memchr_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, memchr,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __memchr_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, memchr,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
+ __memchr_evex)
|
||
IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_sse2))
|
||
|
||
/* Support sysdeps/x86_64/multiarch/memcmp.c. */
|
||
@@ -51,6 +60,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
(CPU_FEATURE_USABLE (AVX2)
|
||
&& CPU_FEATURE_USABLE (MOVBE)),
|
||
__memcmp_avx2_movbe)
|
||
+ IFUNC_IMPL_ADD (array, i, memcmp,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (MOVBE)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __memcmp_avx2_movbe_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, memcmp,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
+ && CPU_FEATURE_USABLE (MOVBE)),
|
||
+ __memcmp_evex_movbe)
|
||
IFUNC_IMPL_ADD (array, i, memcmp, CPU_FEATURE_USABLE (SSE4_1),
|
||
__memcmp_sse4_1)
|
||
IFUNC_IMPL_ADD (array, i, memcmp, CPU_FEATURE_USABLE (SSSE3),
|
||
@@ -64,10 +83,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
CPU_FEATURE_USABLE (AVX512F),
|
||
__memmove_chk_avx512_no_vzeroupper)
|
||
IFUNC_IMPL_ADD (array, i, __memmove_chk,
|
||
- CPU_FEATURE_USABLE (AVX512F),
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
__memmove_chk_avx512_unaligned)
|
||
IFUNC_IMPL_ADD (array, i, __memmove_chk,
|
||
- CPU_FEATURE_USABLE (AVX512F),
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
__memmove_chk_avx512_unaligned_erms)
|
||
IFUNC_IMPL_ADD (array, i, __memmove_chk,
|
||
CPU_FEATURE_USABLE (AVX),
|
||
@@ -75,6 +94,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, __memmove_chk,
|
||
CPU_FEATURE_USABLE (AVX),
|
||
__memmove_chk_avx_unaligned_erms)
|
||
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
|
||
+ (CPU_FEATURE_USABLE (AVX)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __memmove_chk_avx_unaligned_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
|
||
+ (CPU_FEATURE_USABLE (AVX)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __memmove_chk_avx_unaligned_erms_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
+ __memmove_chk_evex_unaligned)
|
||
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
+ __memmove_chk_evex_unaligned_erms)
|
||
IFUNC_IMPL_ADD (array, i, __memmove_chk,
|
||
CPU_FEATURE_USABLE (SSSE3),
|
||
__memmove_chk_ssse3_back)
|
||
@@ -97,14 +130,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, memmove,
|
||
CPU_FEATURE_USABLE (AVX),
|
||
__memmove_avx_unaligned_erms)
|
||
+ IFUNC_IMPL_ADD (array, i, memmove,
|
||
+ (CPU_FEATURE_USABLE (AVX)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __memmove_avx_unaligned_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, memmove,
|
||
+ (CPU_FEATURE_USABLE (AVX)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __memmove_avx_unaligned_erms_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, memmove,
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
+ __memmove_evex_unaligned)
|
||
+ IFUNC_IMPL_ADD (array, i, memmove,
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
+ __memmove_evex_unaligned_erms)
|
||
IFUNC_IMPL_ADD (array, i, memmove,
|
||
CPU_FEATURE_USABLE (AVX512F),
|
||
__memmove_avx512_no_vzeroupper)
|
||
IFUNC_IMPL_ADD (array, i, memmove,
|
||
- CPU_FEATURE_USABLE (AVX512F),
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
__memmove_avx512_unaligned)
|
||
IFUNC_IMPL_ADD (array, i, memmove,
|
||
- CPU_FEATURE_USABLE (AVX512F),
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
__memmove_avx512_unaligned_erms)
|
||
IFUNC_IMPL_ADD (array, i, memmove, CPU_FEATURE_USABLE (SSSE3),
|
||
__memmove_ssse3_back)
|
||
@@ -121,6 +168,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, memrchr,
|
||
CPU_FEATURE_USABLE (AVX2),
|
||
__memrchr_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, memrchr,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __memrchr_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, memrchr,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)),
|
||
+ __memrchr_evex)
|
||
+
|
||
IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2))
|
||
|
||
#ifdef SHARED
|
||
@@ -139,10 +195,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
CPU_FEATURE_USABLE (AVX2),
|
||
__memset_chk_avx2_unaligned_erms)
|
||
IFUNC_IMPL_ADD (array, i, __memset_chk,
|
||
- CPU_FEATURE_USABLE (AVX512F),
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __memset_chk_avx2_unaligned_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __memset_chk_avx2_unaligned_erms_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)),
|
||
+ __memset_chk_evex_unaligned)
|
||
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)),
|
||
+ __memset_chk_evex_unaligned_erms)
|
||
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)),
|
||
__memset_chk_avx512_unaligned_erms)
|
||
IFUNC_IMPL_ADD (array, i, __memset_chk,
|
||
- CPU_FEATURE_USABLE (AVX512F),
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)),
|
||
__memset_chk_avx512_unaligned)
|
||
IFUNC_IMPL_ADD (array, i, __memset_chk,
|
||
CPU_FEATURE_USABLE (AVX512F),
|
||
@@ -164,10 +238,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
CPU_FEATURE_USABLE (AVX2),
|
||
__memset_avx2_unaligned_erms)
|
||
IFUNC_IMPL_ADD (array, i, memset,
|
||
- CPU_FEATURE_USABLE (AVX512F),
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __memset_avx2_unaligned_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, memset,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __memset_avx2_unaligned_erms_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, memset,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)),
|
||
+ __memset_evex_unaligned)
|
||
+ IFUNC_IMPL_ADD (array, i, memset,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)),
|
||
+ __memset_evex_unaligned_erms)
|
||
+ IFUNC_IMPL_ADD (array, i, memset,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)),
|
||
__memset_avx512_unaligned_erms)
|
||
IFUNC_IMPL_ADD (array, i, memset,
|
||
- CPU_FEATURE_USABLE (AVX512F),
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)),
|
||
__memset_avx512_unaligned)
|
||
IFUNC_IMPL_ADD (array, i, memset,
|
||
CPU_FEATURE_USABLE (AVX512F),
|
||
@@ -179,20 +271,51 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, rawmemchr,
|
||
CPU_FEATURE_USABLE (AVX2),
|
||
__rawmemchr_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, rawmemchr,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __rawmemchr_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, rawmemchr,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
+ __rawmemchr_evex)
|
||
IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_sse2))
|
||
|
||
/* Support sysdeps/x86_64/multiarch/strlen.c. */
|
||
IFUNC_IMPL (i, name, strlen,
|
||
IFUNC_IMPL_ADD (array, i, strlen,
|
||
- CPU_FEATURE_USABLE (AVX2),
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
__strlen_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, strlen,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (BMI2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __strlen_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, strlen,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
+ __strlen_evex)
|
||
IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2))
|
||
|
||
/* Support sysdeps/x86_64/multiarch/strnlen.c. */
|
||
IFUNC_IMPL (i, name, strnlen,
|
||
IFUNC_IMPL_ADD (array, i, strnlen,
|
||
- CPU_FEATURE_USABLE (AVX2),
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
__strnlen_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, strnlen,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (BMI2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __strnlen_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, strnlen,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
+ __strnlen_evex)
|
||
IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2))
|
||
|
||
/* Support sysdeps/x86_64/multiarch/stpncpy.c. */
|
||
@@ -201,6 +324,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
__stpncpy_ssse3)
|
||
IFUNC_IMPL_ADD (array, i, stpncpy, CPU_FEATURE_USABLE (AVX2),
|
||
__stpncpy_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, stpncpy,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __stpncpy_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, stpncpy,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)),
|
||
+ __stpncpy_evex)
|
||
IFUNC_IMPL_ADD (array, i, stpncpy, 1,
|
||
__stpncpy_sse2_unaligned)
|
||
IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
|
||
@@ -211,6 +342,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
__stpcpy_ssse3)
|
||
IFUNC_IMPL_ADD (array, i, stpcpy, CPU_FEATURE_USABLE (AVX2),
|
||
__stpcpy_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, stpcpy,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __stpcpy_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, stpcpy,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)),
|
||
+ __stpcpy_evex)
|
||
IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2_unaligned)
|
||
IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2))
|
||
|
||
@@ -245,6 +384,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL (i, name, strcat,
|
||
IFUNC_IMPL_ADD (array, i, strcat, CPU_FEATURE_USABLE (AVX2),
|
||
__strcat_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, strcat,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __strcat_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, strcat,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)),
|
||
+ __strcat_evex)
|
||
IFUNC_IMPL_ADD (array, i, strcat, CPU_FEATURE_USABLE (SSSE3),
|
||
__strcat_ssse3)
|
||
IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2_unaligned)
|
||
@@ -255,6 +402,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, strchr,
|
||
CPU_FEATURE_USABLE (AVX2),
|
||
__strchr_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, strchr,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __strchr_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, strchr,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
+ __strchr_evex)
|
||
IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf)
|
||
IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2))
|
||
|
||
@@ -263,6 +419,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, strchrnul,
|
||
CPU_FEATURE_USABLE (AVX2),
|
||
__strchrnul_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, strchrnul,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __strchrnul_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, strchrnul,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
+ __strchrnul_evex)
|
||
IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2))
|
||
|
||
/* Support sysdeps/x86_64/multiarch/strrchr.c. */
|
||
@@ -270,6 +435,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, strrchr,
|
||
CPU_FEATURE_USABLE (AVX2),
|
||
__strrchr_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, strrchr,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __strrchr_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, strrchr,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)),
|
||
+ __strrchr_evex)
|
||
IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2))
|
||
|
||
/* Support sysdeps/x86_64/multiarch/strcmp.c. */
|
||
@@ -277,6 +450,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, strcmp,
|
||
CPU_FEATURE_USABLE (AVX2),
|
||
__strcmp_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, strcmp,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __strcmp_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, strcmp,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
+ __strcmp_evex)
|
||
IFUNC_IMPL_ADD (array, i, strcmp, CPU_FEATURE_USABLE (SSE4_2),
|
||
__strcmp_sse42)
|
||
IFUNC_IMPL_ADD (array, i, strcmp, CPU_FEATURE_USABLE (SSSE3),
|
||
@@ -288,6 +470,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL (i, name, strcpy,
|
||
IFUNC_IMPL_ADD (array, i, strcpy, CPU_FEATURE_USABLE (AVX2),
|
||
__strcpy_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, strcpy,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __strcpy_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, strcpy,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)),
|
||
+ __strcpy_evex)
|
||
IFUNC_IMPL_ADD (array, i, strcpy, CPU_FEATURE_USABLE (SSSE3),
|
||
__strcpy_ssse3)
|
||
IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_sse2_unaligned)
|
||
@@ -331,6 +521,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL (i, name, strncat,
|
||
IFUNC_IMPL_ADD (array, i, strncat, CPU_FEATURE_USABLE (AVX2),
|
||
__strncat_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, strncat,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __strncat_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, strncat,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)),
|
||
+ __strncat_evex)
|
||
IFUNC_IMPL_ADD (array, i, strncat, CPU_FEATURE_USABLE (SSSE3),
|
||
__strncat_ssse3)
|
||
IFUNC_IMPL_ADD (array, i, strncat, 1,
|
||
@@ -341,6 +539,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL (i, name, strncpy,
|
||
IFUNC_IMPL_ADD (array, i, strncpy, CPU_FEATURE_USABLE (AVX2),
|
||
__strncpy_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, strncpy,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __strncpy_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, strncpy,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)),
|
||
+ __strncpy_evex)
|
||
IFUNC_IMPL_ADD (array, i, strncpy, CPU_FEATURE_USABLE (SSSE3),
|
||
__strncpy_ssse3)
|
||
IFUNC_IMPL_ADD (array, i, strncpy, 1,
|
||
@@ -370,6 +576,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, wcschr,
|
||
CPU_FEATURE_USABLE (AVX2),
|
||
__wcschr_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, wcschr,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __wcschr_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, wcschr,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
+ __wcschr_evex)
|
||
IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2))
|
||
|
||
/* Support sysdeps/x86_64/multiarch/wcsrchr.c. */
|
||
@@ -377,6 +592,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, wcsrchr,
|
||
CPU_FEATURE_USABLE (AVX2),
|
||
__wcsrchr_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, wcsrchr,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __wcsrchr_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, wcsrchr,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
+ __wcsrchr_evex)
|
||
IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2))
|
||
|
||
/* Support sysdeps/x86_64/multiarch/wcscmp.c. */
|
||
@@ -384,6 +608,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, wcscmp,
|
||
CPU_FEATURE_USABLE (AVX2),
|
||
__wcscmp_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, wcscmp,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __wcscmp_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, wcscmp,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
+ __wcscmp_evex)
|
||
IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_sse2))
|
||
|
||
/* Support sysdeps/x86_64/multiarch/wcsncmp.c. */
|
||
@@ -391,6 +624,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, wcsncmp,
|
||
CPU_FEATURE_USABLE (AVX2),
|
||
__wcsncmp_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, wcsncmp,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __wcsncmp_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, wcsncmp,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
+ __wcsncmp_evex)
|
||
IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_sse2))
|
||
|
||
/* Support sysdeps/x86_64/multiarch/wcscpy.c. */
|
||
@@ -402,15 +644,40 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
/* Support sysdeps/x86_64/multiarch/wcslen.c. */
|
||
IFUNC_IMPL (i, name, wcslen,
|
||
IFUNC_IMPL_ADD (array, i, wcslen,
|
||
- CPU_FEATURE_USABLE (AVX2),
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
__wcslen_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, wcslen,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (BMI2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __wcslen_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, wcslen,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
+ __wcslen_evex)
|
||
+ IFUNC_IMPL_ADD (array, i, wcslen,
|
||
+ CPU_FEATURE_USABLE (SSE4_1),
|
||
+ __wcslen_sse4_1)
|
||
IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_sse2))
|
||
|
||
/* Support sysdeps/x86_64/multiarch/wcsnlen.c. */
|
||
IFUNC_IMPL (i, name, wcsnlen,
|
||
IFUNC_IMPL_ADD (array, i, wcsnlen,
|
||
- CPU_FEATURE_USABLE (AVX2),
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
__wcsnlen_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, wcsnlen,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (BMI2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __wcsnlen_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, wcsnlen,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
+ __wcsnlen_evex)
|
||
IFUNC_IMPL_ADD (array, i, wcsnlen,
|
||
CPU_FEATURE_USABLE (SSE4_1),
|
||
__wcsnlen_sse4_1)
|
||
@@ -421,6 +688,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, wmemchr,
|
||
CPU_FEATURE_USABLE (AVX2),
|
||
__wmemchr_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, wmemchr,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __wmemchr_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, wmemchr,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
+ && CPU_FEATURE_USABLE (BMI2)),
|
||
+ __wmemchr_evex)
|
||
IFUNC_IMPL_ADD (array, i, wmemchr, 1, __wmemchr_sse2))
|
||
|
||
/* Support sysdeps/x86_64/multiarch/wmemcmp.c. */
|
||
@@ -429,6 +705,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
(CPU_FEATURE_USABLE (AVX2)
|
||
&& CPU_FEATURE_USABLE (MOVBE)),
|
||
__wmemcmp_avx2_movbe)
|
||
+ IFUNC_IMPL_ADD (array, i, wmemcmp,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (MOVBE)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __wmemcmp_avx2_movbe_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, wmemcmp,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)
|
||
+ && CPU_FEATURE_USABLE (MOVBE)),
|
||
+ __wmemcmp_evex_movbe)
|
||
IFUNC_IMPL_ADD (array, i, wmemcmp, CPU_FEATURE_USABLE (SSE4_1),
|
||
__wmemcmp_sse4_1)
|
||
IFUNC_IMPL_ADD (array, i, wmemcmp, CPU_FEATURE_USABLE (SSSE3),
|
||
@@ -443,7 +729,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
CPU_FEATURE_USABLE (AVX2),
|
||
__wmemset_avx2_unaligned)
|
||
IFUNC_IMPL_ADD (array, i, wmemset,
|
||
- CPU_FEATURE_USABLE (AVX512F),
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __wmemset_avx2_unaligned_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, wmemset,
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
+ __wmemset_evex_unaligned)
|
||
+ IFUNC_IMPL_ADD (array, i, wmemset,
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
__wmemset_avx512_unaligned))
|
||
|
||
#ifdef SHARED
|
||
@@ -453,10 +746,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
CPU_FEATURE_USABLE (AVX512F),
|
||
__memcpy_chk_avx512_no_vzeroupper)
|
||
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
|
||
- CPU_FEATURE_USABLE (AVX512F),
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
__memcpy_chk_avx512_unaligned)
|
||
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
|
||
- CPU_FEATURE_USABLE (AVX512F),
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
__memcpy_chk_avx512_unaligned_erms)
|
||
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
|
||
CPU_FEATURE_USABLE (AVX),
|
||
@@ -464,6 +757,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
|
||
CPU_FEATURE_USABLE (AVX),
|
||
__memcpy_chk_avx_unaligned_erms)
|
||
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
|
||
+ (CPU_FEATURE_USABLE (AVX)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __memcpy_chk_avx_unaligned_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
|
||
+ (CPU_FEATURE_USABLE (AVX)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __memcpy_chk_avx_unaligned_erms_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
+ __memcpy_chk_evex_unaligned)
|
||
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
+ __memcpy_chk_evex_unaligned_erms)
|
||
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
|
||
CPU_FEATURE_USABLE (SSSE3),
|
||
__memcpy_chk_ssse3_back)
|
||
@@ -486,6 +793,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, memcpy,
|
||
CPU_FEATURE_USABLE (AVX),
|
||
__memcpy_avx_unaligned_erms)
|
||
+ IFUNC_IMPL_ADD (array, i, memcpy,
|
||
+ (CPU_FEATURE_USABLE (AVX)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __memcpy_avx_unaligned_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, memcpy,
|
||
+ (CPU_FEATURE_USABLE (AVX)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __memcpy_avx_unaligned_erms_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, memcpy,
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
+ __memcpy_evex_unaligned)
|
||
+ IFUNC_IMPL_ADD (array, i, memcpy,
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
+ __memcpy_evex_unaligned_erms)
|
||
IFUNC_IMPL_ADD (array, i, memcpy, CPU_FEATURE_USABLE (SSSE3),
|
||
__memcpy_ssse3_back)
|
||
IFUNC_IMPL_ADD (array, i, memcpy, CPU_FEATURE_USABLE (SSSE3),
|
||
@@ -494,10 +815,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
CPU_FEATURE_USABLE (AVX512F),
|
||
__memcpy_avx512_no_vzeroupper)
|
||
IFUNC_IMPL_ADD (array, i, memcpy,
|
||
- CPU_FEATURE_USABLE (AVX512F),
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
__memcpy_avx512_unaligned)
|
||
IFUNC_IMPL_ADD (array, i, memcpy,
|
||
- CPU_FEATURE_USABLE (AVX512F),
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
__memcpy_avx512_unaligned_erms)
|
||
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
|
||
IFUNC_IMPL_ADD (array, i, memcpy, 1,
|
||
@@ -511,10 +832,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
CPU_FEATURE_USABLE (AVX512F),
|
||
__mempcpy_chk_avx512_no_vzeroupper)
|
||
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
|
||
- CPU_FEATURE_USABLE (AVX512F),
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
__mempcpy_chk_avx512_unaligned)
|
||
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
|
||
- CPU_FEATURE_USABLE (AVX512F),
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
__mempcpy_chk_avx512_unaligned_erms)
|
||
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
|
||
CPU_FEATURE_USABLE (AVX),
|
||
@@ -522,6 +843,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
|
||
CPU_FEATURE_USABLE (AVX),
|
||
__mempcpy_chk_avx_unaligned_erms)
|
||
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
|
||
+ (CPU_FEATURE_USABLE (AVX)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __mempcpy_chk_avx_unaligned_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
|
||
+ (CPU_FEATURE_USABLE (AVX)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __mempcpy_chk_avx_unaligned_erms_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
+ __mempcpy_chk_evex_unaligned)
|
||
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
+ __mempcpy_chk_evex_unaligned_erms)
|
||
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
|
||
CPU_FEATURE_USABLE (SSSE3),
|
||
__mempcpy_chk_ssse3_back)
|
||
@@ -542,10 +877,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
CPU_FEATURE_USABLE (AVX512F),
|
||
__mempcpy_avx512_no_vzeroupper)
|
||
IFUNC_IMPL_ADD (array, i, mempcpy,
|
||
- CPU_FEATURE_USABLE (AVX512F),
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
__mempcpy_avx512_unaligned)
|
||
IFUNC_IMPL_ADD (array, i, mempcpy,
|
||
- CPU_FEATURE_USABLE (AVX512F),
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
__mempcpy_avx512_unaligned_erms)
|
||
IFUNC_IMPL_ADD (array, i, mempcpy,
|
||
CPU_FEATURE_USABLE (AVX),
|
||
@@ -553,6 +888,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, mempcpy,
|
||
CPU_FEATURE_USABLE (AVX),
|
||
__mempcpy_avx_unaligned_erms)
|
||
+ IFUNC_IMPL_ADD (array, i, mempcpy,
|
||
+ (CPU_FEATURE_USABLE (AVX)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __mempcpy_avx_unaligned_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, mempcpy,
|
||
+ (CPU_FEATURE_USABLE (AVX)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __mempcpy_avx_unaligned_erms_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, mempcpy,
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
+ __mempcpy_evex_unaligned)
|
||
+ IFUNC_IMPL_ADD (array, i, mempcpy,
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
+ __mempcpy_evex_unaligned_erms)
|
||
IFUNC_IMPL_ADD (array, i, mempcpy, CPU_FEATURE_USABLE (SSSE3),
|
||
__mempcpy_ssse3_back)
|
||
IFUNC_IMPL_ADD (array, i, mempcpy, CPU_FEATURE_USABLE (SSSE3),
|
||
@@ -568,6 +917,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, strncmp,
|
||
CPU_FEATURE_USABLE (AVX2),
|
||
__strncmp_avx2)
|
||
+ IFUNC_IMPL_ADD (array, i, strncmp,
|
||
+ (CPU_FEATURE_USABLE (AVX2)
|
||
+ && CPU_FEATURE_USABLE (RTM)),
|
||
+ __strncmp_avx2_rtm)
|
||
+ IFUNC_IMPL_ADD (array, i, strncmp,
|
||
+ (CPU_FEATURE_USABLE (AVX512VL)
|
||
+ && CPU_FEATURE_USABLE (AVX512BW)),
|
||
+ __strncmp_evex)
|
||
IFUNC_IMPL_ADD (array, i, strncmp, CPU_FEATURE_USABLE (SSE4_2),
|
||
__strncmp_sse42)
|
||
IFUNC_IMPL_ADD (array, i, strncmp, CPU_FEATURE_USABLE (SSSE3),
|
||
@@ -582,6 +939,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||
IFUNC_IMPL_ADD (array, i, __wmemset_chk,
|
||
CPU_FEATURE_USABLE (AVX2),
|
||
__wmemset_chk_avx2_unaligned)
|
||
+ IFUNC_IMPL_ADD (array, i, __wmemset_chk,
|
||
+ CPU_FEATURE_USABLE (AVX512VL),
|
||
+ __wmemset_chk_evex_unaligned)
|
||
IFUNC_IMPL_ADD (array, i, __wmemset_chk,
|
||
CPU_FEATURE_USABLE (AVX512F),
|
||
__wmemset_chk_avx512_unaligned))
|
||
diff --git a/sysdeps/x86_64/multiarch/ifunc-memcmp.h b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
|
||
index 0e21b3a628..4f96c2764a 100644
|
||
--- a/sysdeps/x86_64/multiarch/ifunc-memcmp.h
|
||
+++ b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
|
||
@@ -23,17 +23,28 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe) attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe_rtm) attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_movbe) attribute_hidden;
|
||
|
||
static inline void *
|
||
IFUNC_SELECTOR (void)
|
||
{
|
||
const struct cpu_features* cpu_features = __get_cpu_features ();
|
||
|
||
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
|
||
- && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
&& CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
|
||
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
||
- return OPTIMIZE (avx2_movbe);
|
||
+ {
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
|
||
+ return OPTIMIZE (evex_movbe);
|
||
+
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
||
+ return OPTIMIZE (avx2_movbe_rtm);
|
||
+
|
||
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
||
+ return OPTIMIZE (avx2_movbe);
|
||
+ }
|
||
|
||
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
|
||
return OPTIMIZE (sse4_1);
|
||
diff --git a/sysdeps/x86_64/multiarch/ifunc-memmove.h b/sysdeps/x86_64/multiarch/ifunc-memmove.h
|
||
index 9ada03aa43..db26210e3b 100644
|
||
--- a/sysdeps/x86_64/multiarch/ifunc-memmove.h
|
||
+++ b/sysdeps/x86_64/multiarch/ifunc-memmove.h
|
||
@@ -29,6 +29,14 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3_back) attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned) attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms)
|
||
attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_rtm)
|
||
+ attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms_rtm)
|
||
+ attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
|
||
+ attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
|
||
+ attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
|
||
attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
|
||
@@ -48,21 +56,42 @@ IFUNC_SELECTOR (void)
|
||
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
|
||
&& !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
|
||
{
|
||
- if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
||
- return OPTIMIZE (avx512_no_vzeroupper);
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
|
||
+ {
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
|
||
+ return OPTIMIZE (avx512_unaligned_erms);
|
||
|
||
- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
|
||
- return OPTIMIZE (avx512_unaligned_erms);
|
||
+ return OPTIMIZE (avx512_unaligned);
|
||
+ }
|
||
|
||
- return OPTIMIZE (avx512_unaligned);
|
||
+ return OPTIMIZE (avx512_no_vzeroupper);
|
||
}
|
||
|
||
if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
||
{
|
||
- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
|
||
- return OPTIMIZE (avx_unaligned_erms);
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
|
||
+ {
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
|
||
+ return OPTIMIZE (evex_unaligned_erms);
|
||
+
|
||
+ return OPTIMIZE (evex_unaligned);
|
||
+ }
|
||
+
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
||
+ {
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
|
||
+ return OPTIMIZE (avx_unaligned_erms_rtm);
|
||
+
|
||
+ return OPTIMIZE (avx_unaligned_rtm);
|
||
+ }
|
||
+
|
||
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
||
+ {
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
|
||
+ return OPTIMIZE (avx_unaligned_erms);
|
||
|
||
- return OPTIMIZE (avx_unaligned);
|
||
+ return OPTIMIZE (avx_unaligned);
|
||
+ }
|
||
}
|
||
|
||
if (!CPU_FEATURE_USABLE_P (cpu_features, SSSE3)
|
||
diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h
|
||
index f52613d372..57029fc17b 100644
|
||
--- a/sysdeps/x86_64/multiarch/ifunc-memset.h
|
||
+++ b/sysdeps/x86_64/multiarch/ifunc-memset.h
|
||
@@ -27,6 +27,14 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms)
|
||
attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm)
|
||
+ attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms_rtm)
|
||
+ attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
|
||
+ attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
|
||
+ attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
|
||
attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
|
||
@@ -45,21 +53,44 @@ IFUNC_SELECTOR (void)
|
||
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
|
||
&& !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
|
||
{
|
||
- if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
||
- return OPTIMIZE (avx512_no_vzeroupper);
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
|
||
+ {
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
|
||
+ return OPTIMIZE (avx512_unaligned_erms);
|
||
|
||
- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
|
||
- return OPTIMIZE (avx512_unaligned_erms);
|
||
+ return OPTIMIZE (avx512_unaligned);
|
||
+ }
|
||
|
||
- return OPTIMIZE (avx512_unaligned);
|
||
+ return OPTIMIZE (avx512_no_vzeroupper);
|
||
}
|
||
|
||
if (CPU_FEATURE_USABLE_P (cpu_features, AVX2))
|
||
{
|
||
- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
|
||
- return OPTIMIZE (avx2_unaligned_erms);
|
||
- else
|
||
- return OPTIMIZE (avx2_unaligned);
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
|
||
+ {
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
|
||
+ return OPTIMIZE (evex_unaligned_erms);
|
||
+
|
||
+ return OPTIMIZE (evex_unaligned);
|
||
+ }
|
||
+
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
||
+ {
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
|
||
+ return OPTIMIZE (avx2_unaligned_erms_rtm);
|
||
+
|
||
+ return OPTIMIZE (avx2_unaligned_rtm);
|
||
+ }
|
||
+
|
||
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
||
+ {
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
|
||
+ return OPTIMIZE (avx2_unaligned_erms);
|
||
+
|
||
+ return OPTIMIZE (avx2_unaligned);
|
||
+ }
|
||
}
|
||
|
||
if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
|
||
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
|
||
index 63b0dc0d96..35741f3ec8 100644
|
||
--- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h
|
||
+++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
|
||
@@ -25,16 +25,27 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
|
||
attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||
|
||
static inline void *
|
||
IFUNC_SELECTOR (void)
|
||
{
|
||
const struct cpu_features* cpu_features = __get_cpu_features ();
|
||
|
||
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
|
||
- && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
||
- return OPTIMIZE (avx2);
|
||
+ {
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
|
||
+ return OPTIMIZE (evex);
|
||
+
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
||
+ return OPTIMIZE (avx2_rtm);
|
||
+
|
||
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
||
+ return OPTIMIZE (avx2);
|
||
+ }
|
||
|
||
if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
|
||
return OPTIMIZE (sse2_unaligned);
|
||
diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
|
||
new file mode 100644
|
||
index 0000000000..39e3347378
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
|
||
@@ -0,0 +1,52 @@
|
||
+/* Common definition for ifunc selections for wcslen and wcsnlen
|
||
+ All versions must be listed in ifunc-impl-list.c.
|
||
+ Copyright (C) 2017-2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <init-arch.h>
|
||
+
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||
+
|
||
+static inline void *
|
||
+IFUNC_SELECTOR (void)
|
||
+{
|
||
+ const struct cpu_features* cpu_features = __get_cpu_features ();
|
||
+
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
|
||
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
||
+ {
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
|
||
+ return OPTIMIZE (evex);
|
||
+
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
||
+ return OPTIMIZE (avx2_rtm);
|
||
+
|
||
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
||
+ return OPTIMIZE (avx2);
|
||
+ }
|
||
+
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
|
||
+ return OPTIMIZE (sse4_1);
|
||
+
|
||
+ return OPTIMIZE (sse2);
|
||
+}
|
||
diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
|
||
index 8cfce562fc..e06e8b4d80 100644
|
||
--- a/sysdeps/x86_64/multiarch/ifunc-wmemset.h
|
||
+++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
|
||
@@ -20,6 +20,9 @@
|
||
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm)
|
||
+ attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden;
|
||
|
||
static inline void *
|
||
@@ -27,14 +30,21 @@ IFUNC_SELECTOR (void)
|
||
{
|
||
const struct cpu_features* cpu_features = __get_cpu_features ();
|
||
|
||
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
|
||
- && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
||
{
|
||
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
|
||
- && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
|
||
- return OPTIMIZE (avx512_unaligned);
|
||
- else
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
|
||
+ {
|
||
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
|
||
+ return OPTIMIZE (avx512_unaligned);
|
||
+
|
||
+ return OPTIMIZE (evex_unaligned);
|
||
+ }
|
||
+
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
||
+ return OPTIMIZE (avx2_unaligned_rtm);
|
||
+
|
||
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
||
return OPTIMIZE (avx2_unaligned);
|
||
}
|
||
|
||
diff --git a/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..87b076c7c4
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S
|
||
@@ -0,0 +1,12 @@
|
||
+#ifndef MEMCHR
|
||
+# define MEMCHR __memchr_avx2_rtm
|
||
+#endif
|
||
+
|
||
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
|
||
+
|
||
+#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
|
||
+
|
||
+#define SECTION(p) p##.avx.rtm
|
||
+
|
||
+#include "memchr-avx2.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S
|
||
index e5a9abd211..0987616a1b 100644
|
||
--- a/sysdeps/x86_64/multiarch/memchr-avx2.S
|
||
+++ b/sysdeps/x86_64/multiarch/memchr-avx2.S
|
||
@@ -26,319 +26,407 @@
|
||
|
||
# ifdef USE_AS_WMEMCHR
|
||
# define VPCMPEQ vpcmpeqd
|
||
+# define VPBROADCAST vpbroadcastd
|
||
+# define CHAR_SIZE 4
|
||
# else
|
||
# define VPCMPEQ vpcmpeqb
|
||
+# define VPBROADCAST vpbroadcastb
|
||
+# define CHAR_SIZE 1
|
||
+# endif
|
||
+
|
||
+# ifdef USE_AS_RAWMEMCHR
|
||
+# define ERAW_PTR_REG ecx
|
||
+# define RRAW_PTR_REG rcx
|
||
+# define ALGN_PTR_REG rdi
|
||
+# else
|
||
+# define ERAW_PTR_REG edi
|
||
+# define RRAW_PTR_REG rdi
|
||
+# define ALGN_PTR_REG rcx
|
||
# endif
|
||
|
||
# ifndef VZEROUPPER
|
||
# define VZEROUPPER vzeroupper
|
||
# endif
|
||
|
||
+# ifndef SECTION
|
||
+# define SECTION(p) p##.avx
|
||
+# endif
|
||
+
|
||
# define VEC_SIZE 32
|
||
+# define PAGE_SIZE 4096
|
||
+# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
|
||
|
||
- .section .text.avx,"ax",@progbits
|
||
+ .section SECTION(.text),"ax",@progbits
|
||
ENTRY (MEMCHR)
|
||
# ifndef USE_AS_RAWMEMCHR
|
||
/* Check for zero length. */
|
||
+# ifdef __ILP32__
|
||
+ /* Clear upper bits. */
|
||
+ and %RDX_LP, %RDX_LP
|
||
+# else
|
||
test %RDX_LP, %RDX_LP
|
||
+# endif
|
||
jz L(null)
|
||
# endif
|
||
- movl %edi, %ecx
|
||
- /* Broadcast CHAR to YMM0. */
|
||
+ /* Broadcast CHAR to YMM0. */
|
||
vmovd %esi, %xmm0
|
||
-# ifdef USE_AS_WMEMCHR
|
||
- shl $2, %RDX_LP
|
||
- vpbroadcastd %xmm0, %ymm0
|
||
-# else
|
||
-# ifdef __ILP32__
|
||
- /* Clear the upper 32 bits. */
|
||
- movl %edx, %edx
|
||
-# endif
|
||
- vpbroadcastb %xmm0, %ymm0
|
||
-# endif
|
||
+ VPBROADCAST %xmm0, %ymm0
|
||
/* Check if we may cross page boundary with one vector load. */
|
||
- andl $(2 * VEC_SIZE - 1), %ecx
|
||
- cmpl $VEC_SIZE, %ecx
|
||
- ja L(cros_page_boundary)
|
||
+ movl %edi, %eax
|
||
+ andl $(PAGE_SIZE - 1), %eax
|
||
+ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
|
||
+ ja L(cross_page_boundary)
|
||
|
||
/* Check the first VEC_SIZE bytes. */
|
||
- VPCMPEQ (%rdi), %ymm0, %ymm1
|
||
+ VPCMPEQ (%rdi), %ymm0, %ymm1
|
||
vpmovmskb %ymm1, %eax
|
||
- testl %eax, %eax
|
||
-
|
||
# ifndef USE_AS_RAWMEMCHR
|
||
- jnz L(first_vec_x0_check)
|
||
- /* Adjust length and check the end of data. */
|
||
- subq $VEC_SIZE, %rdx
|
||
- jbe L(zero)
|
||
-# else
|
||
- jnz L(first_vec_x0)
|
||
+ /* If length <= CHAR_PER_VEC handle specially. */
|
||
+ cmpq $CHAR_PER_VEC, %rdx
|
||
+ jbe L(first_vec_x0)
|
||
# endif
|
||
-
|
||
- /* Align data for aligned loads in the loop. */
|
||
- addq $VEC_SIZE, %rdi
|
||
- andl $(VEC_SIZE - 1), %ecx
|
||
- andq $-VEC_SIZE, %rdi
|
||
+ testl %eax, %eax
|
||
+ jz L(aligned_more)
|
||
+ tzcntl %eax, %eax
|
||
+ addq %rdi, %rax
|
||
+ VZEROUPPER_RETURN
|
||
|
||
# ifndef USE_AS_RAWMEMCHR
|
||
- /* Adjust length. */
|
||
- addq %rcx, %rdx
|
||
+ .p2align 5
|
||
+L(first_vec_x0):
|
||
+ /* Check if first match was before length. */
|
||
+ tzcntl %eax, %eax
|
||
+# ifdef USE_AS_WMEMCHR
|
||
+ /* NB: Multiply length by 4 to get byte count. */
|
||
+ sall $2, %edx
|
||
+# endif
|
||
+ xorl %ecx, %ecx
|
||
+ cmpl %eax, %edx
|
||
+ leaq (%rdi, %rax), %rax
|
||
+ cmovle %rcx, %rax
|
||
+ VZEROUPPER_RETURN
|
||
|
||
- subq $(VEC_SIZE * 4), %rdx
|
||
- jbe L(last_4x_vec_or_less)
|
||
+L(null):
|
||
+ xorl %eax, %eax
|
||
+ ret
|
||
# endif
|
||
- jmp L(more_4x_vec)
|
||
-
|
||
.p2align 4
|
||
-L(cros_page_boundary):
|
||
- andl $(VEC_SIZE - 1), %ecx
|
||
- andq $-VEC_SIZE, %rdi
|
||
- VPCMPEQ (%rdi), %ymm0, %ymm1
|
||
+L(cross_page_boundary):
|
||
+ /* Save pointer before aligning as its original value is
|
||
+ necessary for computing the return address if byte is found or
|
||
+ adjusting length if it is not and this is memchr. */
|
||
+ movq %rdi, %rcx
|
||
+ /* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr
|
||
+ and rdi for rawmemchr. */
|
||
+ orq $(VEC_SIZE - 1), %ALGN_PTR_REG
|
||
+ VPCMPEQ -(VEC_SIZE - 1)(%ALGN_PTR_REG), %ymm0, %ymm1
|
||
vpmovmskb %ymm1, %eax
|
||
+# ifndef USE_AS_RAWMEMCHR
|
||
+ /* Calculate length until end of page (length checked for a
|
||
+ match). */
|
||
+ leaq 1(%ALGN_PTR_REG), %rsi
|
||
+ subq %RRAW_PTR_REG, %rsi
|
||
+# ifdef USE_AS_WMEMCHR
|
||
+ /* NB: Divide bytes by 4 to get wchar_t count. */
|
||
+ shrl $2, %esi
|
||
+# endif
|
||
+# endif
|
||
/* Remove the leading bytes. */
|
||
- sarl %cl, %eax
|
||
- testl %eax, %eax
|
||
- jz L(aligned_more)
|
||
- tzcntl %eax, %eax
|
||
+ sarxl %ERAW_PTR_REG, %eax, %eax
|
||
# ifndef USE_AS_RAWMEMCHR
|
||
/* Check the end of data. */
|
||
- cmpq %rax, %rdx
|
||
- jbe L(zero)
|
||
+ cmpq %rsi, %rdx
|
||
+ jbe L(first_vec_x0)
|
||
# endif
|
||
+ testl %eax, %eax
|
||
+ jz L(cross_page_continue)
|
||
+ tzcntl %eax, %eax
|
||
+ addq %RRAW_PTR_REG, %rax
|
||
+L(return_vzeroupper):
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN
|
||
+
|
||
+ .p2align 4
|
||
+L(first_vec_x1):
|
||
+ tzcntl %eax, %eax
|
||
+ incq %rdi
|
||
addq %rdi, %rax
|
||
- addq %rcx, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
-L(aligned_more):
|
||
-# ifndef USE_AS_RAWMEMCHR
|
||
- /* Calculate "rdx + rcx - VEC_SIZE" with "rdx - (VEC_SIZE - rcx)"
|
||
- instead of "(rdx + rcx) - VEC_SIZE" to void possible addition
|
||
- overflow. */
|
||
- negq %rcx
|
||
- addq $VEC_SIZE, %rcx
|
||
+L(first_vec_x2):
|
||
+ tzcntl %eax, %eax
|
||
+ addq $(VEC_SIZE + 1), %rdi
|
||
+ addq %rdi, %rax
|
||
+ VZEROUPPER_RETURN
|
||
|
||
- /* Check the end of data. */
|
||
- subq %rcx, %rdx
|
||
- jbe L(zero)
|
||
-# endif
|
||
+ .p2align 4
|
||
+L(first_vec_x3):
|
||
+ tzcntl %eax, %eax
|
||
+ addq $(VEC_SIZE * 2 + 1), %rdi
|
||
+ addq %rdi, %rax
|
||
+ VZEROUPPER_RETURN
|
||
|
||
- addq $VEC_SIZE, %rdi
|
||
|
||
-# ifndef USE_AS_RAWMEMCHR
|
||
- subq $(VEC_SIZE * 4), %rdx
|
||
- jbe L(last_4x_vec_or_less)
|
||
-# endif
|
||
+ .p2align 4
|
||
+L(first_vec_x4):
|
||
+ tzcntl %eax, %eax
|
||
+ addq $(VEC_SIZE * 3 + 1), %rdi
|
||
+ addq %rdi, %rax
|
||
+ VZEROUPPER_RETURN
|
||
|
||
-L(more_4x_vec):
|
||
+ .p2align 4
|
||
+L(aligned_more):
|
||
/* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time
|
||
since data is only aligned to VEC_SIZE. */
|
||
- VPCMPEQ (%rdi), %ymm0, %ymm1
|
||
- vpmovmskb %ymm1, %eax
|
||
- testl %eax, %eax
|
||
- jnz L(first_vec_x0)
|
||
|
||
- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
|
||
+# ifndef USE_AS_RAWMEMCHR
|
||
+L(cross_page_continue):
|
||
+ /* Align data to VEC_SIZE - 1. */
|
||
+ xorl %ecx, %ecx
|
||
+ subl %edi, %ecx
|
||
+ orq $(VEC_SIZE - 1), %rdi
|
||
+ /* esi is for adjusting length to see if near the end. */
|
||
+ leal (VEC_SIZE * 4 + 1)(%rdi, %rcx), %esi
|
||
+# ifdef USE_AS_WMEMCHR
|
||
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
|
||
+ sarl $2, %esi
|
||
+# endif
|
||
+# else
|
||
+ orq $(VEC_SIZE - 1), %rdi
|
||
+L(cross_page_continue):
|
||
+# endif
|
||
+ /* Load first VEC regardless. */
|
||
+ VPCMPEQ 1(%rdi), %ymm0, %ymm1
|
||
vpmovmskb %ymm1, %eax
|
||
+# ifndef USE_AS_RAWMEMCHR
|
||
+ /* Adjust length. If near end handle specially. */
|
||
+ subq %rsi, %rdx
|
||
+ jbe L(last_4x_vec_or_less)
|
||
+# endif
|
||
testl %eax, %eax
|
||
jnz L(first_vec_x1)
|
||
|
||
- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
|
||
+ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
|
||
vpmovmskb %ymm1, %eax
|
||
testl %eax, %eax
|
||
jnz L(first_vec_x2)
|
||
|
||
- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
|
||
+ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1
|
||
vpmovmskb %ymm1, %eax
|
||
testl %eax, %eax
|
||
jnz L(first_vec_x3)
|
||
|
||
- addq $(VEC_SIZE * 4), %rdi
|
||
-
|
||
-# ifndef USE_AS_RAWMEMCHR
|
||
- subq $(VEC_SIZE * 4), %rdx
|
||
- jbe L(last_4x_vec_or_less)
|
||
-# endif
|
||
-
|
||
- /* Align data to 4 * VEC_SIZE. */
|
||
- movq %rdi, %rcx
|
||
- andl $(4 * VEC_SIZE - 1), %ecx
|
||
- andq $-(4 * VEC_SIZE), %rdi
|
||
+ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1
|
||
+ vpmovmskb %ymm1, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(first_vec_x4)
|
||
|
||
# ifndef USE_AS_RAWMEMCHR
|
||
- /* Adjust length. */
|
||
+ /* Check if at last VEC_SIZE * 4 length. */
|
||
+ subq $(CHAR_PER_VEC * 4), %rdx
|
||
+ jbe L(last_4x_vec_or_less_cmpeq)
|
||
+ /* Align data to VEC_SIZE * 4 - 1 for the loop and readjust
|
||
+ length. */
|
||
+ incq %rdi
|
||
+ movl %edi, %ecx
|
||
+ orq $(VEC_SIZE * 4 - 1), %rdi
|
||
+ andl $(VEC_SIZE * 4 - 1), %ecx
|
||
+# ifdef USE_AS_WMEMCHR
|
||
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
|
||
+ sarl $2, %ecx
|
||
+# endif
|
||
addq %rcx, %rdx
|
||
+# else
|
||
+ /* Align data to VEC_SIZE * 4 - 1 for loop. */
|
||
+ incq %rdi
|
||
+ orq $(VEC_SIZE * 4 - 1), %rdi
|
||
# endif
|
||
|
||
+ /* Compare 4 * VEC at a time forward. */
|
||
.p2align 4
|
||
L(loop_4x_vec):
|
||
- /* Compare 4 * VEC at a time forward. */
|
||
- VPCMPEQ (%rdi), %ymm0, %ymm1
|
||
- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm2
|
||
- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm3
|
||
- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm4
|
||
-
|
||
+ VPCMPEQ 1(%rdi), %ymm0, %ymm1
|
||
+ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm2
|
||
+ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm3
|
||
+ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm4
|
||
vpor %ymm1, %ymm2, %ymm5
|
||
vpor %ymm3, %ymm4, %ymm6
|
||
vpor %ymm5, %ymm6, %ymm5
|
||
|
||
- vpmovmskb %ymm5, %eax
|
||
- testl %eax, %eax
|
||
- jnz L(4x_vec_end)
|
||
-
|
||
- addq $(VEC_SIZE * 4), %rdi
|
||
-
|
||
+ vpmovmskb %ymm5, %ecx
|
||
# ifdef USE_AS_RAWMEMCHR
|
||
- jmp L(loop_4x_vec)
|
||
+ subq $-(VEC_SIZE * 4), %rdi
|
||
+ testl %ecx, %ecx
|
||
+ jz L(loop_4x_vec)
|
||
# else
|
||
- subq $(VEC_SIZE * 4), %rdx
|
||
- ja L(loop_4x_vec)
|
||
+ testl %ecx, %ecx
|
||
+ jnz L(loop_4x_vec_end)
|
||
|
||
-L(last_4x_vec_or_less):
|
||
- /* Less than 4 * VEC and aligned to VEC_SIZE. */
|
||
- addl $(VEC_SIZE * 2), %edx
|
||
- jle L(last_2x_vec)
|
||
+ subq $-(VEC_SIZE * 4), %rdi
|
||
|
||
- VPCMPEQ (%rdi), %ymm0, %ymm1
|
||
- vpmovmskb %ymm1, %eax
|
||
- testl %eax, %eax
|
||
- jnz L(first_vec_x0)
|
||
+ subq $(CHAR_PER_VEC * 4), %rdx
|
||
+ ja L(loop_4x_vec)
|
||
|
||
- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
|
||
+ /* Fall through into less than 4 remaining vectors of length
|
||
+ case. */
|
||
+ VPCMPEQ (VEC_SIZE * 0 + 1)(%rdi), %ymm0, %ymm1
|
||
vpmovmskb %ymm1, %eax
|
||
+ .p2align 4
|
||
+L(last_4x_vec_or_less):
|
||
+# ifdef USE_AS_WMEMCHR
|
||
+ /* NB: Multiply length by 4 to get byte count. */
|
||
+ sall $2, %edx
|
||
+# endif
|
||
+ /* Check if first VEC contained match. */
|
||
testl %eax, %eax
|
||
- jnz L(first_vec_x1)
|
||
+ jnz L(first_vec_x1_check)
|
||
|
||
- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
|
||
- vpmovmskb %ymm1, %eax
|
||
- testl %eax, %eax
|
||
+ /* If remaining length > VEC_SIZE * 2. */
|
||
+ addl $(VEC_SIZE * 2), %edx
|
||
+ jg L(last_4x_vec)
|
||
|
||
- jnz L(first_vec_x2_check)
|
||
- subl $VEC_SIZE, %edx
|
||
- jle L(zero)
|
||
+L(last_2x_vec):
|
||
+ /* If remaining length < VEC_SIZE. */
|
||
+ addl $VEC_SIZE, %edx
|
||
+ jle L(zero_end)
|
||
|
||
- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
|
||
+ /* Check VEC2 and compare any match with remaining length. */
|
||
+ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
|
||
vpmovmskb %ymm1, %eax
|
||
- testl %eax, %eax
|
||
-
|
||
- jnz L(first_vec_x3_check)
|
||
- xorl %eax, %eax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ tzcntl %eax, %eax
|
||
+ cmpl %eax, %edx
|
||
+ jbe L(set_zero_end)
|
||
+ addq $(VEC_SIZE + 1), %rdi
|
||
+ addq %rdi, %rax
|
||
+L(zero_end):
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
-L(last_2x_vec):
|
||
- addl $(VEC_SIZE * 2), %edx
|
||
- VPCMPEQ (%rdi), %ymm0, %ymm1
|
||
+L(loop_4x_vec_end):
|
||
+# endif
|
||
+ /* rawmemchr will fall through into this if match was found in
|
||
+ loop. */
|
||
+
|
||
vpmovmskb %ymm1, %eax
|
||
testl %eax, %eax
|
||
+ jnz L(last_vec_x1_return)
|
||
|
||
- jnz L(first_vec_x0_check)
|
||
- subl $VEC_SIZE, %edx
|
||
- jle L(zero)
|
||
-
|
||
- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
|
||
- vpmovmskb %ymm1, %eax
|
||
+ vpmovmskb %ymm2, %eax
|
||
testl %eax, %eax
|
||
- jnz L(first_vec_x1_check)
|
||
- xorl %eax, %eax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ jnz L(last_vec_x2_return)
|
||
|
||
- .p2align 4
|
||
-L(first_vec_x0_check):
|
||
- tzcntl %eax, %eax
|
||
- /* Check the end of data. */
|
||
- cmpq %rax, %rdx
|
||
- jbe L(zero)
|
||
+ vpmovmskb %ymm3, %eax
|
||
+ /* Combine VEC3 matches (eax) with VEC4 matches (ecx). */
|
||
+ salq $32, %rcx
|
||
+ orq %rcx, %rax
|
||
+ tzcntq %rax, %rax
|
||
+# ifdef USE_AS_RAWMEMCHR
|
||
+ subq $(VEC_SIZE * 2 - 1), %rdi
|
||
+# else
|
||
+ subq $-(VEC_SIZE * 2 + 1), %rdi
|
||
+# endif
|
||
addq %rdi, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
+# ifndef USE_AS_RAWMEMCHR
|
||
|
||
.p2align 4
|
||
L(first_vec_x1_check):
|
||
tzcntl %eax, %eax
|
||
- /* Check the end of data. */
|
||
- cmpq %rax, %rdx
|
||
- jbe L(zero)
|
||
- addq $VEC_SIZE, %rax
|
||
+ /* Adjust length. */
|
||
+ subl $-(VEC_SIZE * 4), %edx
|
||
+ /* Check if match within remaining length. */
|
||
+ cmpl %eax, %edx
|
||
+ jbe L(set_zero_end)
|
||
+ incq %rdi
|
||
addq %rdi, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
+ .p2align 4
|
||
+L(set_zero_end):
|
||
+ xorl %eax, %eax
|
||
+ VZEROUPPER_RETURN
|
||
+# endif
|
||
|
||
.p2align 4
|
||
-L(first_vec_x2_check):
|
||
+L(last_vec_x1_return):
|
||
tzcntl %eax, %eax
|
||
- /* Check the end of data. */
|
||
- cmpq %rax, %rdx
|
||
- jbe L(zero)
|
||
- addq $(VEC_SIZE * 2), %rax
|
||
+# ifdef USE_AS_RAWMEMCHR
|
||
+ subq $(VEC_SIZE * 4 - 1), %rdi
|
||
+# else
|
||
+ incq %rdi
|
||
+# endif
|
||
addq %rdi, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
-L(first_vec_x3_check):
|
||
+L(last_vec_x2_return):
|
||
tzcntl %eax, %eax
|
||
- /* Check the end of data. */
|
||
- cmpq %rax, %rdx
|
||
- jbe L(zero)
|
||
- addq $(VEC_SIZE * 3), %rax
|
||
+# ifdef USE_AS_RAWMEMCHR
|
||
+ subq $(VEC_SIZE * 3 - 1), %rdi
|
||
+# else
|
||
+ subq $-(VEC_SIZE + 1), %rdi
|
||
+# endif
|
||
addq %rdi, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
+# ifndef USE_AS_RAWMEMCHR
|
||
.p2align 4
|
||
-L(zero):
|
||
- VZEROUPPER
|
||
-L(null):
|
||
- xorl %eax, %eax
|
||
- ret
|
||
-# endif
|
||
+L(last_4x_vec_or_less_cmpeq):
|
||
+ VPCMPEQ (VEC_SIZE * 4 + 1)(%rdi), %ymm0, %ymm1
|
||
+ vpmovmskb %ymm1, %eax
|
||
+# ifdef USE_AS_WMEMCHR
|
||
+ /* NB: Multiply length by 4 to get byte count. */
|
||
+ sall $2, %edx
|
||
+# endif
|
||
+ subq $-(VEC_SIZE * 4), %rdi
|
||
+ /* Check first VEC regardless. */
|
||
+ testl %eax, %eax
|
||
+ jnz L(first_vec_x1_check)
|
||
|
||
+ /* If remaining length <= CHAR_PER_VEC * 2. */
|
||
+ addl $(VEC_SIZE * 2), %edx
|
||
+ jle L(last_2x_vec)
|
||
.p2align 4
|
||
-L(first_vec_x0):
|
||
- tzcntl %eax, %eax
|
||
- addq %rdi, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+L(last_4x_vec):
|
||
+ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
|
||
+ vpmovmskb %ymm1, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x2_return)
|
||
|
||
- .p2align 4
|
||
-L(first_vec_x1):
|
||
- tzcntl %eax, %eax
|
||
- addq $VEC_SIZE, %rax
|
||
- addq %rdi, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1
|
||
+ vpmovmskb %ymm1, %eax
|
||
|
||
- .p2align 4
|
||
-L(first_vec_x2):
|
||
+ /* Create mask for possible matches within remaining length. */
|
||
+ movq $-1, %rcx
|
||
+ bzhiq %rdx, %rcx, %rcx
|
||
+
|
||
+ /* Test matches in data against length match. */
|
||
+ andl %ecx, %eax
|
||
+ jnz L(last_vec_x3)
|
||
+
|
||
+ /* If remaining length <= VEC_SIZE * 3 (Note this is after
|
||
+ remaining length was found to be > VEC_SIZE * 2). */
|
||
+ subl $VEC_SIZE, %edx
|
||
+ jbe L(zero_end2)
|
||
+
|
||
+ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1
|
||
+ vpmovmskb %ymm1, %eax
|
||
+ /* Shift remaining length mask for last VEC. */
|
||
+ shrq $32, %rcx
|
||
+ andl %ecx, %eax
|
||
+ jz L(zero_end2)
|
||
tzcntl %eax, %eax
|
||
- addq $(VEC_SIZE * 2), %rax
|
||
+ addq $(VEC_SIZE * 3 + 1), %rdi
|
||
addq %rdi, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+L(zero_end2):
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
-L(4x_vec_end):
|
||
- vpmovmskb %ymm1, %eax
|
||
- testl %eax, %eax
|
||
- jnz L(first_vec_x0)
|
||
- vpmovmskb %ymm2, %eax
|
||
- testl %eax, %eax
|
||
- jnz L(first_vec_x1)
|
||
- vpmovmskb %ymm3, %eax
|
||
- testl %eax, %eax
|
||
- jnz L(first_vec_x2)
|
||
- vpmovmskb %ymm4, %eax
|
||
- testl %eax, %eax
|
||
-L(first_vec_x3):
|
||
+L(last_vec_x3):
|
||
tzcntl %eax, %eax
|
||
- addq $(VEC_SIZE * 3), %rax
|
||
+ subq $-(VEC_SIZE * 2 + 1), %rdi
|
||
addq %rdi, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
+# endif
|
||
|
||
END (MEMCHR)
|
||
#endif
|
||
diff --git a/sysdeps/x86_64/multiarch/memchr-evex.S b/sysdeps/x86_64/multiarch/memchr-evex.S
|
||
new file mode 100644
|
||
index 0000000000..f3fdad4fda
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/memchr-evex.S
|
||
@@ -0,0 +1,478 @@
|
||
+/* memchr/wmemchr optimized with 256-bit EVEX instructions.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#if IS_IN (libc)
|
||
+
|
||
+# include <sysdep.h>
|
||
+
|
||
+# ifndef MEMCHR
|
||
+# define MEMCHR __memchr_evex
|
||
+# endif
|
||
+
|
||
+# ifdef USE_AS_WMEMCHR
|
||
+# define VPBROADCAST vpbroadcastd
|
||
+# define VPMINU vpminud
|
||
+# define VPCMP vpcmpd
|
||
+# define VPCMPEQ vpcmpeqd
|
||
+# define CHAR_SIZE 4
|
||
+# else
|
||
+# define VPBROADCAST vpbroadcastb
|
||
+# define VPMINU vpminub
|
||
+# define VPCMP vpcmpb
|
||
+# define VPCMPEQ vpcmpeqb
|
||
+# define CHAR_SIZE 1
|
||
+# endif
|
||
+
|
||
+# ifdef USE_AS_RAWMEMCHR
|
||
+# define RAW_PTR_REG rcx
|
||
+# define ALGN_PTR_REG rdi
|
||
+# else
|
||
+# define RAW_PTR_REG rdi
|
||
+# define ALGN_PTR_REG rcx
|
||
+# endif
|
||
+
|
||
+# define XMMZERO xmm23
|
||
+# define YMMZERO ymm23
|
||
+# define XMMMATCH xmm16
|
||
+# define YMMMATCH ymm16
|
||
+# define YMM1 ymm17
|
||
+# define YMM2 ymm18
|
||
+# define YMM3 ymm19
|
||
+# define YMM4 ymm20
|
||
+# define YMM5 ymm21
|
||
+# define YMM6 ymm22
|
||
+
|
||
+# define VEC_SIZE 32
|
||
+# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
|
||
+# define PAGE_SIZE 4096
|
||
+
|
||
+ .section .text.evex,"ax",@progbits
|
||
+ENTRY (MEMCHR)
|
||
+# ifndef USE_AS_RAWMEMCHR
|
||
+ /* Check for zero length. */
|
||
+ test %RDX_LP, %RDX_LP
|
||
+ jz L(zero)
|
||
+
|
||
+# ifdef __ILP32__
|
||
+ /* Clear the upper 32 bits. */
|
||
+ movl %edx, %edx
|
||
+# endif
|
||
+# endif
|
||
+ /* Broadcast CHAR to YMMMATCH. */
|
||
+ VPBROADCAST %esi, %YMMMATCH
|
||
+ /* Check if we may cross page boundary with one vector load. */
|
||
+ movl %edi, %eax
|
||
+ andl $(PAGE_SIZE - 1), %eax
|
||
+ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
|
||
+ ja L(cross_page_boundary)
|
||
+
|
||
+ /* Check the first VEC_SIZE bytes. */
|
||
+ VPCMP $0, (%rdi), %YMMMATCH, %k0
|
||
+ kmovd %k0, %eax
|
||
+# ifndef USE_AS_RAWMEMCHR
|
||
+ /* If length <= CHAR_PER_VEC handle specially. */
|
||
+ cmpq $CHAR_PER_VEC, %rdx
|
||
+ jbe L(first_vec_x0)
|
||
+# endif
|
||
+ testl %eax, %eax
|
||
+ jz L(aligned_more)
|
||
+ tzcntl %eax, %eax
|
||
+# ifdef USE_AS_WMEMCHR
|
||
+ /* NB: Scale the wchar_t index by CHAR_SIZE to get the byte offset. */
|
||
+ leaq (%rdi, %rax, CHAR_SIZE), %rax
|
||
+# else
|
||
+ addq %rdi, %rax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+# ifndef USE_AS_RAWMEMCHR
|
||
+L(zero):
|
||
+ xorl %eax, %eax
|
||
+ ret
|
||
+
|
||
+ .p2align 5
|
||
+L(first_vec_x0):
|
||
+ /* Check if first match was before length. */
|
||
+ tzcntl %eax, %eax
|
||
+ xorl %ecx, %ecx
|
||
+ cmpl %eax, %edx
|
||
+ leaq (%rdi, %rax, CHAR_SIZE), %rax
|
||
+ cmovle %rcx, %rax
|
||
+ ret
|
||
+# else
|
||
+ /* NB: first_vec_x0 is 17 bytes which will leave
|
||
+ cross_page_boundary (which is relatively cold) close enough
|
||
+ to ideal alignment. So only realign L(cross_page_boundary) if
|
||
+ rawmemchr. */
|
||
+ .p2align 4
|
||
+# endif
|
||
+L(cross_page_boundary):
|
||
+ /* Save pointer before aligning as its original value is
|
||
+ necessary for computing the return address if byte is found or
|
||
+ adjusting length if it is not and this is memchr. */
|
||
+ movq %rdi, %rcx
|
||
+ /* Align data to VEC_SIZE. ALGN_PTR_REG is rcx for memchr and rdi
|
||
+ for rawmemchr. */
|
||
+ andq $-VEC_SIZE, %ALGN_PTR_REG
|
||
+ VPCMP $0, (%ALGN_PTR_REG), %YMMMATCH, %k0
|
||
+ kmovd %k0, %r8d
|
||
+# ifdef USE_AS_WMEMCHR
|
||
+ /* NB: Divide shift count by 4 since each bit in K0 represents 4
|
||
+ bytes. */
|
||
+ sarl $2, %eax
|
||
+# endif
|
||
+# ifndef USE_AS_RAWMEMCHR
|
||
+ movl $(PAGE_SIZE / CHAR_SIZE), %esi
|
||
+ subl %eax, %esi
|
||
+# endif
|
||
+# ifdef USE_AS_WMEMCHR
|
||
+ andl $(CHAR_PER_VEC - 1), %eax
|
||
+# endif
|
||
+ /* Remove the leading bytes. */
|
||
+ sarxl %eax, %r8d, %eax
|
||
+# ifndef USE_AS_RAWMEMCHR
|
||
+ /* Check the end of data. */
|
||
+ cmpq %rsi, %rdx
|
||
+ jbe L(first_vec_x0)
|
||
+# endif
|
||
+ testl %eax, %eax
|
||
+ jz L(cross_page_continue)
|
||
+ tzcntl %eax, %eax
|
||
+# ifdef USE_AS_WMEMCHR
|
||
+ /* NB: Scale the wchar_t index by CHAR_SIZE to get the byte offset. */
|
||
+ leaq (%RAW_PTR_REG, %rax, CHAR_SIZE), %rax
|
||
+# else
|
||
+ addq %RAW_PTR_REG, %rax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(first_vec_x1):
|
||
+ tzcntl %eax, %eax
|
||
+ leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(first_vec_x2):
|
||
+ tzcntl %eax, %eax
|
||
+ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(first_vec_x3):
|
||
+ tzcntl %eax, %eax
|
||
+ leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(first_vec_x4):
|
||
+ tzcntl %eax, %eax
|
||
+ leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 5
|
||
+L(aligned_more):
|
||
+ /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time
|
||
+ since data is only aligned to VEC_SIZE. */
|
||
+
|
||
+# ifndef USE_AS_RAWMEMCHR
|
||
+ /* Align data to VEC_SIZE. */
|
||
+L(cross_page_continue):
|
||
+ xorl %ecx, %ecx
|
||
+ subl %edi, %ecx
|
||
+ andq $-VEC_SIZE, %rdi
|
||
+ /* esi is for adjusting length to see if near the end. */
|
||
+ leal (VEC_SIZE * 5)(%rdi, %rcx), %esi
|
||
+# ifdef USE_AS_WMEMCHR
|
||
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
|
||
+ sarl $2, %esi
|
||
+# endif
|
||
+# else
|
||
+ andq $-VEC_SIZE, %rdi
|
||
+L(cross_page_continue):
|
||
+# endif
|
||
+ /* Load first VEC regardless. */
|
||
+ VPCMP $0, (VEC_SIZE)(%rdi), %YMMMATCH, %k0
|
||
+ kmovd %k0, %eax
|
||
+# ifndef USE_AS_RAWMEMCHR
|
||
+ /* Adjust length. If near end handle specially. */
|
||
+ subq %rsi, %rdx
|
||
+ jbe L(last_4x_vec_or_less)
|
||
+# endif
|
||
+ testl %eax, %eax
|
||
+ jnz L(first_vec_x1)
|
||
+
|
||
+ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0
|
||
+ kmovd %k0, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(first_vec_x2)
|
||
+
|
||
+ VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k0
|
||
+ kmovd %k0, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(first_vec_x3)
|
||
+
|
||
+ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0
|
||
+ kmovd %k0, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(first_vec_x4)
|
||
+
|
||
+
|
||
+# ifndef USE_AS_RAWMEMCHR
|
||
+ /* Check if at last CHAR_PER_VEC * 4 length. */
|
||
+ subq $(CHAR_PER_VEC * 4), %rdx
|
||
+ jbe L(last_4x_vec_or_less_cmpeq)
|
||
+ addq $VEC_SIZE, %rdi
|
||
+
|
||
+ /* Align data to VEC_SIZE * 4 for the loop and readjust length.
|
||
+ */
|
||
+# ifdef USE_AS_WMEMCHR
|
||
+ movl %edi, %ecx
|
||
+ andq $-(4 * VEC_SIZE), %rdi
|
||
+ andl $(VEC_SIZE * 4 - 1), %ecx
|
||
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
|
||
+ sarl $2, %ecx
|
||
+ addq %rcx, %rdx
|
||
+# else
|
||
+ addq %rdi, %rdx
|
||
+ andq $-(4 * VEC_SIZE), %rdi
|
||
+ subq %rdi, %rdx
|
||
+# endif
|
||
+# else
|
||
+ addq $VEC_SIZE, %rdi
|
||
+ andq $-(4 * VEC_SIZE), %rdi
|
||
+# endif
|
||
+
|
||
+ vpxorq %XMMZERO, %XMMZERO, %XMMZERO
|
||
+
|
||
+ /* Compare 4 * VEC at a time forward. */
|
||
+ .p2align 4
|
||
+L(loop_4x_vec):
|
||
+ /* It would be possible to save some instructions using 4x VPCMP
|
||
+ but bottleneck on port 5 makes it not worth it. */
|
||
+ VPCMP $4, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k1
|
||
+ /* xor will set bytes matching esi to zero. */
|
||
+ vpxorq (VEC_SIZE * 5)(%rdi), %YMMMATCH, %YMM2
|
||
+ vpxorq (VEC_SIZE * 6)(%rdi), %YMMMATCH, %YMM3
|
||
+ VPCMP $0, (VEC_SIZE * 7)(%rdi), %YMMMATCH, %k3
|
||
+ /* Reduce VEC2 / VEC3 with min and VEC1 with zero mask. */
|
||
+ VPMINU %YMM2, %YMM3, %YMM3{%k1}{z}
|
||
+ VPCMP $0, %YMM3, %YMMZERO, %k2
|
||
+# ifdef USE_AS_RAWMEMCHR
|
||
+ subq $-(VEC_SIZE * 4), %rdi
|
||
+ kortestd %k2, %k3
|
||
+ jz L(loop_4x_vec)
|
||
+# else
|
||
+ kortestd %k2, %k3
|
||
+ jnz L(loop_4x_vec_end)
|
||
+
|
||
+ subq $-(VEC_SIZE * 4), %rdi
|
||
+
|
||
+ subq $(CHAR_PER_VEC * 4), %rdx
|
||
+ ja L(loop_4x_vec)
|
||
+
|
||
+ /* Fall through into less than 4 remaining vectors of length case.
|
||
+ */
|
||
+ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0
|
||
+ kmovd %k0, %eax
|
||
+ addq $(VEC_SIZE * 3), %rdi
|
||
+ .p2align 4
|
||
+L(last_4x_vec_or_less):
|
||
+ /* Check if first VEC contained match. */
|
||
+ testl %eax, %eax
|
||
+ jnz L(first_vec_x1_check)
|
||
+
|
||
+ /* If remaining length > CHAR_PER_VEC * 2. */
|
||
+ addl $(CHAR_PER_VEC * 2), %edx
|
||
+ jg L(last_4x_vec)
|
||
+
|
||
+L(last_2x_vec):
|
||
+ /* If remaining length < CHAR_PER_VEC. */
|
||
+ addl $CHAR_PER_VEC, %edx
|
||
+ jle L(zero_end)
|
||
+
|
||
+ /* Check VEC2 and compare any match with remaining length. */
|
||
+ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0
|
||
+ kmovd %k0, %eax
|
||
+ tzcntl %eax, %eax
|
||
+ cmpl %eax, %edx
|
||
+ jbe L(set_zero_end)
|
||
+ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
|
||
+L(zero_end):
|
||
+ ret
|
||
+
|
||
+
|
||
+ .p2align 4
|
||
+L(first_vec_x1_check):
|
||
+ tzcntl %eax, %eax
|
||
+ /* Adjust length. */
|
||
+ subl $-(CHAR_PER_VEC * 4), %edx
|
||
+ /* Check if match within remaining length. */
|
||
+ cmpl %eax, %edx
|
||
+ jbe L(set_zero_end)
|
||
+ /* NB: Scale the CHAR index by CHAR_SIZE to get the byte offset. */
|
||
+ leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax
|
||
+ ret
|
||
+L(set_zero_end):
|
||
+ xorl %eax, %eax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(loop_4x_vec_end):
|
||
+# endif
|
||
+ /* rawmemchr will fall through into this if match was found in
|
||
+ loop. */
|
||
+
|
||
+ /* k1 holds the inverse (NOT) of the matches with VEC1. */
|
||
+ kmovd %k1, %eax
|
||
+# ifdef USE_AS_WMEMCHR
|
||
+ subl $((1 << CHAR_PER_VEC) - 1), %eax
|
||
+# else
|
||
+ incl %eax
|
||
+# endif
|
||
+ jnz L(last_vec_x1_return)
|
||
+
|
||
+ VPCMP $0, %YMM2, %YMMZERO, %k0
|
||
+ kmovd %k0, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x2_return)
|
||
+
|
||
+ kmovd %k2, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x3_return)
|
||
+
|
||
+ kmovd %k3, %eax
|
||
+ tzcntl %eax, %eax
|
||
+# ifdef USE_AS_RAWMEMCHR
|
||
+ leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
|
||
+# else
|
||
+ leaq (VEC_SIZE * 7)(%rdi, %rax, CHAR_SIZE), %rax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_vec_x1_return):
|
||
+ tzcntl %eax, %eax
|
||
+# ifdef USE_AS_RAWMEMCHR
|
||
+# ifdef USE_AS_WMEMCHR
|
||
+ /* NB: Scale the wchar_t index by CHAR_SIZE to get the byte offset. */
|
||
+ leaq (%rdi, %rax, CHAR_SIZE), %rax
|
||
+# else
|
||
+ addq %rdi, %rax
|
||
+# endif
|
||
+# else
|
||
+ /* NB: Scale the CHAR index by CHAR_SIZE to get the byte offset. */
|
||
+ leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_vec_x2_return):
|
||
+ tzcntl %eax, %eax
|
||
+# ifdef USE_AS_RAWMEMCHR
|
||
+ /* NB: Scale the CHAR index by CHAR_SIZE to get the byte offset. */
|
||
+ leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax
|
||
+# else
|
||
+ /* NB: Scale the CHAR index by CHAR_SIZE to get the byte offset. */
|
||
+ leaq (VEC_SIZE * 5)(%rdi, %rax, CHAR_SIZE), %rax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_vec_x3_return):
|
||
+ tzcntl %eax, %eax
|
||
+# ifdef USE_AS_RAWMEMCHR
|
||
+ /* NB: Scale the CHAR index by CHAR_SIZE to get the byte offset. */
|
||
+ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
|
||
+# else
|
||
+ /* NB: Scale the CHAR index by CHAR_SIZE to get the byte offset. */
|
||
+ leaq (VEC_SIZE * 6)(%rdi, %rax, CHAR_SIZE), %rax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+
|
||
+# ifndef USE_AS_RAWMEMCHR
|
||
+L(last_4x_vec_or_less_cmpeq):
|
||
+ VPCMP $0, (VEC_SIZE * 5)(%rdi), %YMMMATCH, %k0
|
||
+ kmovd %k0, %eax
|
||
+ subq $-(VEC_SIZE * 4), %rdi
|
||
+ /* Check first VEC regardless. */
|
||
+ testl %eax, %eax
|
||
+ jnz L(first_vec_x1_check)
|
||
+
|
||
+ /* If remaining length <= CHAR_PER_VEC * 2. */
|
||
+ addl $(CHAR_PER_VEC * 2), %edx
|
||
+ jle L(last_2x_vec)
|
||
+
|
||
+ .p2align 4
|
||
+L(last_4x_vec):
|
||
+ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0
|
||
+ kmovd %k0, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x2)
|
||
+
|
||
+
|
||
+ VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k0
|
||
+ kmovd %k0, %eax
|
||
+ /* Create mask for possible matches within remaining length. */
|
||
+# ifdef USE_AS_WMEMCHR
|
||
+ movl $((1 << (CHAR_PER_VEC * 2)) - 1), %ecx
|
||
+ bzhil %edx, %ecx, %ecx
|
||
+# else
|
||
+ movq $-1, %rcx
|
||
+ bzhiq %rdx, %rcx, %rcx
|
||
+# endif
|
||
+ /* Test matches in data against length match. */
|
||
+ andl %ecx, %eax
|
||
+ jnz L(last_vec_x3)
|
||
+
|
||
+ /* if remaining length <= CHAR_PER_VEC * 3 (Note this is after
|
||
+ remaining length was found to be > CHAR_PER_VEC * 2). */
|
||
+ subl $CHAR_PER_VEC, %edx
|
||
+ jbe L(zero_end2)
|
||
+
|
||
+
|
||
+ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0
|
||
+ kmovd %k0, %eax
|
||
+ /* Shift remaining length mask for last VEC. */
|
||
+# ifdef USE_AS_WMEMCHR
|
||
+ shrl $CHAR_PER_VEC, %ecx
|
||
+# else
|
||
+ shrq $CHAR_PER_VEC, %rcx
|
||
+# endif
|
||
+ andl %ecx, %eax
|
||
+ jz L(zero_end2)
|
||
+ tzcntl %eax, %eax
|
||
+ leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax
|
||
+L(zero_end2):
|
||
+ ret
|
||
+
|
||
+L(last_vec_x2):
|
||
+ tzcntl %eax, %eax
|
||
+ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_vec_x3):
|
||
+ tzcntl %eax, %eax
|
||
+ leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
|
||
+ ret
|
||
+# endif
|
||
+
|
||
+END (MEMCHR)
|
||
+#endif
|
||
diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..cf4eff5d4a
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S
|
||
@@ -0,0 +1,12 @@
|
||
+#ifndef MEMCMP
|
||
+# define MEMCMP __memcmp_avx2_movbe_rtm
|
||
+#endif
|
||
+
|
||
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
|
||
+
|
||
+#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
|
||
+
|
||
+#define SECTION(p) p##.avx.rtm
|
||
+
|
||
+#include "memcmp-avx2-movbe.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
|
||
index 67fc575b59..87f9478eaf 100644
|
||
--- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
|
||
+++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
|
||
@@ -47,6 +47,10 @@
|
||
# define VZEROUPPER vzeroupper
|
||
# endif
|
||
|
||
+# ifndef SECTION
|
||
+# define SECTION(p) p##.avx
|
||
+# endif
|
||
+
|
||
# define VEC_SIZE 32
|
||
# define VEC_MASK ((1 << VEC_SIZE) - 1)
|
||
|
||
@@ -55,7 +59,7 @@
|
||
memcmp has to use UNSIGNED comparison for elemnts.
|
||
*/
|
||
|
||
- .section .text.avx,"ax",@progbits
|
||
+ .section SECTION(.text),"ax",@progbits
|
||
ENTRY (MEMCMP)
|
||
# ifdef USE_AS_WMEMCMP
|
||
shl $2, %RDX_LP
|
||
@@ -123,8 +127,8 @@ ENTRY (MEMCMP)
|
||
vptest %ymm0, %ymm5
|
||
jnc L(4x_vec_end)
|
||
xorl %eax, %eax
|
||
- VZEROUPPER
|
||
- ret
|
||
+L(return_vzeroupper):
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN
|
||
|
||
.p2align 4
|
||
L(last_2x_vec):
|
||
@@ -144,8 +148,7 @@ L(last_vec):
|
||
vpmovmskb %ymm2, %eax
|
||
subl $VEC_MASK, %eax
|
||
jnz L(first_vec)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(first_vec):
|
||
@@ -164,8 +167,7 @@ L(wmemcmp_return):
|
||
movzbl (%rsi, %rcx), %edx
|
||
sub %edx, %eax
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
# ifdef USE_AS_WMEMCMP
|
||
.p2align 4
|
||
@@ -367,8 +369,7 @@ L(last_4x_vec):
|
||
vpmovmskb %ymm2, %eax
|
||
subl $VEC_MASK, %eax
|
||
jnz L(first_vec)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(4x_vec_end):
|
||
@@ -394,8 +395,7 @@ L(4x_vec_end):
|
||
movzbl (VEC_SIZE * 3)(%rsi, %rcx), %edx
|
||
sub %edx, %eax
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(first_vec_x1):
|
||
@@ -410,8 +410,7 @@ L(first_vec_x1):
|
||
movzbl VEC_SIZE(%rsi, %rcx), %edx
|
||
sub %edx, %eax
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(first_vec_x2):
|
||
@@ -426,7 +425,6 @@ L(first_vec_x2):
|
||
movzbl (VEC_SIZE * 2)(%rsi, %rcx), %edx
|
||
sub %edx, %eax
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
END (MEMCMP)
|
||
#endif
|
||
diff --git a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
|
||
new file mode 100644
|
||
index 0000000000..9c093972e1
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
|
||
@@ -0,0 +1,440 @@
|
||
+/* memcmp/wmemcmp optimized with 256-bit EVEX instructions.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#if IS_IN (libc)
|
||
+
|
||
+/* memcmp/wmemcmp is implemented as:
|
||
+ 1. For size from 2 to 7 bytes, load as big endian with movbe and bswap
|
||
+ to avoid branches.
|
||
+ 2. Use overlapping compare to avoid branch.
|
||
+ 3. Use vector compare when size >= 4 bytes for memcmp or size >= 8
|
||
+ bytes for wmemcmp.
|
||
+ 4. If size is 8 * VEC_SIZE or less, unroll the loop.
|
||
+ 5. Compare 4 * VEC_SIZE at a time with the aligned first memory
|
||
+ area.
|
||
+ 6. Use 2 vector compares when size is 2 * VEC_SIZE or less.
|
||
+ 7. Use 4 vector compares when size is 4 * VEC_SIZE or less.
|
||
+ 8. Use 8 vector compares when size is 8 * VEC_SIZE or less. */
|
||
+
|
||
+# include <sysdep.h>
|
||
+
|
||
+# ifndef MEMCMP
|
||
+# define MEMCMP __memcmp_evex_movbe
|
||
+# endif
|
||
+
|
||
+# define VMOVU vmovdqu64
|
||
+
|
||
+# ifdef USE_AS_WMEMCMP
|
||
+# define VPCMPEQ vpcmpeqd
|
||
+# else
|
||
+# define VPCMPEQ vpcmpeqb
|
||
+# endif
|
||
+
|
||
+# define XMM1 xmm17
|
||
+# define XMM2 xmm18
|
||
+# define YMM1 ymm17
|
||
+# define YMM2 ymm18
|
||
+# define YMM3 ymm19
|
||
+# define YMM4 ymm20
|
||
+# define YMM5 ymm21
|
||
+# define YMM6 ymm22
|
||
+
|
||
+# define VEC_SIZE 32
|
||
+# ifdef USE_AS_WMEMCMP
|
||
+# define VEC_MASK 0xff
|
||
+# define XMM_MASK 0xf
|
||
+# else
|
||
+# define VEC_MASK 0xffffffff
|
||
+# define XMM_MASK 0xffff
|
||
+# endif
|
||
+
|
||
+/* Warning!
|
||
+ wmemcmp has to use SIGNED comparison for elements.
|
||
+ memcmp has to use UNSIGNED comparison for elements.
|
||
+*/
|
||
+
|
||
+ .section .text.evex,"ax",@progbits
|
||
+ENTRY (MEMCMP)
|
||
+# ifdef USE_AS_WMEMCMP
|
||
+ shl $2, %RDX_LP
|
||
+# elif defined __ILP32__
|
||
+ /* Clear the upper 32 bits. */
|
||
+ movl %edx, %edx
|
||
+# endif
|
||
+ cmp $VEC_SIZE, %RDX_LP
|
||
+ jb L(less_vec)
|
||
+
|
||
+ /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */
|
||
+ VMOVU (%rsi), %YMM2
|
||
+ VPCMPEQ (%rdi), %YMM2, %k1
|
||
+ kmovd %k1, %eax
|
||
+ subl $VEC_MASK, %eax
|
||
+ jnz L(first_vec)
|
||
+
|
||
+ cmpq $(VEC_SIZE * 2), %rdx
|
||
+ jbe L(last_vec)
|
||
+
|
||
+ /* More than 2 * VEC. */
|
||
+ cmpq $(VEC_SIZE * 8), %rdx
|
||
+ ja L(more_8x_vec)
|
||
+ cmpq $(VEC_SIZE * 4), %rdx
|
||
+ jb L(last_4x_vec)
|
||
+
|
||
+ /* From 4 * VEC to 8 * VEC, inclusively. */
|
||
+ VMOVU (%rsi), %YMM1
|
||
+ VPCMPEQ (%rdi), %YMM1, %k1
|
||
+
|
||
+ VMOVU VEC_SIZE(%rsi), %YMM2
|
||
+ VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2
|
||
+
|
||
+ VMOVU (VEC_SIZE * 2)(%rsi), %YMM3
|
||
+ VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3
|
||
+
|
||
+ VMOVU (VEC_SIZE * 3)(%rsi), %YMM4
|
||
+ VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4
|
||
+
|
||
+ kandd %k1, %k2, %k5
|
||
+ kandd %k3, %k4, %k6
|
||
+ kandd %k5, %k6, %k6
|
||
+
|
||
+ kmovd %k6, %eax
|
||
+ cmpl $VEC_MASK, %eax
|
||
+ jne L(4x_vec_end)
|
||
+
|
||
+ leaq -(4 * VEC_SIZE)(%rdi, %rdx), %rdi
|
||
+ leaq -(4 * VEC_SIZE)(%rsi, %rdx), %rsi
|
||
+ VMOVU (%rsi), %YMM1
|
||
+ VPCMPEQ (%rdi), %YMM1, %k1
|
||
+
|
||
+ VMOVU VEC_SIZE(%rsi), %YMM2
|
||
+ VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2
|
||
+ kandd %k1, %k2, %k5
|
||
+
|
||
+ VMOVU (VEC_SIZE * 2)(%rsi), %YMM3
|
||
+ VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3
|
||
+ kandd %k3, %k5, %k5
|
||
+
|
||
+ VMOVU (VEC_SIZE * 3)(%rsi), %YMM4
|
||
+ VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4
|
||
+ kandd %k4, %k5, %k5
|
||
+
|
||
+ kmovd %k5, %eax
|
||
+ cmpl $VEC_MASK, %eax
|
||
+ jne L(4x_vec_end)
|
||
+ xorl %eax, %eax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_2x_vec):
|
||
+ /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */
|
||
+ VMOVU (%rsi), %YMM2
|
||
+ VPCMPEQ (%rdi), %YMM2, %k2
|
||
+ kmovd %k2, %eax
|
||
+ subl $VEC_MASK, %eax
|
||
+ jnz L(first_vec)
|
||
+
|
||
+L(last_vec):
|
||
+ /* Use overlapping loads to avoid branches. */
|
||
+ leaq -VEC_SIZE(%rdi, %rdx), %rdi
|
||
+ leaq -VEC_SIZE(%rsi, %rdx), %rsi
|
||
+ VMOVU (%rsi), %YMM2
|
||
+ VPCMPEQ (%rdi), %YMM2, %k2
|
||
+ kmovd %k2, %eax
|
||
+ subl $VEC_MASK, %eax
|
||
+ jnz L(first_vec)
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(first_vec):
|
||
+ /* A byte or int32 is different within 16 or 32 bytes. */
|
||
+ tzcntl %eax, %ecx
|
||
+# ifdef USE_AS_WMEMCMP
|
||
+ xorl %eax, %eax
|
||
+ movl (%rdi, %rcx, 4), %edx
|
||
+ cmpl (%rsi, %rcx, 4), %edx
|
||
+L(wmemcmp_return):
|
||
+ setl %al
|
||
+ negl %eax
|
||
+ orl $1, %eax
|
||
+# else
|
||
+ movzbl (%rdi, %rcx), %eax
|
||
+ movzbl (%rsi, %rcx), %edx
|
||
+ sub %edx, %eax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+# ifdef USE_AS_WMEMCMP
|
||
+ .p2align 4
|
||
+L(4):
|
||
+ xorl %eax, %eax
|
||
+ movl (%rdi), %edx
|
||
+ cmpl (%rsi), %edx
|
||
+ jne L(wmemcmp_return)
|
||
+ ret
|
||
+# else
|
||
+ .p2align 4
|
||
+L(between_4_7):
|
||
+ /* Load as big endian with overlapping movbe to avoid branches. */
|
||
+ movbe (%rdi), %eax
|
||
+ movbe (%rsi), %ecx
|
||
+ shlq $32, %rax
|
||
+ shlq $32, %rcx
|
||
+ movbe -4(%rdi, %rdx), %edi
|
||
+ movbe -4(%rsi, %rdx), %esi
|
||
+ orq %rdi, %rax
|
||
+ orq %rsi, %rcx
|
||
+ subq %rcx, %rax
|
||
+ je L(exit)
|
||
+ sbbl %eax, %eax
|
||
+ orl $1, %eax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(exit):
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(between_2_3):
|
||
+ /* Load as big endian to avoid branches. */
|
||
+ movzwl (%rdi), %eax
|
||
+ movzwl (%rsi), %ecx
|
||
+ shll $8, %eax
|
||
+ shll $8, %ecx
|
||
+ bswap %eax
|
||
+ bswap %ecx
|
||
+ movb -1(%rdi, %rdx), %al
|
||
+ movb -1(%rsi, %rdx), %cl
|
||
+ /* Subtraction is okay because the upper 8 bits are zero. */
|
||
+ subl %ecx, %eax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(1):
|
||
+ movzbl (%rdi), %eax
|
||
+ movzbl (%rsi), %ecx
|
||
+ subl %ecx, %eax
|
||
+ ret
|
||
+# endif
|
||
+
|
||
+ .p2align 4
|
||
+L(zero):
|
||
+ xorl %eax, %eax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(less_vec):
|
||
+# ifdef USE_AS_WMEMCMP
|
||
+ /* It can only be 0, 4, 8, 12, 16, 20, 24, 28 bytes. */
|
||
+ cmpb $4, %dl
|
||
+ je L(4)
|
||
+ jb L(zero)
|
||
+# else
|
||
+ cmpb $1, %dl
|
||
+ je L(1)
|
||
+ jb L(zero)
|
||
+ cmpb $4, %dl
|
||
+ jb L(between_2_3)
|
||
+ cmpb $8, %dl
|
||
+ jb L(between_4_7)
|
||
+# endif
|
||
+ cmpb $16, %dl
|
||
+ jae L(between_16_31)
|
||
+ /* It is between 8 and 15 bytes. */
|
||
+ vmovq (%rdi), %XMM1
|
||
+ vmovq (%rsi), %XMM2
|
||
+ VPCMPEQ %XMM1, %XMM2, %k2
|
||
+ kmovw %k2, %eax
|
||
+ subl $XMM_MASK, %eax
|
||
+ jnz L(first_vec)
|
||
+ /* Use overlapping loads to avoid branches. */
|
||
+ leaq -8(%rdi, %rdx), %rdi
|
||
+ leaq -8(%rsi, %rdx), %rsi
|
||
+ vmovq (%rdi), %XMM1
|
||
+ vmovq (%rsi), %XMM2
|
||
+ VPCMPEQ %XMM1, %XMM2, %k2
|
||
+ kmovw %k2, %eax
|
||
+ subl $XMM_MASK, %eax
|
||
+ jnz L(first_vec)
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(between_16_31):
|
||
+ /* From 16 to 31 bytes. No branch when size == 16. */
|
||
+ VMOVU (%rsi), %XMM2
|
||
+ VPCMPEQ (%rdi), %XMM2, %k2
|
||
+ kmovw %k2, %eax
|
||
+ subl $XMM_MASK, %eax
|
||
+ jnz L(first_vec)
|
||
+
|
||
+ /* Use overlapping loads to avoid branches. */
|
||
+ leaq -16(%rdi, %rdx), %rdi
|
||
+ leaq -16(%rsi, %rdx), %rsi
|
||
+ VMOVU (%rsi), %XMM2
|
||
+ VPCMPEQ (%rdi), %XMM2, %k2
|
||
+ kmovw %k2, %eax
|
||
+ subl $XMM_MASK, %eax
|
||
+ jnz L(first_vec)
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(more_8x_vec):
|
||
+ /* More than 8 * VEC. Check the first VEC. */
|
||
+ VMOVU (%rsi), %YMM2
|
||
+ VPCMPEQ (%rdi), %YMM2, %k2
|
||
+ kmovd %k2, %eax
|
||
+ subl $VEC_MASK, %eax
|
||
+ jnz L(first_vec)
|
||
+
|
||
+ /* Align the first memory area for aligned loads in the loop.
|
||
+ Compute how much the first memory area is misaligned. */
|
||
+ movq %rdi, %rcx
|
||
+ andl $(VEC_SIZE - 1), %ecx
|
||
+ /* Get the negative of offset for alignment. */
|
||
+ subq $VEC_SIZE, %rcx
|
||
+ /* Adjust the second memory area. */
|
||
+ subq %rcx, %rsi
|
||
+ /* Adjust the first memory area which should be aligned now. */
|
||
+ subq %rcx, %rdi
|
||
+ /* Adjust length. */
|
||
+ addq %rcx, %rdx
|
||
+
|
||
+L(loop_4x_vec):
|
||
+ /* Compare 4 * VEC at a time forward. */
|
||
+ VMOVU (%rsi), %YMM1
|
||
+ VPCMPEQ (%rdi), %YMM1, %k1
|
||
+
|
||
+ VMOVU VEC_SIZE(%rsi), %YMM2
|
||
+ VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2
|
||
+ kandd %k2, %k1, %k5
|
||
+
|
||
+ VMOVU (VEC_SIZE * 2)(%rsi), %YMM3
|
||
+ VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3
|
||
+ kandd %k3, %k5, %k5
|
||
+
|
||
+ VMOVU (VEC_SIZE * 3)(%rsi), %YMM4
|
||
+ VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4
|
||
+ kandd %k4, %k5, %k5
|
||
+
|
||
+ kmovd %k5, %eax
|
||
+ cmpl $VEC_MASK, %eax
|
||
+ jne L(4x_vec_end)
|
||
+
|
||
+ addq $(VEC_SIZE * 4), %rdi
|
||
+ addq $(VEC_SIZE * 4), %rsi
|
||
+
|
||
+ subq $(VEC_SIZE * 4), %rdx
|
||
+ cmpq $(VEC_SIZE * 4), %rdx
|
||
+ jae L(loop_4x_vec)
|
||
+
|
||
+ /* Less than 4 * VEC. */
|
||
+ cmpq $VEC_SIZE, %rdx
|
||
+ jbe L(last_vec)
|
||
+ cmpq $(VEC_SIZE * 2), %rdx
|
||
+ jbe L(last_2x_vec)
|
||
+
|
||
+L(last_4x_vec):
|
||
+ /* From 2 * VEC to 4 * VEC. */
|
||
+ VMOVU (%rsi), %YMM2
|
||
+ VPCMPEQ (%rdi), %YMM2, %k2
|
||
+ kmovd %k2, %eax
|
||
+ subl $VEC_MASK, %eax
|
||
+ jnz L(first_vec)
|
||
+
|
||
+ addq $VEC_SIZE, %rdi
|
||
+ addq $VEC_SIZE, %rsi
|
||
+ VMOVU (%rsi), %YMM2
|
||
+ VPCMPEQ (%rdi), %YMM2, %k2
|
||
+ kmovd %k2, %eax
|
||
+ subl $VEC_MASK, %eax
|
||
+ jnz L(first_vec)
|
||
+
|
||
+ /* Use overlapping loads to avoid branches. */
|
||
+ leaq -(3 * VEC_SIZE)(%rdi, %rdx), %rdi
|
||
+ leaq -(3 * VEC_SIZE)(%rsi, %rdx), %rsi
|
||
+ VMOVU (%rsi), %YMM2
|
||
+ VPCMPEQ (%rdi), %YMM2, %k2
|
||
+ kmovd %k2, %eax
|
||
+ subl $VEC_MASK, %eax
|
||
+ jnz L(first_vec)
|
||
+
|
||
+ addq $VEC_SIZE, %rdi
|
||
+ addq $VEC_SIZE, %rsi
|
||
+ VMOVU (%rsi), %YMM2
|
||
+ VPCMPEQ (%rdi), %YMM2, %k2
|
||
+ kmovd %k2, %eax
|
||
+ subl $VEC_MASK, %eax
|
||
+ jnz L(first_vec)
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(4x_vec_end):
|
||
+ kmovd %k1, %eax
|
||
+ subl $VEC_MASK, %eax
|
||
+ jnz L(first_vec)
|
||
+ kmovd %k2, %eax
|
||
+ subl $VEC_MASK, %eax
|
||
+ jnz L(first_vec_x1)
|
||
+ kmovd %k3, %eax
|
||
+ subl $VEC_MASK, %eax
|
||
+ jnz L(first_vec_x2)
|
||
+ kmovd %k4, %eax
|
||
+ subl $VEC_MASK, %eax
|
||
+ tzcntl %eax, %ecx
|
||
+# ifdef USE_AS_WMEMCMP
|
||
+ xorl %eax, %eax
|
||
+ movl (VEC_SIZE * 3)(%rdi, %rcx, 4), %edx
|
||
+ cmpl (VEC_SIZE * 3)(%rsi, %rcx, 4), %edx
|
||
+ jmp L(wmemcmp_return)
|
||
+# else
|
||
+ movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax
|
||
+ movzbl (VEC_SIZE * 3)(%rsi, %rcx), %edx
|
||
+ sub %edx, %eax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(first_vec_x1):
|
||
+ tzcntl %eax, %ecx
|
||
+# ifdef USE_AS_WMEMCMP
|
||
+ xorl %eax, %eax
|
||
+ movl VEC_SIZE(%rdi, %rcx, 4), %edx
|
||
+ cmpl VEC_SIZE(%rsi, %rcx, 4), %edx
|
||
+ jmp L(wmemcmp_return)
|
||
+# else
|
||
+ movzbl VEC_SIZE(%rdi, %rcx), %eax
|
||
+ movzbl VEC_SIZE(%rsi, %rcx), %edx
|
||
+ sub %edx, %eax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(first_vec_x2):
|
||
+ tzcntl %eax, %ecx
|
||
+# ifdef USE_AS_WMEMCMP
|
||
+ xorl %eax, %eax
|
||
+ movl (VEC_SIZE * 2)(%rdi, %rcx, 4), %edx
|
||
+ cmpl (VEC_SIZE * 2)(%rsi, %rcx, 4), %edx
|
||
+ jmp L(wmemcmp_return)
|
||
+# else
|
||
+ movzbl (VEC_SIZE * 2)(%rdi, %rcx), %eax
|
||
+ movzbl (VEC_SIZE * 2)(%rsi, %rcx), %edx
|
||
+ sub %edx, %eax
|
||
+# endif
|
||
+ ret
|
||
+END (MEMCMP)
|
||
+#endif
|
||
diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..1ec1962e86
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S
|
||
@@ -0,0 +1,17 @@
|
||
+#if IS_IN (libc)
|
||
+# define VEC_SIZE 32
|
||
+# define VEC(i) ymm##i
|
||
+# define VMOVNT vmovntdq
|
||
+# define VMOVU vmovdqu
|
||
+# define VMOVA vmovdqa
|
||
+
|
||
+# define ZERO_UPPER_VEC_REGISTERS_RETURN \
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
|
||
+
|
||
+# define VZEROUPPER_RETURN jmp L(return)
|
||
+
|
||
+# define SECTION(p) p##.avx.rtm
|
||
+# define MEMMOVE_SYMBOL(p,s) p##_avx_##s##_rtm
|
||
+
|
||
+# include "memmove-vec-unaligned-erms.S"
|
||
+#endif
|
||
diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
|
||
index aac1515cf6..848848ab39 100644
|
||
--- a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
|
||
+++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
|
||
@@ -1,11 +1,32 @@
|
||
#if IS_IN (libc)
|
||
# define VEC_SIZE 64
|
||
-# define VEC(i) zmm##i
|
||
+# define XMM0 xmm16
|
||
+# define XMM1 xmm17
|
||
+# define YMM0 ymm16
|
||
+# define YMM1 ymm17
|
||
+# define VEC0 zmm16
|
||
+# define VEC1 zmm17
|
||
+# define VEC2 zmm18
|
||
+# define VEC3 zmm19
|
||
+# define VEC4 zmm20
|
||
+# define VEC5 zmm21
|
||
+# define VEC6 zmm22
|
||
+# define VEC7 zmm23
|
||
+# define VEC8 zmm24
|
||
+# define VEC9 zmm25
|
||
+# define VEC10 zmm26
|
||
+# define VEC11 zmm27
|
||
+# define VEC12 zmm28
|
||
+# define VEC13 zmm29
|
||
+# define VEC14 zmm30
|
||
+# define VEC15 zmm31
|
||
+# define VEC(i) VEC##i
|
||
# define VMOVNT vmovntdq
|
||
# define VMOVU vmovdqu64
|
||
# define VMOVA vmovdqa64
|
||
+# define VZEROUPPER
|
||
|
||
-# define SECTION(p) p##.avx512
|
||
+# define SECTION(p) p##.evex512
|
||
# define MEMMOVE_SYMBOL(p,s) p##_avx512_##s
|
||
|
||
# include "memmove-vec-unaligned-erms.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S
|
||
new file mode 100644
|
||
index 0000000000..0cbce8f944
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S
|
||
@@ -0,0 +1,33 @@
|
||
+#if IS_IN (libc)
|
||
+# define VEC_SIZE 32
|
||
+# define XMM0 xmm16
|
||
+# define XMM1 xmm17
|
||
+# define YMM0 ymm16
|
||
+# define YMM1 ymm17
|
||
+# define VEC0 ymm16
|
||
+# define VEC1 ymm17
|
||
+# define VEC2 ymm18
|
||
+# define VEC3 ymm19
|
||
+# define VEC4 ymm20
|
||
+# define VEC5 ymm21
|
||
+# define VEC6 ymm22
|
||
+# define VEC7 ymm23
|
||
+# define VEC8 ymm24
|
||
+# define VEC9 ymm25
|
||
+# define VEC10 ymm26
|
||
+# define VEC11 ymm27
|
||
+# define VEC12 ymm28
|
||
+# define VEC13 ymm29
|
||
+# define VEC14 ymm30
|
||
+# define VEC15 ymm31
|
||
+# define VEC(i) VEC##i
|
||
+# define VMOVNT vmovntdq
|
||
+# define VMOVU vmovdqu64
|
||
+# define VMOVA vmovdqa64
|
||
+# define VZEROUPPER
|
||
+
|
||
+# define SECTION(p) p##.evex
|
||
+# define MEMMOVE_SYMBOL(p,s) p##_evex_##s
|
||
+
|
||
+# include "memmove-vec-unaligned-erms.S"
|
||
+#endif
|
||
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
|
||
index bd5dc1a3f3..f71c343ecb 100644
|
||
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
|
||
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
|
||
@@ -48,6 +48,14 @@
|
||
# define MEMMOVE_CHK_SYMBOL(p,s) MEMMOVE_SYMBOL(p, s)
|
||
#endif
|
||
|
||
+#ifndef XMM0
|
||
+# define XMM0 xmm0
|
||
+#endif
|
||
+
|
||
+#ifndef YMM0
|
||
+# define YMM0 ymm0
|
||
+#endif
|
||
+
|
||
#ifndef VZEROUPPER
|
||
# if VEC_SIZE > 16
|
||
# define VZEROUPPER vzeroupper
|
||
@@ -56,6 +64,13 @@
|
||
# endif
|
||
#endif
|
||
|
||
+/* Avoid short distance rep movsb only with non-SSE vector. */
|
||
+#ifndef AVOID_SHORT_DISTANCE_REP_MOVSB
|
||
+# define AVOID_SHORT_DISTANCE_REP_MOVSB (VEC_SIZE > 16)
|
||
+#else
|
||
+# define AVOID_SHORT_DISTANCE_REP_MOVSB 0
|
||
+#endif
|
||
+
|
||
#ifndef PREFETCH
|
||
# define PREFETCH(addr) prefetcht0 addr
|
||
#endif
|
||
@@ -132,11 +147,12 @@ L(last_2x_vec):
|
||
VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(1)
|
||
VMOVU %VEC(0), (%rdi)
|
||
VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx)
|
||
- VZEROUPPER
|
||
#if !defined USE_MULTIARCH || !IS_IN (libc)
|
||
L(nop):
|
||
-#endif
|
||
ret
|
||
+#else
|
||
+ VZEROUPPER_RETURN
|
||
+#endif
|
||
#if defined USE_MULTIARCH && IS_IN (libc)
|
||
END (MEMMOVE_SYMBOL (__memmove, unaligned))
|
||
|
||
@@ -229,8 +245,11 @@ L(last_2x_vec):
|
||
VMOVU %VEC(0), (%rdi)
|
||
VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx)
|
||
L(return):
|
||
- VZEROUPPER
|
||
+#if VEC_SIZE > 16
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN
|
||
+#else
|
||
ret
|
||
+#endif
|
||
|
||
L(movsb):
|
||
cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP
|
||
@@ -243,7 +262,21 @@ L(movsb):
|
||
cmpq %r9, %rdi
|
||
/* Avoid slow backward REP MOVSB. */
|
||
jb L(more_8x_vec_backward)
|
||
+# if AVOID_SHORT_DISTANCE_REP_MOVSB
|
||
+ movq %rdi, %rcx
|
||
+ subq %rsi, %rcx
|
||
+ jmp 2f
|
||
+# endif
|
||
1:
|
||
+# if AVOID_SHORT_DISTANCE_REP_MOVSB
|
||
+ movq %rsi, %rcx
|
||
+ subq %rdi, %rcx
|
||
+2:
|
||
+/* Avoid "rep movsb" if RCX, the distance between source and destination,
|
||
+ is N*4GB + [1..63] with N >= 0. */
|
||
+ cmpl $63, %ecx
|
||
+ jbe L(more_2x_vec) /* Avoid "rep movsb" if ECX <= 63. */
|
||
+# endif
|
||
mov %RDX_LP, %RCX_LP
|
||
rep movsb
|
||
L(nop):
|
||
@@ -277,21 +310,20 @@ L(less_vec):
|
||
#if VEC_SIZE > 32
|
||
L(between_32_63):
|
||
/* From 32 to 63. No branch when size == 32. */
|
||
- vmovdqu (%rsi), %ymm0
|
||
- vmovdqu -32(%rsi,%rdx), %ymm1
|
||
- vmovdqu %ymm0, (%rdi)
|
||
- vmovdqu %ymm1, -32(%rdi,%rdx)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VMOVU (%rsi), %YMM0
|
||
+ VMOVU -32(%rsi,%rdx), %YMM1
|
||
+ VMOVU %YMM0, (%rdi)
|
||
+ VMOVU %YMM1, -32(%rdi,%rdx)
|
||
+ VZEROUPPER_RETURN
|
||
#endif
|
||
#if VEC_SIZE > 16
|
||
/* From 16 to 31. No branch when size == 16. */
|
||
L(between_16_31):
|
||
- vmovdqu (%rsi), %xmm0
|
||
- vmovdqu -16(%rsi,%rdx), %xmm1
|
||
- vmovdqu %xmm0, (%rdi)
|
||
- vmovdqu %xmm1, -16(%rdi,%rdx)
|
||
- ret
|
||
+ VMOVU (%rsi), %XMM0
|
||
+ VMOVU -16(%rsi,%rdx), %XMM1
|
||
+ VMOVU %XMM0, (%rdi)
|
||
+ VMOVU %XMM1, -16(%rdi,%rdx)
|
||
+ VZEROUPPER_RETURN
|
||
#endif
|
||
L(between_8_15):
|
||
/* From 8 to 15. No branch when size == 8. */
|
||
@@ -344,8 +376,7 @@ L(more_2x_vec):
|
||
VMOVU %VEC(5), -(VEC_SIZE * 2)(%rdi,%rdx)
|
||
VMOVU %VEC(6), -(VEC_SIZE * 3)(%rdi,%rdx)
|
||
VMOVU %VEC(7), -(VEC_SIZE * 4)(%rdi,%rdx)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
L(last_4x_vec):
|
||
/* Copy from 2 * VEC to 4 * VEC. */
|
||
VMOVU (%rsi), %VEC(0)
|
||
@@ -356,8 +387,7 @@ L(last_4x_vec):
|
||
VMOVU %VEC(1), VEC_SIZE(%rdi)
|
||
VMOVU %VEC(2), -VEC_SIZE(%rdi,%rdx)
|
||
VMOVU %VEC(3), -(VEC_SIZE * 2)(%rdi,%rdx)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
L(more_8x_vec):
|
||
cmpq %rsi, %rdi
|
||
@@ -413,8 +443,7 @@ L(loop_4x_vec_forward):
|
||
VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx)
|
||
/* Store the first VEC. */
|
||
VMOVU %VEC(4), (%r11)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
L(more_8x_vec_backward):
|
||
/* Load the first 4 * VEC and last VEC to support overlapping
|
||
@@ -465,8 +494,7 @@ L(loop_4x_vec_backward):
|
||
VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi)
|
||
/* Store the last VEC. */
|
||
VMOVU %VEC(8), (%r11)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
|
||
L(large_forward):
|
||
@@ -501,8 +529,7 @@ L(loop_large_forward):
|
||
VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx)
|
||
/* Store the first VEC. */
|
||
VMOVU %VEC(4), (%r11)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
L(large_backward):
|
||
/* Don't use non-temporal store if there is overlap between
|
||
@@ -536,8 +563,7 @@ L(loop_large_backward):
|
||
VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi)
|
||
/* Store the last VEC. */
|
||
VMOVU %VEC(8), (%r11)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
#endif
|
||
END (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
|
||
|
||
diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..cea2d2a72d
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S
|
||
@@ -0,0 +1,12 @@
|
||
+#ifndef MEMRCHR
|
||
+# define MEMRCHR __memrchr_avx2_rtm
|
||
+#endif
|
||
+
|
||
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
|
||
+
|
||
+#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
|
||
+
|
||
+#define SECTION(p) p##.avx.rtm
|
||
+
|
||
+#include "memrchr-avx2.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S
|
||
index f5437b54de..c8d54c08d6 100644
|
||
--- a/sysdeps/x86_64/multiarch/memrchr-avx2.S
|
||
+++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S
|
||
@@ -20,14 +20,22 @@
|
||
|
||
# include <sysdep.h>
|
||
|
||
+# ifndef MEMRCHR
|
||
+# define MEMRCHR __memrchr_avx2
|
||
+# endif
|
||
+
|
||
# ifndef VZEROUPPER
|
||
# define VZEROUPPER vzeroupper
|
||
# endif
|
||
|
||
+# ifndef SECTION
|
||
+# define SECTION(p) p##.avx
|
||
+# endif
|
||
+
|
||
# define VEC_SIZE 32
|
||
|
||
- .section .text.avx,"ax",@progbits
|
||
-ENTRY (__memrchr_avx2)
|
||
+ .section SECTION(.text),"ax",@progbits
|
||
+ENTRY (MEMRCHR)
|
||
/* Broadcast CHAR to YMM0. */
|
||
vmovd %esi, %xmm0
|
||
vpbroadcastb %xmm0, %ymm0
|
||
@@ -134,8 +142,8 @@ L(loop_4x_vec):
|
||
vpmovmskb %ymm1, %eax
|
||
bsrl %eax, %eax
|
||
addq %rdi, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+L(return_vzeroupper):
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN
|
||
|
||
.p2align 4
|
||
L(last_4x_vec_or_less):
|
||
@@ -169,8 +177,7 @@ L(last_4x_vec_or_less):
|
||
addq %rax, %rdx
|
||
jl L(zero)
|
||
addq %rdi, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(last_2x_vec):
|
||
@@ -191,31 +198,27 @@ L(last_2x_vec):
|
||
jl L(zero)
|
||
addl $(VEC_SIZE * 2), %eax
|
||
addq %rdi, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(last_vec_x0):
|
||
bsrl %eax, %eax
|
||
addq %rdi, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(last_vec_x1):
|
||
bsrl %eax, %eax
|
||
addl $VEC_SIZE, %eax
|
||
addq %rdi, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(last_vec_x2):
|
||
bsrl %eax, %eax
|
||
addl $(VEC_SIZE * 2), %eax
|
||
addq %rdi, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(last_vec_x3):
|
||
@@ -232,8 +235,7 @@ L(last_vec_x1_check):
|
||
jl L(zero)
|
||
addl $VEC_SIZE, %eax
|
||
addq %rdi, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(last_vec_x3_check):
|
||
@@ -243,12 +245,14 @@ L(last_vec_x3_check):
|
||
jl L(zero)
|
||
addl $(VEC_SIZE * 3), %eax
|
||
addq %rdi, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(zero):
|
||
- VZEROUPPER
|
||
+ xorl %eax, %eax
|
||
+ VZEROUPPER_RETURN
|
||
+
|
||
+ .p2align 4
|
||
L(null):
|
||
xorl %eax, %eax
|
||
ret
|
||
@@ -273,8 +277,7 @@ L(last_vec_or_less_aligned):
|
||
|
||
bsrl %eax, %eax
|
||
addq %rdi, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(last_vec_or_less):
|
||
@@ -315,8 +318,7 @@ L(last_vec_or_less):
|
||
bsrl %eax, %eax
|
||
addq %rdi, %rax
|
||
addq %r8, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(last_vec_2x_aligned):
|
||
@@ -353,7 +355,6 @@ L(last_vec_2x_aligned):
|
||
bsrl %eax, %eax
|
||
addq %rdi, %rax
|
||
addq %r8, %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
-END (__memrchr_avx2)
|
||
+ VZEROUPPER_RETURN
|
||
+END (MEMRCHR)
|
||
#endif
|
||
diff --git a/sysdeps/x86_64/multiarch/memrchr-evex.S b/sysdeps/x86_64/multiarch/memrchr-evex.S
|
||
new file mode 100644
|
||
index 0000000000..16bf8e02b1
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/memrchr-evex.S
|
||
@@ -0,0 +1,337 @@
|
||
+/* memrchr optimized with 256-bit EVEX instructions.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#if IS_IN (libc)
|
||
+
|
||
+# include <sysdep.h>
|
||
+
|
||
+# define VMOVA vmovdqa64
|
||
+
|
||
+# define YMMMATCH ymm16
|
||
+
|
||
+# define VEC_SIZE 32
|
||
+
|
||
+ .section .text.evex,"ax",@progbits
|
||
+ENTRY (__memrchr_evex)
|
||
+ /* Broadcast CHAR to YMMMATCH. */
|
||
+ vpbroadcastb %esi, %YMMMATCH
|
||
+
|
||
+ sub $VEC_SIZE, %RDX_LP
|
||
+ jbe L(last_vec_or_less)
|
||
+
|
||
+ add %RDX_LP, %RDI_LP
|
||
+
|
||
+ /* Check the last VEC_SIZE bytes. */
|
||
+ vpcmpb $0, (%rdi), %YMMMATCH, %k1
|
||
+ kmovd %k1, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x0)
|
||
+
|
||
+ subq $(VEC_SIZE * 4), %rdi
|
||
+ movl %edi, %ecx
|
||
+ andl $(VEC_SIZE - 1), %ecx
|
||
+ jz L(aligned_more)
|
||
+
|
||
+ /* Align data for aligned loads in the loop. */
|
||
+ addq $VEC_SIZE, %rdi
|
||
+ addq $VEC_SIZE, %rdx
|
||
+ andq $-VEC_SIZE, %rdi
|
||
+ subq %rcx, %rdx
|
||
+
|
||
+ .p2align 4
|
||
+L(aligned_more):
|
||
+ subq $(VEC_SIZE * 4), %rdx
|
||
+ jbe L(last_4x_vec_or_less)
|
||
+
|
||
+ /* Check the last 4 * VEC_SIZE. Only one VEC_SIZE at a time
|
||
+ since data is only aligned to VEC_SIZE. */
|
||
+ vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1
|
||
+ kmovd %k1, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x3)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k2
|
||
+ kmovd %k2, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x2)
|
||
+
|
||
+ vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k3
|
||
+ kmovd %k3, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x1)
|
||
+
|
||
+ vpcmpb $0, (%rdi), %YMMMATCH, %k4
|
||
+ kmovd %k4, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x0)
|
||
+
|
||
+ /* Align data to 4 * VEC_SIZE for loop with fewer branches.
|
||
+ There are some overlaps with above if data isn't aligned
|
||
+ to 4 * VEC_SIZE. */
|
||
+ movl %edi, %ecx
|
||
+ andl $(VEC_SIZE * 4 - 1), %ecx
|
||
+ jz L(loop_4x_vec)
|
||
+
|
||
+ addq $(VEC_SIZE * 4), %rdi
|
||
+ addq $(VEC_SIZE * 4), %rdx
|
||
+ andq $-(VEC_SIZE * 4), %rdi
|
||
+ subq %rcx, %rdx
|
||
+
|
||
+ .p2align 4
|
||
+L(loop_4x_vec):
|
||
+ /* Compare 4 * VEC at a time backward. */
|
||
+ subq $(VEC_SIZE * 4), %rdi
|
||
+ subq $(VEC_SIZE * 4), %rdx
|
||
+ jbe L(last_4x_vec_or_less)
|
||
+
|
||
+ vpcmpb $0, (%rdi), %YMMMATCH, %k1
|
||
+ vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k2
|
||
+ kord %k1, %k2, %k5
|
||
+ vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k3
|
||
+ vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k4
|
||
+
|
||
+ kord %k3, %k4, %k6
|
||
+ kortestd %k5, %k6
|
||
+ jz L(loop_4x_vec)
|
||
+
|
||
+ /* There is a match. */
|
||
+ kmovd %k4, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x3)
|
||
+
|
||
+ kmovd %k3, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x2)
|
||
+
|
||
+ kmovd %k2, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x1)
|
||
+
|
||
+ kmovd %k1, %eax
|
||
+ bsrl %eax, %eax
|
||
+ addq %rdi, %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_4x_vec_or_less):
|
||
+ addl $(VEC_SIZE * 4), %edx
|
||
+ cmpl $(VEC_SIZE * 2), %edx
|
||
+ jbe L(last_2x_vec)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1
|
||
+ kmovd %k1, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x3)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k2
|
||
+ kmovd %k2, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x2)
|
||
+
|
||
+ vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k3
|
||
+ kmovd %k3, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x1_check)
|
||
+ cmpl $(VEC_SIZE * 3), %edx
|
||
+ jbe L(zero)
|
||
+
|
||
+ vpcmpb $0, (%rdi), %YMMMATCH, %k4
|
||
+ kmovd %k4, %eax
|
||
+ testl %eax, %eax
|
||
+ jz L(zero)
|
||
+ bsrl %eax, %eax
|
||
+ subq $(VEC_SIZE * 4), %rdx
|
||
+ addq %rax, %rdx
|
||
+ jl L(zero)
|
||
+ addq %rdi, %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_2x_vec):
|
||
+ vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1
|
||
+ kmovd %k1, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x3_check)
|
||
+ cmpl $VEC_SIZE, %edx
|
||
+ jbe L(zero)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k1
|
||
+ kmovd %k1, %eax
|
||
+ testl %eax, %eax
|
||
+ jz L(zero)
|
||
+ bsrl %eax, %eax
|
||
+ subq $(VEC_SIZE * 2), %rdx
|
||
+ addq %rax, %rdx
|
||
+ jl L(zero)
|
||
+ addl $(VEC_SIZE * 2), %eax
|
||
+ addq %rdi, %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_vec_x0):
|
||
+ bsrl %eax, %eax
|
||
+ addq %rdi, %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_vec_x1):
|
||
+ bsrl %eax, %eax
|
||
+ addl $VEC_SIZE, %eax
|
||
+ addq %rdi, %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_vec_x2):
|
||
+ bsrl %eax, %eax
|
||
+ addl $(VEC_SIZE * 2), %eax
|
||
+ addq %rdi, %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_vec_x3):
|
||
+ bsrl %eax, %eax
|
||
+ addl $(VEC_SIZE * 3), %eax
|
||
+ addq %rdi, %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_vec_x1_check):
|
||
+ bsrl %eax, %eax
|
||
+ subq $(VEC_SIZE * 3), %rdx
|
||
+ addq %rax, %rdx
|
||
+ jl L(zero)
|
||
+ addl $VEC_SIZE, %eax
|
||
+ addq %rdi, %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_vec_x3_check):
|
||
+ bsrl %eax, %eax
|
||
+ subq $VEC_SIZE, %rdx
|
||
+ addq %rax, %rdx
|
||
+ jl L(zero)
|
||
+ addl $(VEC_SIZE * 3), %eax
|
||
+ addq %rdi, %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(zero):
|
||
+ xorl %eax, %eax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_vec_or_less_aligned):
|
||
+ movl %edx, %ecx
|
||
+
|
||
+ vpcmpb $0, (%rdi), %YMMMATCH, %k1
|
||
+
|
||
+ movl $1, %edx
|
||
+ /* Use a 64-bit shift of rdx so that a shift count of 32 is supported. */
|
||
+ salq %cl, %rdx
|
||
+ subq $1, %rdx
|
||
+
|
||
+ kmovd %k1, %eax
|
||
+
|
||
+ /* Remove the trailing bytes. */
|
||
+ andl %edx, %eax
|
||
+ testl %eax, %eax
|
||
+ jz L(zero)
|
||
+
|
||
+ bsrl %eax, %eax
|
||
+ addq %rdi, %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_vec_or_less):
|
||
+ addl $VEC_SIZE, %edx
|
||
+
|
||
+ /* Check for zero length. */
|
||
+ testl %edx, %edx
|
||
+ jz L(zero)
|
||
+
|
||
+ movl %edi, %ecx
|
||
+ andl $(VEC_SIZE - 1), %ecx
|
||
+ jz L(last_vec_or_less_aligned)
|
||
+
|
||
+ movl %ecx, %esi
|
||
+ movl %ecx, %r8d
|
||
+ addl %edx, %esi
|
||
+ andq $-VEC_SIZE, %rdi
|
||
+
|
||
+ subl $VEC_SIZE, %esi
|
||
+ ja L(last_vec_2x_aligned)
|
||
+
|
||
+ /* Check the last VEC. */
|
||
+ vpcmpb $0, (%rdi), %YMMMATCH, %k1
|
||
+ kmovd %k1, %eax
|
||
+
|
||
+ /* Remove the leading and trailing bytes. */
|
||
+ sarl %cl, %eax
|
||
+ movl %edx, %ecx
|
||
+
|
||
+ movl $1, %edx
|
||
+ sall %cl, %edx
|
||
+ subl $1, %edx
|
||
+
|
||
+ andl %edx, %eax
|
||
+ testl %eax, %eax
|
||
+ jz L(zero)
|
||
+
|
||
+ bsrl %eax, %eax
|
||
+ addq %rdi, %rax
|
||
+ addq %r8, %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_vec_2x_aligned):
|
||
+ movl %esi, %ecx
|
||
+
|
||
+ /* Check the last VEC. */
|
||
+ vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k1
|
||
+
|
||
+ movl $1, %edx
|
||
+ sall %cl, %edx
|
||
+ subl $1, %edx
|
||
+
|
||
+ kmovd %k1, %eax
|
||
+
|
||
+ /* Remove the trailing bytes. */
|
||
+ andl %edx, %eax
|
||
+
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x1)
|
||
+
|
||
+ /* Check the second last VEC. */
|
||
+ vpcmpb $0, (%rdi), %YMMMATCH, %k1
|
||
+
|
||
+ movl %r8d, %ecx
|
||
+
|
||
+ kmovd %k1, %eax
|
||
+
|
||
+ /* Remove the leading bytes. Must use unsigned right shift for
|
||
+ bsrl below. */
|
||
+ shrl %cl, %eax
|
||
+ testl %eax, %eax
|
||
+ jz L(zero)
|
||
+
|
||
+ bsrl %eax, %eax
|
||
+ addq %rdi, %rax
|
||
+ addq %r8, %rax
|
||
+ ret
|
||
+END (__memrchr_evex)
|
||
+#endif
|
||
diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..8ac3e479bb
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S
|
||
@@ -0,0 +1,10 @@
|
||
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
|
||
+
|
||
+#define VZEROUPPER_RETURN jmp L(return)
|
||
+
|
||
+#define SECTION(p) p##.avx.rtm
|
||
+#define MEMSET_SYMBOL(p,s) p##_avx2_##s##_rtm
|
||
+#define WMEMSET_SYMBOL(p,s) p##_avx2_##s##_rtm
|
||
+
|
||
+#include "memset-avx2-unaligned-erms.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
|
||
index 7ab3d89849..ae0860f36a 100644
|
||
--- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
|
||
+++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
|
||
@@ -14,9 +14,15 @@
|
||
movq r, %rax; \
|
||
vpbroadcastd %xmm0, %ymm0
|
||
|
||
-# define SECTION(p) p##.avx
|
||
-# define MEMSET_SYMBOL(p,s) p##_avx2_##s
|
||
-# define WMEMSET_SYMBOL(p,s) p##_avx2_##s
|
||
+# ifndef SECTION
|
||
+# define SECTION(p) p##.avx
|
||
+# endif
|
||
+# ifndef MEMSET_SYMBOL
|
||
+# define MEMSET_SYMBOL(p,s) p##_avx2_##s
|
||
+# endif
|
||
+# ifndef WMEMSET_SYMBOL
|
||
+# define WMEMSET_SYMBOL(p,s) p##_avx2_##s
|
||
+# endif
|
||
|
||
# include "memset-vec-unaligned-erms.S"
|
||
#endif
|
||
diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
|
||
index 0783979ca5..22e7b187c8 100644
|
||
--- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
|
||
+++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
|
||
@@ -1,22 +1,22 @@
|
||
#if IS_IN (libc)
|
||
# define VEC_SIZE 64
|
||
-# define VEC(i) zmm##i
|
||
+# define XMM0 xmm16
|
||
+# define YMM0 ymm16
|
||
+# define VEC0 zmm16
|
||
+# define VEC(i) VEC##i
|
||
# define VMOVU vmovdqu64
|
||
# define VMOVA vmovdqa64
|
||
+# define VZEROUPPER
|
||
|
||
# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
|
||
- vmovd d, %xmm0; \
|
||
movq r, %rax; \
|
||
- vpbroadcastb %xmm0, %xmm0; \
|
||
- vpbroadcastq %xmm0, %zmm0
|
||
+ vpbroadcastb d, %VEC0
|
||
|
||
# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
|
||
- vmovd d, %xmm0; \
|
||
movq r, %rax; \
|
||
- vpbroadcastd %xmm0, %xmm0; \
|
||
- vpbroadcastq %xmm0, %zmm0
|
||
+ vpbroadcastd d, %VEC0
|
||
|
||
-# define SECTION(p) p##.avx512
|
||
+# define SECTION(p) p##.evex512
|
||
# define MEMSET_SYMBOL(p,s) p##_avx512_##s
|
||
# define WMEMSET_SYMBOL(p,s) p##_avx512_##s
|
||
|
||
diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
|
||
new file mode 100644
|
||
index 0000000000..ae0a4d6e46
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
|
||
@@ -0,0 +1,24 @@
|
||
+#if IS_IN (libc)
|
||
+# define VEC_SIZE 32
|
||
+# define XMM0 xmm16
|
||
+# define YMM0 ymm16
|
||
+# define VEC0 ymm16
|
||
+# define VEC(i) VEC##i
|
||
+# define VMOVU vmovdqu64
|
||
+# define VMOVA vmovdqa64
|
||
+# define VZEROUPPER
|
||
+
|
||
+# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
|
||
+ movq r, %rax; \
|
||
+ vpbroadcastb d, %VEC0
|
||
+
|
||
+# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
|
||
+ movq r, %rax; \
|
||
+ vpbroadcastd d, %VEC0
|
||
+
|
||
+# define SECTION(p) p##.evex
|
||
+# define MEMSET_SYMBOL(p,s) p##_evex_##s
|
||
+# define WMEMSET_SYMBOL(p,s) p##_evex_##s
|
||
+
|
||
+# include "memset-vec-unaligned-erms.S"
|
||
+#endif
|
||
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
|
||
index 2bfc95de05..de5a8a38f5 100644
|
||
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
|
||
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
|
||
@@ -34,20 +34,25 @@
|
||
# define WMEMSET_CHK_SYMBOL(p,s) WMEMSET_SYMBOL(p, s)
|
||
#endif
|
||
|
||
+#ifndef XMM0
|
||
+# define XMM0 xmm0
|
||
+#endif
|
||
+
|
||
+#ifndef YMM0
|
||
+# define YMM0 ymm0
|
||
+#endif
|
||
+
|
||
#ifndef VZEROUPPER
|
||
# if VEC_SIZE > 16
|
||
# define VZEROUPPER vzeroupper
|
||
+# define VZEROUPPER_SHORT_RETURN vzeroupper; ret
|
||
# else
|
||
# define VZEROUPPER
|
||
# endif
|
||
#endif
|
||
|
||
#ifndef VZEROUPPER_SHORT_RETURN
|
||
-# if VEC_SIZE > 16
|
||
-# define VZEROUPPER_SHORT_RETURN vzeroupper
|
||
-# else
|
||
-# define VZEROUPPER_SHORT_RETURN rep
|
||
-# endif
|
||
+# define VZEROUPPER_SHORT_RETURN rep; ret
|
||
#endif
|
||
|
||
#ifndef MOVQ
|
||
@@ -67,7 +72,7 @@
|
||
ENTRY (__bzero)
|
||
mov %RDI_LP, %RAX_LP /* Set return value. */
|
||
mov %RSI_LP, %RDX_LP /* Set n. */
|
||
- pxor %xmm0, %xmm0
|
||
+ pxor %XMM0, %XMM0
|
||
jmp L(entry_from_bzero)
|
||
END (__bzero)
|
||
weak_alias (__bzero, bzero)
|
||
@@ -109,8 +114,7 @@ L(entry_from_bzero):
|
||
/* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */
|
||
VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx)
|
||
VMOVU %VEC(0), (%rdi)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
#if defined USE_MULTIARCH && IS_IN (libc)
|
||
END (MEMSET_SYMBOL (__memset, unaligned))
|
||
|
||
@@ -133,14 +137,12 @@ ENTRY (__memset_erms)
|
||
ENTRY (MEMSET_SYMBOL (__memset, erms))
|
||
# endif
|
||
L(stosb):
|
||
- /* Issue vzeroupper before rep stosb. */
|
||
- VZEROUPPER
|
||
mov %RDX_LP, %RCX_LP
|
||
movzbl %sil, %eax
|
||
mov %RDI_LP, %RDX_LP
|
||
rep stosb
|
||
mov %RDX_LP, %RAX_LP
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
# if VEC_SIZE == 16
|
||
END (__memset_erms)
|
||
# else
|
||
@@ -167,8 +169,7 @@ ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms))
|
||
/* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */
|
||
VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx)
|
||
VMOVU %VEC(0), (%rdi)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
L(stosb_more_2x_vec):
|
||
cmp __x86_rep_stosb_threshold(%rip), %RDX_LP
|
||
@@ -182,8 +183,11 @@ L(more_2x_vec):
|
||
VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx)
|
||
VMOVU %VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx)
|
||
L(return):
|
||
- VZEROUPPER
|
||
+#if VEC_SIZE > 16
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN
|
||
+#else
|
||
ret
|
||
+#endif
|
||
|
||
L(loop_start):
|
||
leaq (VEC_SIZE * 4)(%rdi), %rcx
|
||
@@ -209,7 +213,6 @@ L(loop):
|
||
cmpq %rcx, %rdx
|
||
jne L(loop)
|
||
VZEROUPPER_SHORT_RETURN
|
||
- ret
|
||
L(less_vec):
|
||
/* Less than 1 VEC. */
|
||
# if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
|
||
@@ -223,7 +226,7 @@ L(less_vec):
|
||
cmpb $16, %dl
|
||
jae L(between_16_31)
|
||
# endif
|
||
- MOVQ %xmm0, %rcx
|
||
+ MOVQ %XMM0, %rcx
|
||
cmpb $8, %dl
|
||
jae L(between_8_15)
|
||
cmpb $4, %dl
|
||
@@ -233,40 +236,34 @@ L(less_vec):
|
||
jb 1f
|
||
movb %cl, (%rdi)
|
||
1:
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
# if VEC_SIZE > 32
|
||
/* From 32 to 63. No branch when size == 32. */
|
||
L(between_32_63):
|
||
- vmovdqu %ymm0, -32(%rdi,%rdx)
|
||
- vmovdqu %ymm0, (%rdi)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VMOVU %YMM0, -32(%rdi,%rdx)
|
||
+ VMOVU %YMM0, (%rdi)
|
||
+ VZEROUPPER_RETURN
|
||
# endif
|
||
# if VEC_SIZE > 16
|
||
/* From 16 to 31. No branch when size == 16. */
|
||
L(between_16_31):
|
||
- vmovdqu %xmm0, -16(%rdi,%rdx)
|
||
- vmovdqu %xmm0, (%rdi)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VMOVU %XMM0, -16(%rdi,%rdx)
|
||
+ VMOVU %XMM0, (%rdi)
|
||
+ VZEROUPPER_RETURN
|
||
# endif
|
||
/* From 8 to 15. No branch when size == 8. */
|
||
L(between_8_15):
|
||
movq %rcx, -8(%rdi,%rdx)
|
||
movq %rcx, (%rdi)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
L(between_4_7):
|
||
/* From 4 to 7. No branch when size == 4. */
|
||
movl %ecx, -4(%rdi,%rdx)
|
||
movl %ecx, (%rdi)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
L(between_2_3):
|
||
/* From 2 to 3. No branch when size == 2. */
|
||
movw %cx, -2(%rdi,%rdx)
|
||
movw %cx, (%rdi)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
END (MEMSET_SYMBOL (__memset, unaligned_erms))
|
||
diff --git a/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..acc5f6e2fb
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S
|
||
@@ -0,0 +1,4 @@
|
||
+#define MEMCHR __rawmemchr_avx2_rtm
|
||
+#define USE_AS_RAWMEMCHR 1
|
||
+
|
||
+#include "memchr-avx2-rtm.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/rawmemchr-evex.S b/sysdeps/x86_64/multiarch/rawmemchr-evex.S
|
||
new file mode 100644
|
||
index 0000000000..ec942b77ba
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/rawmemchr-evex.S
|
||
@@ -0,0 +1,4 @@
|
||
+#define MEMCHR __rawmemchr_evex
|
||
+#define USE_AS_RAWMEMCHR 1
|
||
+
|
||
+#include "memchr-evex.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..2b9c07a59f
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S
|
||
@@ -0,0 +1,3 @@
|
||
+#define USE_AS_STPCPY
|
||
+#define STRCPY __stpcpy_avx2_rtm
|
||
+#include "strcpy-avx2-rtm.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/stpcpy-evex.S b/sysdeps/x86_64/multiarch/stpcpy-evex.S
|
||
new file mode 100644
|
||
index 0000000000..7c6f26cd98
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/stpcpy-evex.S
|
||
@@ -0,0 +1,3 @@
|
||
+#define USE_AS_STPCPY
|
||
+#define STRCPY __stpcpy_evex
|
||
+#include "strcpy-evex.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..60a2ccfe53
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S
|
||
@@ -0,0 +1,4 @@
|
||
+#define USE_AS_STPCPY
|
||
+#define USE_AS_STRNCPY
|
||
+#define STRCPY __stpncpy_avx2_rtm
|
||
+#include "strcpy-avx2-rtm.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/stpncpy-evex.S b/sysdeps/x86_64/multiarch/stpncpy-evex.S
|
||
new file mode 100644
|
||
index 0000000000..1570014d1c
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/stpncpy-evex.S
|
||
@@ -0,0 +1,4 @@
|
||
+#define USE_AS_STPCPY
|
||
+#define USE_AS_STRNCPY
|
||
+#define STRCPY __stpncpy_evex
|
||
+#include "strcpy-evex.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..637fb557c4
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S
|
||
@@ -0,0 +1,12 @@
|
||
+#ifndef STRCAT
|
||
+# define STRCAT __strcat_avx2_rtm
|
||
+#endif
|
||
+
|
||
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
|
||
+
|
||
+#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
|
||
+
|
||
+#define SECTION(p) p##.avx.rtm
|
||
+
|
||
+#include "strcat-avx2.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/strcat-avx2.S b/sysdeps/x86_64/multiarch/strcat-avx2.S
|
||
index a4143bf8f5..1e6d4827ee 100644
|
||
--- a/sysdeps/x86_64/multiarch/strcat-avx2.S
|
||
+++ b/sysdeps/x86_64/multiarch/strcat-avx2.S
|
||
@@ -30,7 +30,11 @@
|
||
/* Number of bytes in a vector register */
|
||
# define VEC_SIZE 32
|
||
|
||
- .section .text.avx,"ax",@progbits
|
||
+# ifndef SECTION
|
||
+# define SECTION(p) p##.avx
|
||
+# endif
|
||
+
|
||
+ .section SECTION(.text),"ax",@progbits
|
||
ENTRY (STRCAT)
|
||
mov %rdi, %r9
|
||
# ifdef USE_AS_STRNCAT
|
||
diff --git a/sysdeps/x86_64/multiarch/strcat-evex.S b/sysdeps/x86_64/multiarch/strcat-evex.S
|
||
new file mode 100644
|
||
index 0000000000..97c3d85b6d
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strcat-evex.S
|
||
@@ -0,0 +1,283 @@
|
||
+/* strcat with 256-bit EVEX instructions.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#if IS_IN (libc)
|
||
+
|
||
+# include <sysdep.h>
|
||
+
|
||
+# ifndef STRCAT
|
||
+# define STRCAT __strcat_evex
|
||
+# endif
|
||
+
|
||
+# define VMOVU vmovdqu64
|
||
+# define VMOVA vmovdqa64
|
||
+
|
||
+/* zero register */
|
||
+# define XMMZERO xmm16
|
||
+# define YMMZERO ymm16
|
||
+# define YMM0 ymm17
|
||
+# define YMM1 ymm18
|
||
+
|
||
+# define USE_AS_STRCAT
|
||
+
|
||
+/* Number of bytes in a vector register */
|
||
+# define VEC_SIZE 32
|
||
+
|
||
+ .section .text.evex,"ax",@progbits
|
||
+ENTRY (STRCAT)
|
||
+ mov %rdi, %r9
|
||
+# ifdef USE_AS_STRNCAT
|
||
+ mov %rdx, %r8
|
||
+# endif
|
||
+
|
||
+ xor %eax, %eax
|
||
+ mov %edi, %ecx
|
||
+ and $((VEC_SIZE * 4) - 1), %ecx
|
||
+ vpxorq %XMMZERO, %XMMZERO, %XMMZERO
|
||
+ cmp $(VEC_SIZE * 3), %ecx
|
||
+ ja L(fourth_vector_boundary)
|
||
+ vpcmpb $0, (%rdi), %YMMZERO, %k0
|
||
+ kmovd %k0, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_first_vector)
|
||
+ mov %rdi, %rax
|
||
+ and $-VEC_SIZE, %rax
|
||
+ jmp L(align_vec_size_start)
|
||
+L(fourth_vector_boundary):
|
||
+ mov %rdi, %rax
|
||
+ and $-VEC_SIZE, %rax
|
||
+ vpcmpb $0, (%rax), %YMMZERO, %k0
|
||
+ mov $-1, %r10d
|
||
+ sub %rax, %rcx
|
||
+ shl %cl, %r10d
|
||
+ kmovd %k0, %edx
|
||
+ and %r10d, %edx
|
||
+ jnz L(exit)
|
||
+
|
||
+L(align_vec_size_start):
|
||
+ vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0
|
||
+ kmovd %k0, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_second_vector)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1
|
||
+ kmovd %k1, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_third_vector)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2
|
||
+ kmovd %k2, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_fourth_vector)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3
|
||
+ kmovd %k3, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_fifth_vector)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4
|
||
+ add $(VEC_SIZE * 4), %rax
|
||
+ kmovd %k4, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_second_vector)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1
|
||
+ kmovd %k1, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_third_vector)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2
|
||
+ kmovd %k2, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_fourth_vector)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3
|
||
+ kmovd %k3, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_fifth_vector)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4
|
||
+ kmovd %k4, %edx
|
||
+ add $(VEC_SIZE * 4), %rax
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_second_vector)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1
|
||
+ kmovd %k1, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_third_vector)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2
|
||
+ kmovd %k2, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_fourth_vector)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3
|
||
+ kmovd %k3, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_fifth_vector)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4
|
||
+ add $(VEC_SIZE * 4), %rax
|
||
+ kmovd %k4, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_second_vector)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1
|
||
+ kmovd %k1, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_third_vector)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2
|
||
+ kmovd %k2, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_fourth_vector)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3
|
||
+ kmovd %k3, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_fifth_vector)
|
||
+
|
||
+ test $((VEC_SIZE * 4) - 1), %rax
|
||
+ jz L(align_four_vec_loop)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4
|
||
+ add $(VEC_SIZE * 5), %rax
|
||
+ kmovd %k4, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit)
|
||
+
|
||
+ test $((VEC_SIZE * 4) - 1), %rax
|
||
+ jz L(align_four_vec_loop)
|
||
+
|
||
+ vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0
|
||
+ add $VEC_SIZE, %rax
|
||
+ kmovd %k0, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit)
|
||
+
|
||
+ test $((VEC_SIZE * 4) - 1), %rax
|
||
+ jz L(align_four_vec_loop)
|
||
+
|
||
+ vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0
|
||
+ add $VEC_SIZE, %rax
|
||
+ kmovd %k0, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit)
|
||
+
|
||
+ test $((VEC_SIZE * 4) - 1), %rax
|
||
+ jz L(align_four_vec_loop)
|
||
+
|
||
+ vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k1
|
||
+ add $VEC_SIZE, %rax
|
||
+ kmovd %k1, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit)
|
||
+
|
||
+ add $VEC_SIZE, %rax
|
||
+
|
||
+ .p2align 4
|
||
+L(align_four_vec_loop):
|
||
+ VMOVA (%rax), %YMM0
|
||
+ VMOVA (VEC_SIZE * 2)(%rax), %YMM1
|
||
+ vpminub VEC_SIZE(%rax), %YMM0, %YMM0
|
||
+ vpminub (VEC_SIZE * 3)(%rax), %YMM1, %YMM1
|
||
+ vpminub %YMM0, %YMM1, %YMM0
|
||
+ /* If K0 != 0, there is a null byte. */
|
||
+ vpcmpb $0, %YMM0, %YMMZERO, %k0
|
||
+ add $(VEC_SIZE * 4), %rax
|
||
+ ktestd %k0, %k0
|
||
+ jz L(align_four_vec_loop)
|
||
+
|
||
+ vpcmpb $0, -(VEC_SIZE * 4)(%rax), %YMMZERO, %k0
|
||
+ sub $(VEC_SIZE * 5), %rax
|
||
+ kmovd %k0, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_second_vector)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1
|
||
+ kmovd %k1, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_third_vector)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2
|
||
+ kmovd %k2, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(exit_null_on_fourth_vector)
|
||
+
|
||
+ vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3
|
||
+ kmovd %k3, %edx
|
||
+ sub %rdi, %rax
|
||
+ bsf %rdx, %rdx
|
||
+ add %rdx, %rax
|
||
+ add $(VEC_SIZE * 4), %rax
|
||
+ jmp L(StartStrcpyPart)
|
||
+
|
||
+ .p2align 4
|
||
+L(exit):
|
||
+ sub %rdi, %rax /* rax = offset of the matching vector from dest */
|
||
+L(exit_null_on_first_vector):
|
||
+ bsf %rdx, %rdx /* rdx = bit index of the first null byte */
|
||
+ add %rdx, %rax /* rax = offset of the null byte from dest */
|
||
+ jmp L(StartStrcpyPart)
|
||
+
|
||
+ .p2align 4
|
||
+L(exit_null_on_second_vector):
|
||
+ sub %rdi, %rax /* rax = offset of the scan base from dest */
|
||
+ bsf %rdx, %rdx /* rdx = bit index of the first null byte */
|
||
+ add %rdx, %rax
|
||
+ add $VEC_SIZE, %rax /* null was found in VEC_SIZE(%rax) */
|
||
+ jmp L(StartStrcpyPart)
|
||
+
|
||
+ .p2align 4
|
||
+L(exit_null_on_third_vector):
|
||
+ sub %rdi, %rax /* rax = offset of the scan base from dest */
|
||
+ bsf %rdx, %rdx /* rdx = bit index of the first null byte */
|
||
+ add %rdx, %rax
|
||
+ add $(VEC_SIZE * 2), %rax /* null was found in (VEC_SIZE * 2)(%rax) */
|
||
+ jmp L(StartStrcpyPart)
|
||
+
|
||
+ .p2align 4
|
||
+L(exit_null_on_fourth_vector):
|
||
+ sub %rdi, %rax /* rax = offset of the scan base from dest */
|
||
+ bsf %rdx, %rdx /* rdx = bit index of the first null byte */
|
||
+ add %rdx, %rax
|
||
+ add $(VEC_SIZE * 3), %rax /* null was found in (VEC_SIZE * 3)(%rax) */
|
||
+ jmp L(StartStrcpyPart)
|
||
+
|
||
+ .p2align 4
|
||
+L(exit_null_on_fifth_vector):
|
||
+ sub %rdi, %rax /* rax = offset of the scan base from dest */
|
||
+ bsf %rdx, %rdx /* rdx = bit index of the first null byte */
|
||
+ add %rdx, %rax
|
||
+ add $(VEC_SIZE * 4), %rax /* falls through to L(StartStrcpyPart) */
|
||
+
|
||
+ .p2align 4
|
||
+L(StartStrcpyPart):
|
||
+ lea (%r9, %rax), %rdi /* rdi = dest + offset of its null byte */
|
||
+ mov %rsi, %rcx
|
||
+ mov %r9, %rax /* save result */
|
||
+
|
||
+# ifdef USE_AS_STRNCAT
|
||
+ test %r8, %r8
|
||
+ jz L(ExitZero)
|
||
+# define USE_AS_STRNCPY
|
||
+# endif
|
||
+
|
||
+# include "strcpy-evex.S"
|
||
+#endif
|
||
diff --git a/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..81f20d1d8e
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S
|
||
@@ -0,0 +1,12 @@
|
||
+#ifndef STRCHR
|
||
+# define STRCHR __strchr_avx2_rtm
|
||
+#endif
|
||
+
|
||
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
|
||
+
|
||
+#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
|
||
+
|
||
+#define SECTION(p) p##.avx.rtm
|
||
+
|
||
+#include "strchr-avx2.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/strchr-avx2.S b/sysdeps/x86_64/multiarch/strchr-avx2.S
|
||
index 39fc69da7b..0a5217514a 100644
|
||
--- a/sysdeps/x86_64/multiarch/strchr-avx2.S
|
||
+++ b/sysdeps/x86_64/multiarch/strchr-avx2.S
|
||
@@ -38,9 +38,13 @@
|
||
# define VZEROUPPER vzeroupper
|
||
# endif
|
||
|
||
+# ifndef SECTION
|
||
+# define SECTION(p) p##.avx
|
||
+# endif
|
||
+
|
||
# define VEC_SIZE 32
|
||
|
||
- .section .text.avx,"ax",@progbits
|
||
+ .section SECTION(.text),"ax",@progbits
|
||
ENTRY (STRCHR)
|
||
movl %edi, %ecx
|
||
/* Broadcast CHAR to YMM0. */
|
||
@@ -93,8 +97,8 @@ L(cros_page_boundary):
|
||
cmp (%rax), %CHAR_REG
|
||
cmovne %rdx, %rax
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+L(return_vzeroupper):
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN
|
||
|
||
.p2align 4
|
||
L(aligned_more):
|
||
@@ -190,8 +194,7 @@ L(first_vec_x0):
|
||
cmp (%rax), %CHAR_REG
|
||
cmovne %rdx, %rax
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(first_vec_x1):
|
||
@@ -205,8 +208,7 @@ L(first_vec_x1):
|
||
cmp (%rax), %CHAR_REG
|
||
cmovne %rdx, %rax
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(first_vec_x2):
|
||
@@ -220,8 +222,7 @@ L(first_vec_x2):
|
||
cmp (%rax), %CHAR_REG
|
||
cmovne %rdx, %rax
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(4x_vec_end):
|
||
@@ -247,8 +248,7 @@ L(first_vec_x3):
|
||
cmp (%rax), %CHAR_REG
|
||
cmovne %rdx, %rax
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
END (STRCHR)
|
||
#endif
|
||
diff --git a/sysdeps/x86_64/multiarch/strchr-evex.S b/sysdeps/x86_64/multiarch/strchr-evex.S
|
||
new file mode 100644
|
||
index 0000000000..ddc86a7058
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strchr-evex.S
|
||
@@ -0,0 +1,335 @@
|
||
+/* strchr/strchrnul optimized with 256-bit EVEX instructions.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#if IS_IN (libc)
|
||
+
|
||
+# include <sysdep.h>
|
||
+
|
||
+# ifndef STRCHR
|
||
+# define STRCHR __strchr_evex
|
||
+# endif
|
||
+
|
||
+# define VMOVU vmovdqu64
|
||
+# define VMOVA vmovdqa64
|
||
+
|
||
+# ifdef USE_AS_WCSCHR
|
||
+# define VPBROADCAST vpbroadcastd
|
||
+# define VPCMP vpcmpd
|
||
+# define VPMINU vpminud
|
||
+# define CHAR_REG esi
|
||
+# define SHIFT_REG r8d
|
||
+# else
|
||
+# define VPBROADCAST vpbroadcastb
|
||
+# define VPCMP vpcmpb
|
||
+# define VPMINU vpminub
|
||
+# define CHAR_REG sil
|
||
+# define SHIFT_REG ecx
|
||
+# endif
|
||
+
|
||
+# define XMMZERO xmm16
|
||
+
|
||
+# define YMMZERO ymm16
|
||
+# define YMM0 ymm17
|
||
+# define YMM1 ymm18
|
||
+# define YMM2 ymm19
|
||
+# define YMM3 ymm20
|
||
+# define YMM4 ymm21
|
||
+# define YMM5 ymm22
|
||
+# define YMM6 ymm23
|
||
+# define YMM7 ymm24
|
||
+# define YMM8 ymm25
|
||
+
|
||
+# define VEC_SIZE 32
|
||
+# define PAGE_SIZE 4096
|
||
+
|
||
+ .section .text.evex,"ax",@progbits
|
||
+ENTRY (STRCHR)
|
||
+ movl %edi, %ecx
|
||
+# ifndef USE_AS_STRCHRNUL
|
||
+ xorl %edx, %edx
|
||
+# endif
|
||
+
|
||
+ /* Broadcast CHAR to YMM0. */
|
||
+ VPBROADCAST %esi, %YMM0
|
||
+
|
||
+ vpxorq %XMMZERO, %XMMZERO, %XMMZERO
|
||
+
|
||
+ /* Check if we cross page boundary with one vector load. */
|
||
+ andl $(PAGE_SIZE - 1), %ecx
|
||
+ cmpl $(PAGE_SIZE - VEC_SIZE), %ecx
|
||
+ ja L(cross_page_boundary)
|
||
+
|
||
+ /* Check the first VEC_SIZE bytes. Search for both CHAR and the
|
||
+ null bytes. */
|
||
+ VMOVU (%rdi), %YMM1
|
||
+
|
||
+ /* Leaves only CHARS matching esi as 0. */
|
||
+ vpxorq %YMM1, %YMM0, %YMM2
|
||
+ VPMINU %YMM2, %YMM1, %YMM2
|
||
+ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
|
||
+ VPCMP $0, %YMMZERO, %YMM2, %k0
|
||
+ ktestd %k0, %k0
|
||
+ jz L(more_vecs)
|
||
+ kmovd %k0, %eax
|
||
+ tzcntl %eax, %eax
|
||
+ /* Found CHAR or the null byte. */
|
||
+# ifdef USE_AS_WCSCHR
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ leaq (%rdi, %rax, 4), %rax
|
||
+# else
|
||
+ addq %rdi, %rax
|
||
+# endif
|
||
+# ifndef USE_AS_STRCHRNUL
|
||
+ cmp (%rax), %CHAR_REG
|
||
+ cmovne %rdx, %rax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(more_vecs):
|
||
+ /* Align data for aligned loads in the loop. */
|
||
+ andq $-VEC_SIZE, %rdi
|
||
+L(aligned_more):
|
||
+
|
||
+ /* Check the next 4 * VEC_SIZE. Only one VEC_SIZE at a time
|
||
+ since data is only aligned to VEC_SIZE. */
|
||
+ VMOVA VEC_SIZE(%rdi), %YMM1
|
||
+ addq $VEC_SIZE, %rdi
|
||
+
|
||
+ /* Leaves only CHARS matching esi as 0. */
|
||
+ vpxorq %YMM1, %YMM0, %YMM2
|
||
+ VPMINU %YMM2, %YMM1, %YMM2
|
||
+ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
|
||
+ VPCMP $0, %YMMZERO, %YMM2, %k0
|
||
+ kmovd %k0, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(first_vec_x0)
|
||
+
|
||
+ VMOVA VEC_SIZE(%rdi), %YMM1
|
||
+ /* Leaves only CHARS matching esi as 0. */
|
||
+ vpxorq %YMM1, %YMM0, %YMM2
|
||
+ VPMINU %YMM2, %YMM1, %YMM2
|
||
+ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
|
||
+ VPCMP $0, %YMMZERO, %YMM2, %k0
|
||
+ kmovd %k0, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(first_vec_x1)
|
||
+
|
||
+ VMOVA (VEC_SIZE * 2)(%rdi), %YMM1
|
||
+ /* Leaves only CHARS matching esi as 0. */
|
||
+ vpxorq %YMM1, %YMM0, %YMM2
|
||
+ VPMINU %YMM2, %YMM1, %YMM2
|
||
+ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
|
||
+ VPCMP $0, %YMMZERO, %YMM2, %k0
|
||
+ kmovd %k0, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(first_vec_x2)
|
||
+
|
||
+ VMOVA (VEC_SIZE * 3)(%rdi), %YMM1
|
||
+ /* Leaves only CHARS matching esi as 0. */
|
||
+ vpxorq %YMM1, %YMM0, %YMM2
|
||
+ VPMINU %YMM2, %YMM1, %YMM2
|
||
+ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
|
||
+ VPCMP $0, %YMMZERO, %YMM2, %k0
|
||
+ ktestd %k0, %k0
|
||
+ jz L(prep_loop_4x)
|
||
+
|
||
+ kmovd %k0, %eax
|
||
+ tzcntl %eax, %eax
|
||
+ /* Found CHAR or the null byte. */
|
||
+# ifdef USE_AS_WCSCHR
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ leaq (VEC_SIZE * 3)(%rdi, %rax, 4), %rax
|
||
+# else
|
||
+ leaq (VEC_SIZE * 3)(%rdi, %rax), %rax
|
||
+# endif
|
||
+# ifndef USE_AS_STRCHRNUL
|
||
+ cmp (%rax), %CHAR_REG
|
||
+ cmovne %rdx, %rax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(first_vec_x0):
|
||
+ tzcntl %eax, %eax
|
||
+ /* Found CHAR or the null byte. */
|
||
+# ifdef USE_AS_WCSCHR
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ leaq (%rdi, %rax, 4), %rax
|
||
+# else
|
||
+ addq %rdi, %rax
|
||
+# endif
|
||
+# ifndef USE_AS_STRCHRNUL
|
||
+ cmp (%rax), %CHAR_REG
|
||
+ cmovne %rdx, %rax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(first_vec_x1):
|
||
+ tzcntl %eax, %eax
|
||
+ /* Found CHAR or the null byte. */
|
||
+# ifdef USE_AS_WCSCHR
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ leaq VEC_SIZE(%rdi, %rax, 4), %rax
|
||
+# else
|
||
+ leaq VEC_SIZE(%rdi, %rax), %rax
|
||
+# endif
|
||
+# ifndef USE_AS_STRCHRNUL
|
||
+ cmp (%rax), %CHAR_REG
|
||
+ cmovne %rdx, %rax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(first_vec_x2):
|
||
+ tzcntl %eax, %eax
|
||
+ /* Found CHAR or the null byte. */
|
||
+# ifdef USE_AS_WCSCHR
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ leaq (VEC_SIZE * 2)(%rdi, %rax, 4), %rax
|
||
+# else
|
||
+ leaq (VEC_SIZE * 2)(%rdi, %rax), %rax
|
||
+# endif
|
||
+# ifndef USE_AS_STRCHRNUL
|
||
+ cmp (%rax), %CHAR_REG
|
||
+ cmovne %rdx, %rax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+L(prep_loop_4x):
|
||
+ /* Align data to 4 * VEC_SIZE. */
|
||
+ andq $-(VEC_SIZE * 4), %rdi
|
||
+
|
||
+ .p2align 4
|
||
+L(loop_4x_vec):
|
||
+ /* Compare 4 * VEC at a time forward. */
|
||
+ VMOVA (VEC_SIZE * 4)(%rdi), %YMM1
|
||
+ VMOVA (VEC_SIZE * 5)(%rdi), %YMM2
|
||
+ VMOVA (VEC_SIZE * 6)(%rdi), %YMM3
|
||
+ VMOVA (VEC_SIZE * 7)(%rdi), %YMM4
|
||
+
|
||
+ /* Leaves only CHARS matching esi as 0. */
|
||
+ vpxorq %YMM1, %YMM0, %YMM5
|
||
+ vpxorq %YMM2, %YMM0, %YMM6
|
||
+ vpxorq %YMM3, %YMM0, %YMM7
|
||
+ vpxorq %YMM4, %YMM0, %YMM8
|
||
+
|
||
+ VPMINU %YMM5, %YMM1, %YMM5
|
||
+ VPMINU %YMM6, %YMM2, %YMM6
|
||
+ VPMINU %YMM7, %YMM3, %YMM7
|
||
+ VPMINU %YMM8, %YMM4, %YMM8
|
||
+
|
||
+ VPMINU %YMM5, %YMM6, %YMM1
|
||
+ VPMINU %YMM7, %YMM8, %YMM2
|
||
+
|
||
+ VPMINU %YMM1, %YMM2, %YMM1
|
||
+
|
||
+ /* Each bit in K0 represents a CHAR or a null byte. */
|
||
+ VPCMP $0, %YMMZERO, %YMM1, %k0
|
||
+
|
||
+ addq $(VEC_SIZE * 4), %rdi
|
||
+
|
||
+ ktestd %k0, %k0
|
||
+ jz L(loop_4x_vec)
|
||
+
|
||
+ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
|
||
+ VPCMP $0, %YMMZERO, %YMM5, %k0
|
||
+ kmovd %k0, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(first_vec_x0)
|
||
+
|
||
+ /* Each bit in K1 represents a CHAR or a null byte in YMM2. */
|
||
+ VPCMP $0, %YMMZERO, %YMM6, %k1
|
||
+ kmovd %k1, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(first_vec_x1)
|
||
+
|
||
+ /* Each bit in K2 represents a CHAR or a null byte in YMM3. */
|
||
+ VPCMP $0, %YMMZERO, %YMM7, %k2
|
||
+ /* Each bit in K3 represents a CHAR or a null byte in YMM4. */
|
||
+ VPCMP $0, %YMMZERO, %YMM8, %k3
|
||
+
|
||
+# ifdef USE_AS_WCSCHR
|
||
+ /* NB: Each bit in K2/K3 represents 4-byte element. */
|
||
+ kshiftlw $8, %k3, %k1
|
||
+# else
|
||
+ kshiftlq $32, %k3, %k1
|
||
+# endif
|
||
+
|
||
+ /* Each bit in K1 represents a CHAR or a null byte in YMM3/YMM4. */
|
||
+ korq %k1, %k2, %k1
|
||
+ kmovq %k1, %rax
|
||
+
|
||
+ tzcntq %rax, %rax
|
||
+# ifdef USE_AS_WCSCHR
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ leaq (VEC_SIZE * 2)(%rdi, %rax, 4), %rax
|
||
+# else
|
||
+ leaq (VEC_SIZE * 2)(%rdi, %rax), %rax
|
||
+# endif
|
||
+# ifndef USE_AS_STRCHRNUL
|
||
+ cmp (%rax), %CHAR_REG
|
||
+ cmovne %rdx, %rax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ /* Cold case for crossing page with first load. */
|
||
+ .p2align 4
|
||
+L(cross_page_boundary):
|
||
+ andq $-VEC_SIZE, %rdi /* rdi = pointer aligned down to VEC_SIZE */
|
||
+ andl $(VEC_SIZE - 1), %ecx /* ecx = byte offset within that vector */
|
||
+
|
||
+ VMOVA (%rdi), %YMM1
|
||
+
|
||
+ /* Leaves only CHARS matching esi as 0. */
|
||
+ vpxorq %YMM1, %YMM0, %YMM2
|
||
+ VPMINU %YMM2, %YMM1, %YMM2
|
||
+ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
|
||
+ VPCMP $0, %YMMZERO, %YMM2, %k0
|
||
+ kmovd %k0, %eax
|
||
+ testl %eax, %eax
|
||
+
|
||
+# ifdef USE_AS_WCSCHR
|
||
+ /* NB: Divide shift count by 4 since each bit in K0 represents 4
|
||
+ bytes. */
|
||
+ movl %ecx, %SHIFT_REG
|
||
+ sarl $2, %SHIFT_REG
|
||
+# endif
|
||
+
|
||
+ /* Remove the leading bits. */
|
||
+ sarxl %SHIFT_REG, %eax, %eax
|
||
+ testl %eax, %eax
|
||
+
|
||
+ jz L(aligned_more)
|
||
+ tzcntl %eax, %eax
|
||
+ addq %rcx, %rdi /* rdi = original, unaligned pointer again */
|
||
+# ifdef USE_AS_WCSCHR
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ leaq (%rdi, %rax, 4), %rax
|
||
+# else
|
||
+ addq %rdi, %rax
|
||
+# endif
|
||
+# ifndef USE_AS_STRCHRNUL
|
||
+ cmp (%rax), %CHAR_REG
|
||
+ cmovne %rdx, %rax /* not CHAR: return rdx (zeroed at entry) */
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+END (STRCHR)
|
||
+# endif
|
||
diff --git a/sysdeps/x86_64/multiarch/strchr.c b/sysdeps/x86_64/multiarch/strchr.c
|
||
index 8df4609bf8..4ed1177c70 100644
|
||
--- a/sysdeps/x86_64/multiarch/strchr.c
|
||
+++ b/sysdeps/x86_64/multiarch/strchr.c
|
||
@@ -29,16 +29,28 @@
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||
|
||
static inline void *
|
||
IFUNC_SELECTOR (void)
|
||
{
|
||
const struct cpu_features* cpu_features = __get_cpu_features ();
|
||
|
||
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
|
||
- && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
||
- return OPTIMIZE (avx2);
|
||
+ {
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
|
||
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
|
||
+ return OPTIMIZE (evex);
|
||
+
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
||
+ return OPTIMIZE (avx2_rtm);
|
||
+
|
||
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
||
+ return OPTIMIZE (avx2);
|
||
+ }
|
||
|
||
if (CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF))
|
||
return OPTIMIZE (sse2_no_bsf);
|
||
diff --git a/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S b/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..cdcf818b91
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S
|
||
@@ -0,0 +1,3 @@
|
||
+#define STRCHR __strchrnul_avx2_rtm
|
||
+#define USE_AS_STRCHRNUL 1
|
||
+#include "strchr-avx2-rtm.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/strchrnul-evex.S b/sysdeps/x86_64/multiarch/strchrnul-evex.S
|
||
new file mode 100644
|
||
index 0000000000..064fe7ca9e
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strchrnul-evex.S
|
||
@@ -0,0 +1,3 @@
|
||
+#define STRCHR __strchrnul_evex
|
||
+#define USE_AS_STRCHRNUL 1
|
||
+#include "strchr-evex.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..aecd30d97f
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S
|
||
@@ -0,0 +1,12 @@
|
||
+#ifndef STRCMP
|
||
+# define STRCMP __strcmp_avx2_rtm
|
||
+#endif
|
||
+
|
||
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
|
||
+
|
||
+#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
|
||
+
|
||
+#define SECTION(p) p##.avx.rtm
|
||
+
|
||
+#include "strcmp-avx2.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
|
||
index d42b04b54f..759e5b64c2 100644
|
||
--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
|
||
+++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
|
||
@@ -55,6 +55,10 @@
|
||
# define VZEROUPPER vzeroupper
|
||
# endif
|
||
|
||
+# ifndef SECTION
|
||
+# define SECTION(p) p##.avx
|
||
+# endif
|
||
+
|
||
/* Warning!
|
||
wcscmp/wcsncmp have to use SIGNED comparison for elements.
|
||
strcmp/strncmp have to use UNSIGNED comparison for elements.
|
||
@@ -75,7 +79,7 @@
|
||
the maximum offset is reached before a difference is found, zero is
|
||
returned. */
|
||
|
||
- .section .text.avx,"ax",@progbits
|
||
+ .section SECTION(.text),"ax",@progbits
|
||
ENTRY (STRCMP)
|
||
# ifdef USE_AS_STRNCMP
|
||
/* Check for simple cases (0 or 1) in offset. */
|
||
@@ -83,6 +87,16 @@ ENTRY (STRCMP)
|
||
je L(char0)
|
||
jb L(zero)
|
||
# ifdef USE_AS_WCSCMP
|
||
+# ifndef __ILP32__
|
||
+ movq %rdx, %rcx
|
||
+ /* Check if length could overflow when multiplied by
|
||
+ sizeof(wchar_t). Checking top 8 bits will cover all potential
|
||
+ overflow cases as well as redirect cases where it is impossible
|
||
+ for length to bound a valid memory region. In these cases just use
|
||
+ 'wcscmp'. */
|
||
+ shrq $56, %rcx
|
||
+ jnz __wcscmp_avx2
|
||
+# endif
|
||
/* Convert units: from wide to byte char. */
|
||
shl $2, %RDX_LP
|
||
# endif
|
||
@@ -127,8 +141,8 @@ L(return):
|
||
movzbl (%rsi, %rdx), %edx
|
||
subl %edx, %eax
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+L(return_vzeroupper):
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN
|
||
|
||
.p2align 4
|
||
L(return_vec_size):
|
||
@@ -161,8 +175,7 @@ L(return_vec_size):
|
||
subl %edx, %eax
|
||
# endif
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(return_2_vec_size):
|
||
@@ -195,8 +208,7 @@ L(return_2_vec_size):
|
||
subl %edx, %eax
|
||
# endif
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(return_3_vec_size):
|
||
@@ -229,8 +241,7 @@ L(return_3_vec_size):
|
||
subl %edx, %eax
|
||
# endif
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(next_3_vectors):
|
||
@@ -356,8 +367,7 @@ L(back_to_loop):
|
||
subl %edx, %eax
|
||
# endif
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(test_vec):
|
||
@@ -400,8 +410,7 @@ L(test_vec):
|
||
subl %edx, %eax
|
||
# endif
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(test_2_vec):
|
||
@@ -444,8 +453,7 @@ L(test_2_vec):
|
||
subl %edx, %eax
|
||
# endif
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(test_3_vec):
|
||
@@ -486,8 +494,7 @@ L(test_3_vec):
|
||
subl %edx, %eax
|
||
# endif
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(loop_cross_page):
|
||
@@ -556,8 +563,7 @@ L(loop_cross_page):
|
||
subl %edx, %eax
|
||
# endif
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(loop_cross_page_2_vec):
|
||
@@ -631,8 +637,7 @@ L(loop_cross_page_2_vec):
|
||
subl %edx, %eax
|
||
# endif
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
# ifdef USE_AS_STRNCMP
|
||
L(string_nbyte_offset_check):
|
||
@@ -674,8 +679,7 @@ L(cross_page_loop):
|
||
# ifndef USE_AS_WCSCMP
|
||
L(different):
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
# ifdef USE_AS_WCSCMP
|
||
.p2align 4
|
||
@@ -685,16 +689,14 @@ L(different):
|
||
setl %al
|
||
negl %eax
|
||
orl $1, %eax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
# endif
|
||
|
||
# ifdef USE_AS_STRNCMP
|
||
.p2align 4
|
||
L(zero):
|
||
xorl %eax, %eax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(char0):
|
||
@@ -708,8 +710,7 @@ L(char0):
|
||
movzbl (%rdi), %eax
|
||
subl %ecx, %eax
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
# endif
|
||
|
||
.p2align 4
|
||
@@ -734,8 +735,7 @@ L(last_vector):
|
||
movzbl (%rsi, %rdx), %edx
|
||
subl %edx, %eax
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
/* Comparing on page boundary region requires special treatment:
|
||
It must done one vector at the time, starting with the wider
|
||
@@ -856,7 +856,6 @@ L(cross_page_4bytes):
|
||
testl %eax, %eax
|
||
jne L(cross_page_loop)
|
||
subl %ecx, %eax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
END (STRCMP)
|
||
#endif
|
||
diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S
|
||
new file mode 100644
|
||
index 0000000000..459eeed09f
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strcmp-evex.S
|
||
@@ -0,0 +1,1043 @@
|
||
+/* strcmp/wcscmp/strncmp/wcsncmp optimized with 256-bit EVEX instructions.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#if IS_IN (libc)
|
||
+
|
||
+# include <sysdep.h>
|
||
+
|
||
+# ifndef STRCMP
|
||
+# define STRCMP __strcmp_evex
|
||
+# endif
|
||
+
|
||
+# define PAGE_SIZE 4096
|
||
+
|
||
+/* VEC_SIZE = Number of bytes in a ymm register */
|
||
+# define VEC_SIZE 32
|
||
+
|
||
+/* Shift for dividing by (VEC_SIZE * 4). */
|
||
+# define DIVIDE_BY_VEC_4_SHIFT 7
|
||
+# if (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT)
|
||
+# error (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT)
|
||
+# endif
|
||
+
|
||
+# define VMOVU vmovdqu64
|
||
+# define VMOVA vmovdqa64
|
||
+
|
||
+# ifdef USE_AS_WCSCMP
|
||
+/* Compare packed dwords. */
|
||
+# define VPCMP vpcmpd
|
||
+# define SHIFT_REG32 r8d
|
||
+# define SHIFT_REG64 r8
|
||
+/* 1 dword char == 4 bytes. */
|
||
+# define SIZE_OF_CHAR 4
|
||
+# else
|
||
+/* Compare packed bytes. */
|
||
+# define VPCMP vpcmpb
|
||
+# define SHIFT_REG32 ecx
|
||
+# define SHIFT_REG64 rcx
|
||
+/* 1 byte char == 1 byte. */
|
||
+# define SIZE_OF_CHAR 1
|
||
+# endif
|
||
+
|
||
+# define XMMZERO xmm16
|
||
+# define XMM0 xmm17
|
||
+# define XMM1 xmm18
|
||
+
|
||
+# define YMMZERO ymm16
|
||
+# define YMM0 ymm17
|
||
+# define YMM1 ymm18
|
||
+# define YMM2 ymm19
|
||
+# define YMM3 ymm20
|
||
+# define YMM4 ymm21
|
||
+# define YMM5 ymm22
|
||
+# define YMM6 ymm23
|
||
+# define YMM7 ymm24
|
||
+
|
||
+/* Warning!
|
||
+ wcscmp/wcsncmp have to use SIGNED comparison for elements.
|
||
+ strcmp/strncmp have to use UNSIGNED comparison for elements.
|
||
+*/
|
||
+
|
||
+/* The main idea of the string comparison (byte or dword) using 256-bit
|
||
+ EVEX instructions consists of comparing (VPCMP) two ymm vectors. The
|
||
+ latter can be on either packed bytes or dwords depending on
|
||
+ USE_AS_WCSCMP. In order to check the null char, algorithm keeps the
|
||
+ matched bytes/dwords, requiring 5 EVEX instructions (3 VPCMP and 2
|
||
+ KORD). In general, the costs of comparing VEC_SIZE bytes (32-bytes)
|
||
+ are 3 VPCMP and 2 KORD instructions, together with VMOVU and ktestd
|
||
+ instructions. Main loop (away from page boundary) compares 4
|
||
+ vectors at a time, effectively comparing 4 x VEC_SIZE bytes (128
|
||
+ bytes) on each loop.
|
||
+
|
||
+ The routine strncmp/wcsncmp (enabled by defining USE_AS_STRNCMP) logic
|
||
+ is the same as strcmp, except that a maximum offset is tracked. If
|
||
+ the maximum offset is reached before a difference is found, zero is
|
||
+ returned. */
|
||
+
|
||
+ .section .text.evex,"ax",@progbits
|
||
+ENTRY (STRCMP)
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ /* Check for simple cases (0 or 1) in offset. */
|
||
+ cmp $1, %RDX_LP
|
||
+ je L(char0)
|
||
+ jb L(zero)
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ /* Convert units: from wide to byte char. */
|
||
+ shl $2, %RDX_LP
|
||
+# endif
|
||
+ /* Register %r11 tracks the maximum offset. */
|
||
+ mov %RDX_LP, %R11_LP
|
||
+# endif
|
||
+ movl %edi, %eax
|
||
+ xorl %edx, %edx
|
||
+ /* Make %XMMZERO (%YMMZERO) all zeros in this function. */
|
||
+ vpxorq %XMMZERO, %XMMZERO, %XMMZERO
|
||
+ orl %esi, %eax
|
||
+ andl $(PAGE_SIZE - 1), %eax
|
||
+ cmpl $(PAGE_SIZE - (VEC_SIZE * 4)), %eax
|
||
+ jg L(cross_page)
|
||
+ /* Start comparing 4 vectors. */
|
||
+ VMOVU (%rdi), %YMM0
|
||
+ VMOVU (%rsi), %YMM1
|
||
+
|
||
+ /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */
|
||
+ VPCMP $4, %YMM0, %YMM1, %k0
|
||
+
|
||
+ /* Check for NULL in YMM0. */
|
||
+ VPCMP $0, %YMMZERO, %YMM0, %k1
|
||
+ /* Check for NULL in YMM1. */
|
||
+ VPCMP $0, %YMMZERO, %YMM1, %k2
|
||
+ /* Each bit in K1 represents a NULL in YMM0 or YMM1. */
|
||
+ kord %k1, %k2, %k1
|
||
+
|
||
+ /* Each bit in K1 represents:
|
||
+ 1. A mismatch in YMM0 and YMM1. Or
|
||
+ 2. A NULL in YMM0 or YMM1.
|
||
+ */
|
||
+ kord %k0, %k1, %k1
|
||
+
|
||
+ ktestd %k1, %k1
|
||
+ je L(next_3_vectors)
|
||
+ kmovd %k1, %ecx
|
||
+ tzcntl %ecx, %edx
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ sall $2, %edx
|
||
+# endif
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ /* Return 0 if the mismatched index (%rdx) is after the maximum
|
||
+ offset (%r11). */
|
||
+ cmpq %r11, %rdx
|
||
+ jae L(zero)
|
||
+# endif
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ xorl %eax, %eax
|
||
+ movl (%rdi, %rdx), %ecx
|
||
+ cmpl (%rsi, %rdx), %ecx
|
||
+ je L(return)
|
||
+L(wcscmp_return):
|
||
+ setl %al
|
||
+ negl %eax
|
||
+ orl $1, %eax
|
||
+L(return):
|
||
+# else
|
||
+ movzbl (%rdi, %rdx), %eax
|
||
+ movzbl (%rsi, %rdx), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(return_vec_size):
|
||
+ kmovd %k1, %ecx
|
||
+ tzcntl %ecx, %edx
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ sall $2, %edx
|
||
+# endif
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ /* Return 0 if the mismatched index (%rdx + VEC_SIZE) is after
|
||
+ the maximum offset (%r11). */
|
||
+ addq $VEC_SIZE, %rdx
|
||
+ cmpq %r11, %rdx
|
||
+ jae L(zero)
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ xorl %eax, %eax
|
||
+ movl (%rdi, %rdx), %ecx
|
||
+ cmpl (%rsi, %rdx), %ecx
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl (%rdi, %rdx), %eax
|
||
+ movzbl (%rsi, %rdx), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+# else
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ xorl %eax, %eax
|
||
+ movl VEC_SIZE(%rdi, %rdx), %ecx
|
||
+ cmpl VEC_SIZE(%rsi, %rdx), %ecx
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl VEC_SIZE(%rdi, %rdx), %eax
|
||
+ movzbl VEC_SIZE(%rsi, %rdx), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(return_2_vec_size):
|
||
+ kmovd %k1, %ecx
|
||
+ tzcntl %ecx, %edx
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ sall $2, %edx
|
||
+# endif
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ /* Return 0 if the mismatched index (%rdx + 2 * VEC_SIZE) is
|
||
+ after the maximum offset (%r11). */
|
||
+ addq $(VEC_SIZE * 2), %rdx
|
||
+ cmpq %r11, %rdx
|
||
+ jae L(zero)
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ xorl %eax, %eax
|
||
+ movl (%rdi, %rdx), %ecx
|
||
+ cmpl (%rsi, %rdx), %ecx
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl (%rdi, %rdx), %eax
|
||
+ movzbl (%rsi, %rdx), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+# else
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ xorl %eax, %eax
|
||
+ movl (VEC_SIZE * 2)(%rdi, %rdx), %ecx
|
||
+ cmpl (VEC_SIZE * 2)(%rsi, %rdx), %ecx
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl (VEC_SIZE * 2)(%rdi, %rdx), %eax
|
||
+ movzbl (VEC_SIZE * 2)(%rsi, %rdx), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(return_3_vec_size):
|
||
+ kmovd %k1, %ecx
|
||
+ tzcntl %ecx, %edx
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ sall $2, %edx
|
||
+# endif
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ /* Return 0 if the mismatched index (%rdx + 3 * VEC_SIZE) is
|
||
+ after the maximum offset (%r11). */
|
||
+ addq $(VEC_SIZE * 3), %rdx
|
||
+ cmpq %r11, %rdx
|
||
+ jae L(zero)
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ xorl %eax, %eax
|
||
+ movl (%rdi, %rdx), %ecx
|
||
+ cmpl (%rsi, %rdx), %ecx
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl (%rdi, %rdx), %eax
|
||
+ movzbl (%rsi, %rdx), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+# else
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ xorl %eax, %eax
|
||
+ movl (VEC_SIZE * 3)(%rdi, %rdx), %ecx
|
||
+ cmpl (VEC_SIZE * 3)(%rsi, %rdx), %ecx
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl (VEC_SIZE * 3)(%rdi, %rdx), %eax
|
||
+ movzbl (VEC_SIZE * 3)(%rsi, %rdx), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(next_3_vectors):
|
||
+ VMOVU VEC_SIZE(%rdi), %YMM0
|
||
+ VMOVU VEC_SIZE(%rsi), %YMM1
|
||
+ /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */
|
||
+ VPCMP $4, %YMM0, %YMM1, %k0
|
||
+ VPCMP $0, %YMMZERO, %YMM0, %k1
|
||
+ VPCMP $0, %YMMZERO, %YMM1, %k2
|
||
+ /* Each bit in K1 represents a NULL in YMM0 or YMM1. */
|
||
+ kord %k1, %k2, %k1
|
||
+ /* Each bit in K1 represents a NULL or a mismatch. */
|
||
+ kord %k0, %k1, %k1
|
||
+ ktestd %k1, %k1
|
||
+ jne L(return_vec_size)
|
||
+
|
||
+ VMOVU (VEC_SIZE * 2)(%rdi), %YMM2
|
||
+ VMOVU (VEC_SIZE * 3)(%rdi), %YMM3
|
||
+ VMOVU (VEC_SIZE * 2)(%rsi), %YMM4
|
||
+ VMOVU (VEC_SIZE * 3)(%rsi), %YMM5
|
||
+
|
||
+ /* Each bit in K0 represents a mismatch in YMM2 and YMM4. */
|
||
+ VPCMP $4, %YMM2, %YMM4, %k0
|
||
+ VPCMP $0, %YMMZERO, %YMM2, %k1
|
||
+ VPCMP $0, %YMMZERO, %YMM4, %k2
|
||
+ /* Each bit in K1 represents a NULL in YMM2 or YMM4. */
|
||
+ kord %k1, %k2, %k1
|
||
+ /* Each bit in K1 represents a NULL or a mismatch. */
|
||
+ kord %k0, %k1, %k1
|
||
+ ktestd %k1, %k1
|
||
+ jne L(return_2_vec_size)
|
||
+
|
||
+ /* Each bit in K0 represents a mismatch in YMM3 and YMM5. */
|
||
+ VPCMP $4, %YMM3, %YMM5, %k0
|
||
+ VPCMP $0, %YMMZERO, %YMM3, %k1
|
||
+ VPCMP $0, %YMMZERO, %YMM5, %k2
|
||
+ /* Each bit in K1 represents a NULL in YMM3 or YMM5. */
|
||
+ kord %k1, %k2, %k1
|
||
+ /* Each bit in K1 represents a NULL or a mismatch. */
|
||
+ kord %k0, %k1, %k1
|
||
+ ktestd %k1, %k1
|
||
+ jne L(return_3_vec_size)
|
||
+L(main_loop_header):
|
||
+ leaq (VEC_SIZE * 4)(%rdi), %rdx
|
||
+ movl $PAGE_SIZE, %ecx
|
||
+ /* Align load via RAX. */
|
||
+ andq $-(VEC_SIZE * 4), %rdx
|
||
+ subq %rdi, %rdx
|
||
+ leaq (%rdi, %rdx), %rax
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ /* Starting from this point, the maximum offset, or simply the
|
||
+ 'offset', DECREASES by the same amount when base pointers are
|
||
+ moved forward. Return 0 when:
|
||
+ 1) On match: offset <= the matched vector index.
|
||
+ 2) On mismatch, offset is before the mismatched index.
|
||
+ */
|
||
+ subq %rdx, %r11
|
||
+ jbe L(zero)
|
||
+# endif
|
||
+ addq %rsi, %rdx
|
||
+ movq %rdx, %rsi
|
||
+ andl $(PAGE_SIZE - 1), %esi
|
||
+ /* Number of bytes before page crossing. */
|
||
+ subq %rsi, %rcx
|
||
+ /* Number of VEC_SIZE * 4 blocks before page crossing. */
|
||
+ shrq $DIVIDE_BY_VEC_4_SHIFT, %rcx
|
||
+ /* ESI: Number of VEC_SIZE * 4 blocks before page crossing. */
|
||
+ movl %ecx, %esi
|
||
+ jmp L(loop_start)
|
||
+
|
||
+ .p2align 4
|
||
+L(loop):
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ /* Base pointers are moved forward by 4 * VEC_SIZE. Decrease
|
||
+ the maximum offset (%r11) by the same amount. */
|
||
+ subq $(VEC_SIZE * 4), %r11
|
||
+ jbe L(zero)
|
||
+# endif
|
||
+ addq $(VEC_SIZE * 4), %rax
|
||
+ addq $(VEC_SIZE * 4), %rdx
|
||
+L(loop_start):
|
||
+ testl %esi, %esi
|
||
+ leal -1(%esi), %esi
|
||
+ je L(loop_cross_page)
|
||
+L(back_to_loop):
|
||
+ /* Main loop, comparing 4 vectors at a time. */
|
||
+ VMOVA (%rax), %YMM0
|
||
+ VMOVA VEC_SIZE(%rax), %YMM2
|
||
+ VMOVA (VEC_SIZE * 2)(%rax), %YMM4
|
||
+ VMOVA (VEC_SIZE * 3)(%rax), %YMM6
|
||
+ VMOVU (%rdx), %YMM1
|
||
+ VMOVU VEC_SIZE(%rdx), %YMM3
|
||
+ VMOVU (VEC_SIZE * 2)(%rdx), %YMM5
|
||
+ VMOVU (VEC_SIZE * 3)(%rdx), %YMM7
|
||
+
|
||
+ VPCMP $4, %YMM0, %YMM1, %k0
|
||
+ VPCMP $0, %YMMZERO, %YMM0, %k1
|
||
+ VPCMP $0, %YMMZERO, %YMM1, %k2
|
||
+ kord %k1, %k2, %k1
|
||
+ /* Each bit in K4 represents a NULL or a mismatch in YMM0 and
|
||
+ YMM1. */
|
||
+ kord %k0, %k1, %k4
|
||
+
|
||
+ VPCMP $4, %YMM2, %YMM3, %k0
|
||
+ VPCMP $0, %YMMZERO, %YMM2, %k1
|
||
+ VPCMP $0, %YMMZERO, %YMM3, %k2
|
||
+ kord %k1, %k2, %k1
|
||
+ /* Each bit in K5 represents a NULL or a mismatch in YMM2 and
|
||
+ YMM3. */
|
||
+ kord %k0, %k1, %k5
|
||
+
|
||
+ VPCMP $4, %YMM4, %YMM5, %k0
|
||
+ VPCMP $0, %YMMZERO, %YMM4, %k1
|
||
+ VPCMP $0, %YMMZERO, %YMM5, %k2
|
||
+ kord %k1, %k2, %k1
|
||
+ /* Each bit in K6 represents a NULL or a mismatch in YMM4 and
|
||
+ YMM5. */
|
||
+ kord %k0, %k1, %k6
|
||
+
|
||
+ VPCMP $4, %YMM6, %YMM7, %k0
|
||
+ VPCMP $0, %YMMZERO, %YMM6, %k1
|
||
+ VPCMP $0, %YMMZERO, %YMM7, %k2
|
||
+ kord %k1, %k2, %k1
|
||
+ /* Each bit in K7 represents a NULL or a mismatch in YMM6 and
|
||
+ YMM7. */
|
||
+ kord %k0, %k1, %k7
|
||
+
|
||
+ kord %k4, %k5, %k0
|
||
+ kord %k6, %k7, %k1
|
||
+
|
||
+ /* Test each mask (32 bits) individually because for VEC_SIZE
|
||
+ == 32 it is not possible to OR the four masks and keep all bits
|
||
+ in a 64-bit integer register, differing from SSE2 strcmp
|
||
+ where ORing is possible. */
|
||
+ kortestd %k0, %k1
|
||
+ je L(loop)
|
||
+ ktestd %k4, %k4
|
||
+ je L(test_vec)
|
||
+ kmovd %k4, %edi
|
||
+ tzcntl %edi, %ecx
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ sall $2, %ecx
|
||
+# endif
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ cmpq %rcx, %r11
|
||
+ jbe L(zero)
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ movq %rax, %rsi
|
||
+ xorl %eax, %eax
|
||
+ movl (%rsi, %rcx), %edi
|
||
+ cmpl (%rdx, %rcx), %edi
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl (%rax, %rcx), %eax
|
||
+ movzbl (%rdx, %rcx), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+# else
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ movq %rax, %rsi
|
||
+ xorl %eax, %eax
|
||
+ movl (%rsi, %rcx), %edi
|
||
+ cmpl (%rdx, %rcx), %edi
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl (%rax, %rcx), %eax
|
||
+ movzbl (%rdx, %rcx), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(test_vec):
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ /* The first vector matched. Return 0 if the maximum offset
|
||
+ (%r11) <= VEC_SIZE. */
|
||
+ cmpq $VEC_SIZE, %r11
|
||
+ jbe L(zero)
|
||
+# endif
|
||
+ ktestd %k5, %k5
|
||
+ je L(test_2_vec)
|
||
+ kmovd %k5, %ecx
|
||
+ tzcntl %ecx, %edi
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ sall $2, %edi
|
||
+# endif
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ addq $VEC_SIZE, %rdi
|
||
+ cmpq %rdi, %r11
|
||
+ jbe L(zero)
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ movq %rax, %rsi
|
||
+ xorl %eax, %eax
|
||
+ movl (%rsi, %rdi), %ecx
|
||
+ cmpl (%rdx, %rdi), %ecx
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl (%rax, %rdi), %eax
|
||
+ movzbl (%rdx, %rdi), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+# else
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ movq %rax, %rsi
|
||
+ xorl %eax, %eax
|
||
+ movl VEC_SIZE(%rsi, %rdi), %ecx
|
||
+ cmpl VEC_SIZE(%rdx, %rdi), %ecx
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl VEC_SIZE(%rax, %rdi), %eax
|
||
+ movzbl VEC_SIZE(%rdx, %rdi), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(test_2_vec):
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ /* The first 2 vectors matched. Return 0 if the maximum offset
|
||
+ (%r11) <= 2 * VEC_SIZE. */
|
||
+ cmpq $(VEC_SIZE * 2), %r11
|
||
+ jbe L(zero)
|
||
+# endif
|
||
+ ktestd %k6, %k6
|
||
+ je L(test_3_vec)
|
||
+ kmovd %k6, %ecx
|
||
+ tzcntl %ecx, %edi
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ sall $2, %edi
|
||
+# endif
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ addq $(VEC_SIZE * 2), %rdi
|
||
+ cmpq %rdi, %r11
|
||
+ jbe L(zero)
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ movq %rax, %rsi
|
||
+ xorl %eax, %eax
|
||
+ movl (%rsi, %rdi), %ecx
|
||
+ cmpl (%rdx, %rdi), %ecx
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl (%rax, %rdi), %eax
|
||
+ movzbl (%rdx, %rdi), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+# else
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ movq %rax, %rsi
|
||
+ xorl %eax, %eax
|
||
+ movl (VEC_SIZE * 2)(%rsi, %rdi), %ecx
|
||
+ cmpl (VEC_SIZE * 2)(%rdx, %rdi), %ecx
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl (VEC_SIZE * 2)(%rax, %rdi), %eax
|
||
+ movzbl (VEC_SIZE * 2)(%rdx, %rdi), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(test_3_vec):
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ /* The first 3 vectors matched. Return 0 if the maximum offset
|
||
+ (%r11) <= 3 * VEC_SIZE. */
|
||
+ cmpq $(VEC_SIZE * 3), %r11
|
||
+ jbe L(zero)
|
||
+# endif
|
||
+ kmovd %k7, %esi
|
||
+ tzcntl %esi, %ecx
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ sall $2, %ecx
|
||
+# endif
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ addq $(VEC_SIZE * 3), %rcx
|
||
+ cmpq %rcx, %r11
|
||
+ jbe L(zero)
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ movq %rax, %rsi
|
||
+ xorl %eax, %eax
|
||
+ movl (%rsi, %rcx), %esi
|
||
+ cmpl (%rdx, %rcx), %esi
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl (%rax, %rcx), %eax
|
||
+ movzbl (%rdx, %rcx), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+# else
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ movq %rax, %rsi
|
||
+ xorl %eax, %eax
|
||
+ movl (VEC_SIZE * 3)(%rsi, %rcx), %esi
|
||
+ cmpl (VEC_SIZE * 3)(%rdx, %rcx), %esi
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl (VEC_SIZE * 3)(%rax, %rcx), %eax
|
||
+ movzbl (VEC_SIZE * 3)(%rdx, %rcx), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(loop_cross_page):
|
||
+ xorl %r10d, %r10d
|
||
+ movq %rdx, %rcx
|
||
+ /* Align load via RDX. We load the extra ECX bytes which should
|
||
+ be ignored. */
|
||
+ andl $((VEC_SIZE * 4) - 1), %ecx
|
||
+ /* R10 is -RCX. */
|
||
+ subq %rcx, %r10
|
||
+
|
||
+ /* This works only if VEC_SIZE * 2 == 64. */
|
||
+# if (VEC_SIZE * 2) != 64
|
||
+# error (VEC_SIZE * 2) != 64
|
||
+# endif
|
||
+
|
||
+ /* Check if the first VEC_SIZE * 2 bytes should be ignored. */
|
||
+ cmpl $(VEC_SIZE * 2), %ecx
|
||
+ jge L(loop_cross_page_2_vec)
|
||
+
|
||
+ VMOVU (%rax, %r10), %YMM2
|
||
+ VMOVU VEC_SIZE(%rax, %r10), %YMM3
|
||
+ VMOVU (%rdx, %r10), %YMM4
|
||
+ VMOVU VEC_SIZE(%rdx, %r10), %YMM5
|
||
+
|
||
+ VPCMP $4, %YMM4, %YMM2, %k0
|
||
+ VPCMP $0, %YMMZERO, %YMM2, %k1
|
||
+ VPCMP $0, %YMMZERO, %YMM4, %k2
|
||
+ kord %k1, %k2, %k1
|
||
+ /* Each bit in K1 represents a NULL or a mismatch in YMM2 and
|
||
+ YMM4. */
|
||
+ kord %k0, %k1, %k1
|
||
+
|
||
+ VPCMP $4, %YMM5, %YMM3, %k3
|
||
+ VPCMP $0, %YMMZERO, %YMM3, %k4
|
||
+ VPCMP $0, %YMMZERO, %YMM5, %k5
|
||
+ kord %k4, %k5, %k4
|
||
+ /* Each bit in K3 represents a NULL or a mismatch in YMM3 and
|
||
+ YMM5. */
|
||
+ kord %k3, %k4, %k3
|
||
+
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ /* NB: Each bit in K1/K3 represents 4-byte element. */
|
||
+ kshiftlw $8, %k3, %k2
|
||
+ /* NB: Divide shift count by 4 since each bit in K1 represents 4
|
||
+ bytes. */
|
||
+ movl %ecx, %SHIFT_REG32
|
||
+ sarl $2, %SHIFT_REG32
|
||
+# else
|
||
+ kshiftlq $32, %k3, %k2
|
||
+# endif
|
||
+
|
||
+ /* Each bit in K1 represents a NULL or a mismatch. */
|
||
+ korq %k1, %k2, %k1
|
||
+ kmovq %k1, %rdi
|
||
+
|
||
+ /* Since ECX < VEC_SIZE * 2, simply skip the first ECX bytes. */
|
||
+ shrxq %SHIFT_REG64, %rdi, %rdi
|
||
+ testq %rdi, %rdi
|
||
+ je L(loop_cross_page_2_vec)
|
||
+ tzcntq %rdi, %rcx
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ sall $2, %ecx
|
||
+# endif
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ cmpq %rcx, %r11
|
||
+ jbe L(zero)
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ movq %rax, %rsi
|
||
+ xorl %eax, %eax
|
||
+ movl (%rsi, %rcx), %edi
|
||
+ cmpl (%rdx, %rcx), %edi
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl (%rax, %rcx), %eax
|
||
+ movzbl (%rdx, %rcx), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+# else
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ movq %rax, %rsi
|
||
+ xorl %eax, %eax
|
||
+ movl (%rsi, %rcx), %edi
|
||
+ cmpl (%rdx, %rcx), %edi
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl (%rax, %rcx), %eax
|
||
+ movzbl (%rdx, %rcx), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(loop_cross_page_2_vec):
|
||
+ /* The first VEC_SIZE * 2 bytes match or are ignored. */
|
||
+ VMOVU (VEC_SIZE * 2)(%rax, %r10), %YMM0
|
||
+ VMOVU (VEC_SIZE * 3)(%rax, %r10), %YMM1
|
||
+ VMOVU (VEC_SIZE * 2)(%rdx, %r10), %YMM2
|
||
+ VMOVU (VEC_SIZE * 3)(%rdx, %r10), %YMM3
|
||
+
|
||
+ VPCMP $4, %YMM0, %YMM2, %k0
|
||
+ VPCMP $0, %YMMZERO, %YMM0, %k1
|
||
+ VPCMP $0, %YMMZERO, %YMM2, %k2
|
||
+ kord %k1, %k2, %k1
|
||
+ /* Each bit in K1 represents a NULL or a mismatch in YMM0 and
|
||
+ YMM2. */
|
||
+ kord %k0, %k1, %k1
|
||
+
|
||
+ VPCMP $4, %YMM1, %YMM3, %k3
|
||
+ VPCMP $0, %YMMZERO, %YMM1, %k4
|
||
+ VPCMP $0, %YMMZERO, %YMM3, %k5
|
||
+ kord %k4, %k5, %k4
|
||
+ /* Each bit in K3 represents a NULL or a mismatch in YMM1 and
|
||
+ YMM3. */
|
||
+ kord %k3, %k4, %k3
|
||
+
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ /* NB: Each bit in K1/K3 represents 4-byte element. */
|
||
+ kshiftlw $8, %k3, %k2
|
||
+# else
|
||
+ kshiftlq $32, %k3, %k2
|
||
+# endif
|
||
+
|
||
+ /* Each bit in K1 represents a NULL or a mismatch. */
|
||
+ korq %k1, %k2, %k1
|
||
+ kmovq %k1, %rdi
|
||
+
|
||
+ xorl %r8d, %r8d
|
||
+ /* If ECX > VEC_SIZE * 2, skip ECX - (VEC_SIZE * 2) bytes. */
|
||
+ subl $(VEC_SIZE * 2), %ecx
|
||
+ jle 1f
|
||
+ /* R8 has number of bytes skipped. */
|
||
+ movl %ecx, %r8d
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ /* NB: Divide shift count by 4 since each bit in K1 represents 4
|
||
+ bytes. */
|
||
+ sarl $2, %ecx
|
||
+# endif
|
||
+ /* Skip ECX bytes. */
|
||
+ shrq %cl, %rdi
|
||
+1:
|
||
+ /* Before jumping back to the loop, set ESI to the number of
|
||
+ VEC_SIZE * 4 blocks before page crossing. */
|
||
+ movl $(PAGE_SIZE / (VEC_SIZE * 4) - 1), %esi
|
||
+
|
||
+ testq %rdi, %rdi
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ /* At this point, if %rdi value is 0, it already tested
|
||
+ VEC_SIZE*4+%r10 bytes starting from %rax. This label
|
||
+ checks whether the strncmp maximum offset has been reached. */
|
||
+ je L(string_nbyte_offset_check)
|
||
+# else
|
||
+ je L(back_to_loop)
|
||
+# endif
|
||
+ tzcntq %rdi, %rcx
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ sall $2, %ecx
|
||
+# endif
|
||
+ addq %r10, %rcx
|
||
+ /* Adjust for number of bytes skipped. */
|
||
+ addq %r8, %rcx
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ addq $(VEC_SIZE * 2), %rcx
|
||
+ subq %rcx, %r11
|
||
+ jbe L(zero)
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ movq %rax, %rsi
|
||
+ xorl %eax, %eax
|
||
+ movl (%rsi, %rcx), %edi
|
||
+ cmpl (%rdx, %rcx), %edi
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl (%rax, %rcx), %eax
|
||
+ movzbl (%rdx, %rcx), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+# else
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ movq %rax, %rsi
|
||
+ xorl %eax, %eax
|
||
+ movl (VEC_SIZE * 2)(%rsi, %rcx), %edi
|
||
+ cmpl (VEC_SIZE * 2)(%rdx, %rcx), %edi
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl (VEC_SIZE * 2)(%rax, %rcx), %eax
|
||
+ movzbl (VEC_SIZE * 2)(%rdx, %rcx), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+# ifdef USE_AS_STRNCMP
|
||
+L(string_nbyte_offset_check):
|
||
+ leaq (VEC_SIZE * 4)(%r10), %r10
|
||
+ cmpq %r10, %r11
|
||
+ jbe L(zero)
|
||
+ jmp L(back_to_loop)
|
||
+# endif
|
||
+
|
||
+ .p2align 4
|
||
+L(cross_page_loop):
|
||
+ /* Check one byte/dword at a time. */
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ cmpl %ecx, %eax
|
||
+# else
|
||
+ subl %ecx, %eax
|
||
+# endif
|
||
+ jne L(different)
|
||
+ addl $SIZE_OF_CHAR, %edx
|
||
+ cmpl $(VEC_SIZE * 4), %edx
|
||
+ je L(main_loop_header)
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ cmpq %r11, %rdx
|
||
+ jae L(zero)
|
||
+# endif
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ movl (%rdi, %rdx), %eax
|
||
+ movl (%rsi, %rdx), %ecx
|
||
+# else
|
||
+ movzbl (%rdi, %rdx), %eax
|
||
+ movzbl (%rsi, %rdx), %ecx
|
||
+# endif
|
||
+ /* Check null char. */
|
||
+ testl %eax, %eax
|
||
+ jne L(cross_page_loop)
|
||
+ /* Since %eax == 0, subtract is OK for both SIGNED and UNSIGNED
|
||
+ comparisons. */
|
||
+ subl %ecx, %eax
|
||
+# ifndef USE_AS_WCSCMP
|
||
+L(different):
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ .p2align 4
|
||
+L(different):
|
||
+ /* Use movl to avoid modifying EFLAGS. */
|
||
+ movl $0, %eax
|
||
+ setl %al
|
||
+ negl %eax
|
||
+ orl $1, %eax
|
||
+ ret
|
||
+# endif
|
||
+
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ .p2align 4
|
||
+L(zero):
|
||
+ xorl %eax, %eax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(char0):
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ xorl %eax, %eax
|
||
+ movl (%rdi), %ecx
|
||
+ cmpl (%rsi), %ecx
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl (%rsi), %ecx
|
||
+ movzbl (%rdi), %eax
|
||
+ subl %ecx, %eax
|
||
+# endif
|
||
+ ret
|
||
+# endif
|
||
+
|
||
+ .p2align 4
|
||
+L(last_vector):
|
||
+ addq %rdx, %rdi
|
||
+ addq %rdx, %rsi
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ subq %rdx, %r11
|
||
+# endif
|
||
+ tzcntl %ecx, %edx
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ sall $2, %edx
|
||
+# endif
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ cmpq %r11, %rdx
|
||
+ jae L(zero)
|
||
+# endif
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ xorl %eax, %eax
|
||
+ movl (%rdi, %rdx), %ecx
|
||
+ cmpl (%rsi, %rdx), %ecx
|
||
+ jne L(wcscmp_return)
|
||
+# else
|
||
+ movzbl (%rdi, %rdx), %eax
|
||
+ movzbl (%rsi, %rdx), %edx
|
||
+ subl %edx, %eax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ /* Comparing on page boundary region requires special treatment:
|
||
+ It must be done one vector at a time, starting with the wider
|
||
+ ymm vector if possible, if not, with xmm. If fetching 16 bytes
|
||
+ (xmm) still passes the boundary, byte comparison must be done.
|
||
+ */
|
||
+ .p2align 4
|
||
+L(cross_page):
|
||
+ /* Try one ymm vector at a time. */
|
||
+ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
|
||
+ jg L(cross_page_1_vector)
|
||
+L(loop_1_vector):
|
||
+ VMOVU (%rdi, %rdx), %YMM0
|
||
+ VMOVU (%rsi, %rdx), %YMM1
|
||
+
|
||
+ /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */
|
||
+ VPCMP $4, %YMM0, %YMM1, %k0
|
||
+ VPCMP $0, %YMMZERO, %YMM0, %k1
|
||
+ VPCMP $0, %YMMZERO, %YMM1, %k2
|
||
+ /* Each bit in K1 represents a NULL in YMM0 or YMM1. */
|
||
+ kord %k1, %k2, %k1
|
||
+ /* Each bit in K1 represents a NULL or a mismatch. */
|
||
+ kord %k0, %k1, %k1
|
||
+ kmovd %k1, %ecx
|
||
+ testl %ecx, %ecx
|
||
+ jne L(last_vector)
|
||
+
|
||
+ addl $VEC_SIZE, %edx
|
||
+
|
||
+ addl $VEC_SIZE, %eax
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ /* Return 0 if the current offset (%rdx) >= the maximum offset
|
||
+ (%r11). */
|
||
+ cmpq %r11, %rdx
|
||
+ jae L(zero)
|
||
+# endif
|
||
+ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
|
||
+ jle L(loop_1_vector)
|
||
+L(cross_page_1_vector):
|
||
+ /* Less than 32 bytes to check, try one xmm vector. */
|
||
+ cmpl $(PAGE_SIZE - 16), %eax
|
||
+ jg L(cross_page_1_xmm)
|
||
+ VMOVU (%rdi, %rdx), %XMM0
|
||
+ VMOVU (%rsi, %rdx), %XMM1
|
||
+
|
||
+ /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */
|
||
+ VPCMP $4, %XMM0, %XMM1, %k0
|
||
+ VPCMP $0, %XMMZERO, %XMM0, %k1
|
||
+ VPCMP $0, %XMMZERO, %XMM1, %k2
|
||
+ /* Each bit in K1 represents a NULL in XMM0 or XMM1. */
|
||
+ korw %k1, %k2, %k1
|
||
+ /* Each bit in K1 represents a NULL or a mismatch. */
|
||
+ korw %k0, %k1, %k1
|
||
+ kmovw %k1, %ecx
|
||
+ testl %ecx, %ecx
|
||
+ jne L(last_vector)
|
||
+
|
||
+ addl $16, %edx
|
||
+# ifndef USE_AS_WCSCMP
|
||
+ addl $16, %eax
|
||
+# endif
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ /* Return 0 if the current offset (%rdx) >= the maximum offset
|
||
+ (%r11). */
|
||
+ cmpq %r11, %rdx
|
||
+ jae L(zero)
|
||
+# endif
|
||
+
|
||
+L(cross_page_1_xmm):
|
||
+# ifndef USE_AS_WCSCMP
|
||
+ /* Less than 16 bytes to check, try 8 byte vector. NB: No need
|
||
+ for wcscmp nor wcsncmp since wide char is 4 bytes. */
|
||
+ cmpl $(PAGE_SIZE - 8), %eax
|
||
+ jg L(cross_page_8bytes)
|
||
+ vmovq (%rdi, %rdx), %XMM0
|
||
+ vmovq (%rsi, %rdx), %XMM1
|
||
+
|
||
+ /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */
|
||
+ VPCMP $4, %XMM0, %XMM1, %k0
|
||
+ VPCMP $0, %XMMZERO, %XMM0, %k1
|
||
+ VPCMP $0, %XMMZERO, %XMM1, %k2
|
||
+ /* Each bit in K1 represents a NULL in XMM0 or XMM1. */
|
||
+ kord %k1, %k2, %k1
|
||
+ /* Each bit in K1 represents a NULL or a mismatch. */
|
||
+ kord %k0, %k1, %k1
|
||
+ kmovd %k1, %ecx
|
||
+
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ /* Only last 2 bits are valid. */
|
||
+ andl $0x3, %ecx
|
||
+# else
|
||
+ /* Only last 8 bits are valid. */
|
||
+ andl $0xff, %ecx
|
||
+# endif
|
||
+
|
||
+ testl %ecx, %ecx
|
||
+ jne L(last_vector)
|
||
+
|
||
+ addl $8, %edx
|
||
+ addl $8, %eax
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ /* Return 0 if the current offset (%rdx) >= the maximum offset
|
||
+ (%r11). */
|
||
+ cmpq %r11, %rdx
|
||
+ jae L(zero)
|
||
+# endif
|
||
+
|
||
+L(cross_page_8bytes):
|
||
+ /* Less than 8 bytes to check, try 4 byte vector. */
|
||
+ cmpl $(PAGE_SIZE - 4), %eax
|
||
+ jg L(cross_page_4bytes)
|
||
+ vmovd (%rdi, %rdx), %XMM0
|
||
+ vmovd (%rsi, %rdx), %XMM1
|
||
+
|
||
+ /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */
|
||
+ VPCMP $4, %XMM0, %XMM1, %k0
|
||
+ VPCMP $0, %XMMZERO, %XMM0, %k1
|
||
+ VPCMP $0, %XMMZERO, %XMM1, %k2
|
||
+ /* Each bit in K1 represents a NULL in XMM0 or XMM1. */
|
||
+ kord %k1, %k2, %k1
|
||
+ /* Each bit in K1 represents a NULL or a mismatch. */
|
||
+ kord %k0, %k1, %k1
|
||
+ kmovd %k1, %ecx
|
||
+
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ /* Only the last bit is valid. */
|
||
+ andl $0x1, %ecx
|
||
+# else
|
||
+ /* Only last 4 bits are valid. */
|
||
+ andl $0xf, %ecx
|
||
+# endif
|
||
+
|
||
+ testl %ecx, %ecx
|
||
+ jne L(last_vector)
|
||
+
|
||
+ addl $4, %edx
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ /* Return 0 if the current offset (%rdx) >= the maximum offset
|
||
+ (%r11). */
|
||
+ cmpq %r11, %rdx
|
||
+ jae L(zero)
|
||
+# endif
|
||
+
|
||
+L(cross_page_4bytes):
|
||
+# endif
|
||
+ /* Less than 4 bytes to check, try one byte/dword at a time. */
|
||
+# ifdef USE_AS_STRNCMP
|
||
+ cmpq %r11, %rdx
|
||
+ jae L(zero)
|
||
+# endif
|
||
+# ifdef USE_AS_WCSCMP
|
||
+ movl (%rdi, %rdx), %eax
|
||
+ movl (%rsi, %rdx), %ecx
|
||
+# else
|
||
+ movzbl (%rdi, %rdx), %eax
|
||
+ movzbl (%rsi, %rdx), %ecx
|
||
+# endif
|
||
+ testl %eax, %eax
|
||
+ jne L(cross_page_loop)
|
||
+ subl %ecx, %eax
|
||
+ ret
|
||
+END (STRCMP)
|
||
+#endif
|
||
diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c
|
||
index 16ae72a4c8..df4ba875d9 100644
|
||
--- a/sysdeps/x86_64/multiarch/strcmp.c
|
||
+++ b/sysdeps/x86_64/multiarch/strcmp.c
|
||
@@ -30,16 +30,29 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||
|
||
static inline void *
|
||
IFUNC_SELECTOR (void)
|
||
{
|
||
const struct cpu_features* cpu_features = __get_cpu_features ();
|
||
|
||
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
|
||
- && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
||
- return OPTIMIZE (avx2);
|
||
+ {
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
|
||
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
|
||
+ && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP))
|
||
+ return OPTIMIZE (evex);
|
||
+
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
||
+ return OPTIMIZE (avx2_rtm);
|
||
+
|
||
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
||
+ return OPTIMIZE (avx2);
|
||
+ }
|
||
|
||
if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
|
||
return OPTIMIZE (sse2_unaligned);
|
||
diff --git a/sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..c2c581ecf7
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S
|
||
@@ -0,0 +1,12 @@
|
||
+#ifndef STRCPY
|
||
+# define STRCPY __strcpy_avx2_rtm
|
||
+#endif
|
||
+
|
||
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
|
||
+
|
||
+#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
|
||
+
|
||
+#define SECTION(p) p##.avx.rtm
|
||
+
|
||
+#include "strcpy-avx2.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/strcpy-avx2.S b/sysdeps/x86_64/multiarch/strcpy-avx2.S
|
||
index 3f2f9e8170..1ce17253ab 100644
|
||
--- a/sysdeps/x86_64/multiarch/strcpy-avx2.S
|
||
+++ b/sysdeps/x86_64/multiarch/strcpy-avx2.S
|
||
@@ -37,6 +37,10 @@
|
||
# define VZEROUPPER vzeroupper
|
||
# endif
|
||
|
||
+# ifndef SECTION
|
||
+# define SECTION(p) p##.avx
|
||
+# endif
|
||
+
|
||
/* zero register */
|
||
#define xmmZ xmm0
|
||
#define ymmZ ymm0
|
||
@@ -46,7 +50,7 @@
|
||
|
||
# ifndef USE_AS_STRCAT
|
||
|
||
- .section .text.avx,"ax",@progbits
|
||
+ .section SECTION(.text),"ax",@progbits
|
||
ENTRY (STRCPY)
|
||
# ifdef USE_AS_STRNCPY
|
||
mov %RDX_LP, %R8_LP
|
||
@@ -369,8 +373,8 @@ L(CopyVecSizeExit):
|
||
lea 1(%rdi), %rdi
|
||
jnz L(StrncpyFillTailWithZero)
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+L(return_vzeroupper):
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN
|
||
|
||
.p2align 4
|
||
L(CopyTwoVecSize1):
|
||
@@ -553,8 +557,7 @@ L(Exit1):
|
||
lea 2(%rdi), %rdi
|
||
jnz L(StrncpyFillTailWithZero)
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(Exit2):
|
||
@@ -569,8 +572,7 @@ L(Exit2):
|
||
lea 3(%rdi), %rdi
|
||
jnz L(StrncpyFillTailWithZero)
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(Exit3):
|
||
@@ -584,8 +586,7 @@ L(Exit3):
|
||
lea 4(%rdi), %rdi
|
||
jnz L(StrncpyFillTailWithZero)
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(Exit4_7):
|
||
@@ -602,8 +603,7 @@ L(Exit4_7):
|
||
lea 1(%rdi, %rdx), %rdi
|
||
jnz L(StrncpyFillTailWithZero)
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(Exit8_15):
|
||
@@ -620,8 +620,7 @@ L(Exit8_15):
|
||
lea 1(%rdi, %rdx), %rdi
|
||
jnz L(StrncpyFillTailWithZero)
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(Exit16_31):
|
||
@@ -638,8 +637,7 @@ L(Exit16_31):
|
||
lea 1(%rdi, %rdx), %rdi
|
||
jnz L(StrncpyFillTailWithZero)
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(Exit32_63):
|
||
@@ -656,8 +654,7 @@ L(Exit32_63):
|
||
lea 1(%rdi, %rdx), %rdi
|
||
jnz L(StrncpyFillTailWithZero)
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
# ifdef USE_AS_STRNCPY
|
||
|
||
@@ -671,8 +668,7 @@ L(StrncpyExit1):
|
||
# ifdef USE_AS_STRCAT
|
||
movb $0, 1(%rdi)
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(StrncpyExit2):
|
||
@@ -684,8 +680,7 @@ L(StrncpyExit2):
|
||
# ifdef USE_AS_STRCAT
|
||
movb $0, 2(%rdi)
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(StrncpyExit3_4):
|
||
@@ -699,8 +694,7 @@ L(StrncpyExit3_4):
|
||
# ifdef USE_AS_STRCAT
|
||
movb $0, (%rdi, %r8)
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(StrncpyExit5_8):
|
||
@@ -714,8 +708,7 @@ L(StrncpyExit5_8):
|
||
# ifdef USE_AS_STRCAT
|
||
movb $0, (%rdi, %r8)
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(StrncpyExit9_16):
|
||
@@ -729,8 +722,7 @@ L(StrncpyExit9_16):
|
||
# ifdef USE_AS_STRCAT
|
||
movb $0, (%rdi, %r8)
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(StrncpyExit17_32):
|
||
@@ -744,8 +736,7 @@ L(StrncpyExit17_32):
|
||
# ifdef USE_AS_STRCAT
|
||
movb $0, (%rdi, %r8)
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(StrncpyExit33_64):
|
||
@@ -760,8 +751,7 @@ L(StrncpyExit33_64):
|
||
# ifdef USE_AS_STRCAT
|
||
movb $0, (%rdi, %r8)
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(StrncpyExit65):
|
||
@@ -778,50 +768,43 @@ L(StrncpyExit65):
|
||
# ifdef USE_AS_STRCAT
|
||
movb $0, 65(%rdi)
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
# ifndef USE_AS_STRCAT
|
||
|
||
.p2align 4
|
||
L(Fill1):
|
||
mov %dl, (%rdi)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(Fill2):
|
||
mov %dx, (%rdi)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(Fill3_4):
|
||
mov %dx, (%rdi)
|
||
mov %dx, -2(%rdi, %r8)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(Fill5_8):
|
||
mov %edx, (%rdi)
|
||
mov %edx, -4(%rdi, %r8)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(Fill9_16):
|
||
mov %rdx, (%rdi)
|
||
mov %rdx, -8(%rdi, %r8)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(Fill17_32):
|
||
vmovdqu %xmmZ, (%rdi)
|
||
vmovdqu %xmmZ, -16(%rdi, %r8)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(CopyVecSizeUnalignedVec2):
|
||
@@ -898,8 +881,7 @@ L(Fill):
|
||
cmp $1, %r8d
|
||
ja L(Fill2)
|
||
je L(Fill1)
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
/* end of ifndef USE_AS_STRCAT */
|
||
# endif
|
||
@@ -929,8 +911,7 @@ L(UnalignedFourVecSizeLeaveCase3):
|
||
# ifdef USE_AS_STRCAT
|
||
movb $0, (VEC_SIZE * 4)(%rdi)
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(UnalignedFourVecSizeLeaveCase2):
|
||
@@ -1001,16 +982,14 @@ L(StrncpyExit):
|
||
# ifdef USE_AS_STRCAT
|
||
movb $0, (%rdi)
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(ExitZero):
|
||
# ifndef USE_AS_STRCAT
|
||
mov %rdi, %rax
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
# endif
|
||
|
||
diff --git a/sysdeps/x86_64/multiarch/strcpy-evex.S b/sysdeps/x86_64/multiarch/strcpy-evex.S
|
||
new file mode 100644
|
||
index 0000000000..a343a1a692
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strcpy-evex.S
|
||
@@ -0,0 +1,1003 @@
|
||
+/* strcpy with 256-bit EVEX instructions.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#if IS_IN (libc)
|
||
+
|
||
+# ifndef USE_AS_STRCAT
|
||
+# include <sysdep.h>
|
||
+
|
||
+# ifndef STRCPY
|
||
+# define STRCPY __strcpy_evex
|
||
+# endif
|
||
+
|
||
+# endif
|
||
+
|
||
+# define VMOVU vmovdqu64
|
||
+# define VMOVA vmovdqa64
|
||
+
|
||
+/* Number of bytes in a vector register */
|
||
+# ifndef VEC_SIZE
|
||
+# define VEC_SIZE 32
|
||
+# endif
|
||
+
|
||
+# define XMM2 xmm18
|
||
+# define XMM3 xmm19
|
||
+
|
||
+# define YMM2 ymm18
|
||
+# define YMM3 ymm19
|
||
+# define YMM4 ymm20
|
||
+# define YMM5 ymm21
|
||
+# define YMM6 ymm22
|
||
+# define YMM7 ymm23
|
||
+
|
||
+# ifndef USE_AS_STRCAT
|
||
+
|
||
+/* zero register */
|
||
+# define XMMZERO xmm16
|
||
+# define YMMZERO ymm16
|
||
+# define YMM1 ymm17
|
||
+
|
||
+ .section .text.evex,"ax",@progbits
|
||
+ENTRY (STRCPY)
|
||
+# ifdef USE_AS_STRNCPY
|
||
+ mov %RDX_LP, %R8_LP
|
||
+ test %R8_LP, %R8_LP
|
||
+ jz L(ExitZero)
|
||
+# endif
|
||
+ mov %rsi, %rcx
|
||
+# ifndef USE_AS_STPCPY
|
||
+ mov %rdi, %rax /* save result */
|
||
+# endif
|
||
+
|
||
+ vpxorq %XMMZERO, %XMMZERO, %XMMZERO
|
||
+# endif
|
||
+
|
||
+ and $((VEC_SIZE * 4) - 1), %ecx
|
||
+ cmp $(VEC_SIZE * 2), %ecx
|
||
+ jbe L(SourceStringAlignmentLessTwoVecSize)
|
||
+
|
||
+ and $-VEC_SIZE, %rsi
|
||
+ and $(VEC_SIZE - 1), %ecx
|
||
+
|
||
+ vpcmpb $0, (%rsi), %YMMZERO, %k0
|
||
+ kmovd %k0, %edx
|
||
+ shr %cl, %rdx
|
||
+
|
||
+# ifdef USE_AS_STRNCPY
|
||
+# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
|
||
+ mov $VEC_SIZE, %r10
|
||
+ sub %rcx, %r10
|
||
+ cmp %r10, %r8
|
||
+# else
|
||
+ mov $(VEC_SIZE + 1), %r10
|
||
+ sub %rcx, %r10
|
||
+ cmp %r10, %r8
|
||
+# endif
|
||
+ jbe L(CopyVecSizeTailCase2OrCase3)
|
||
+# endif
|
||
+ test %edx, %edx
|
||
+ jnz L(CopyVecSizeTail)
|
||
+
|
||
+ vpcmpb $0, VEC_SIZE(%rsi), %YMMZERO, %k1
|
||
+ kmovd %k1, %edx
|
||
+
|
||
+# ifdef USE_AS_STRNCPY
|
||
+ add $VEC_SIZE, %r10
|
||
+ cmp %r10, %r8
|
||
+ jbe L(CopyTwoVecSizeCase2OrCase3)
|
||
+# endif
|
||
+ test %edx, %edx
|
||
+ jnz L(CopyTwoVecSize)
|
||
+
|
||
+ VMOVU (%rsi, %rcx), %YMM2 /* copy VEC_SIZE bytes */
|
||
+ VMOVU %YMM2, (%rdi)
|
||
+
|
||
+/* Source is VEC_SIZE-aligned here (aligned loads); destination may be unaligned (unaligned stores) */
|
||
+ .p2align 4
|
||
+L(UnalignVecSizeBoth):
|
||
+ sub %rcx, %rdi
|
||
+# ifdef USE_AS_STRNCPY
|
||
+ add %rcx, %r8
|
||
+ sbb %rcx, %rcx
|
||
+ or %rcx, %r8
|
||
+# endif
|
||
+ mov $VEC_SIZE, %rcx
|
||
+ VMOVA (%rsi, %rcx), %YMM2
|
||
+ VMOVU %YMM2, (%rdi, %rcx)
|
||
+ VMOVA VEC_SIZE(%rsi, %rcx), %YMM2
|
||
+ vpcmpb $0, %YMM2, %YMMZERO, %k0
|
||
+ kmovd %k0, %edx
|
||
+ add $VEC_SIZE, %rcx
|
||
+# ifdef USE_AS_STRNCPY
|
||
+ sub $(VEC_SIZE * 3), %r8
|
||
+ jbe L(CopyVecSizeCase2OrCase3)
|
||
+# endif
|
||
+ test %edx, %edx
|
||
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
|
||
+ jnz L(CopyVecSizeUnalignedVec2)
|
||
+# else
|
||
+ jnz L(CopyVecSize)
|
||
+# endif
|
||
+
|
||
+ VMOVU %YMM2, (%rdi, %rcx)
|
||
+ VMOVA VEC_SIZE(%rsi, %rcx), %YMM3
|
||
+ vpcmpb $0, %YMM3, %YMMZERO, %k0
|
||
+ kmovd %k0, %edx
|
||
+ add $VEC_SIZE, %rcx
|
||
+# ifdef USE_AS_STRNCPY
|
||
+ sub $VEC_SIZE, %r8
|
||
+ jbe L(CopyVecSizeCase2OrCase3)
|
||
+# endif
|
||
+ test %edx, %edx
|
||
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
|
||
+ jnz L(CopyVecSizeUnalignedVec3)
|
||
+# else
|
||
+ jnz L(CopyVecSize)
|
||
+# endif
|
||
+
|
||
+ VMOVU %YMM3, (%rdi, %rcx)
|
||
+ VMOVA VEC_SIZE(%rsi, %rcx), %YMM4
|
||
+ vpcmpb $0, %YMM4, %YMMZERO, %k0
|
||
+ kmovd %k0, %edx
|
||
+ add $VEC_SIZE, %rcx
|
||
+# ifdef USE_AS_STRNCPY
|
||
+ sub $VEC_SIZE, %r8
|
||
+ jbe L(CopyVecSizeCase2OrCase3)
|
||
+# endif
|
||
+ test %edx, %edx
|
||
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
|
||
+ jnz L(CopyVecSizeUnalignedVec4)
|
||
+# else
|
||
+ jnz L(CopyVecSize)
|
||
+# endif
|
||
+
|
||
+ VMOVU %YMM4, (%rdi, %rcx)
|
||
+ VMOVA VEC_SIZE(%rsi, %rcx), %YMM2
|
||
+ vpcmpb $0, %YMM2, %YMMZERO, %k0
|
||
+ kmovd %k0, %edx
|
||
+ add $VEC_SIZE, %rcx
|
||
+# ifdef USE_AS_STRNCPY
|
||
+ sub $VEC_SIZE, %r8
|
||
+ jbe L(CopyVecSizeCase2OrCase3)
|
||
+# endif
|
||
+ test %edx, %edx
|
||
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
|
||
+ jnz L(CopyVecSizeUnalignedVec2)
|
||
+# else
|
||
+ jnz L(CopyVecSize)
|
||
+# endif
|
||
+
|
||
+ VMOVU %YMM2, (%rdi, %rcx)
|
||
+ VMOVA VEC_SIZE(%rsi, %rcx), %YMM2
|
||
+ vpcmpb $0, %YMM2, %YMMZERO, %k0
|
||
+ kmovd %k0, %edx
|
||
+ add $VEC_SIZE, %rcx
|
||
+# ifdef USE_AS_STRNCPY
|
||
+ sub $VEC_SIZE, %r8
|
||
+ jbe L(CopyVecSizeCase2OrCase3)
|
||
+# endif
|
||
+ test %edx, %edx
|
||
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
|
||
+ jnz L(CopyVecSizeUnalignedVec2)
|
||
+# else
|
||
+ jnz L(CopyVecSize)
|
||
+# endif
|
||
+
|
||
+ VMOVA VEC_SIZE(%rsi, %rcx), %YMM3
|
||
+ VMOVU %YMM2, (%rdi, %rcx)
|
||
+ vpcmpb $0, %YMM3, %YMMZERO, %k0
|
||
+ kmovd %k0, %edx
|
||
+ add $VEC_SIZE, %rcx
|
||
+# ifdef USE_AS_STRNCPY
|
||
+ sub $VEC_SIZE, %r8
|
||
+ jbe L(CopyVecSizeCase2OrCase3)
|
||
+# endif
|
||
+ test %edx, %edx
|
||
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
|
||
+ jnz L(CopyVecSizeUnalignedVec3)
|
||
+# else
|
||
+ jnz L(CopyVecSize)
|
||
+# endif
|
||
+
|
||
+ VMOVU %YMM3, (%rdi, %rcx)
|
||
+ mov %rsi, %rdx
|
||
+ lea VEC_SIZE(%rsi, %rcx), %rsi
|
||
+ and $-(VEC_SIZE * 4), %rsi
|
||
+ sub %rsi, %rdx
|
||
+ sub %rdx, %rdi
|
||
+# ifdef USE_AS_STRNCPY
|
||
+ lea (VEC_SIZE * 8)(%r8, %rdx), %r8
|
||
+# endif
|
||
+L(UnalignedFourVecSizeLoop):
|
||
+ VMOVA (%rsi), %YMM4
|
||
+ VMOVA VEC_SIZE(%rsi), %YMM5
|
||
+ VMOVA (VEC_SIZE * 2)(%rsi), %YMM6
|
||
+ VMOVA (VEC_SIZE * 3)(%rsi), %YMM7
|
||
+ vpminub %YMM5, %YMM4, %YMM2
|
||
+ vpminub %YMM7, %YMM6, %YMM3
|
||
+ vpminub %YMM2, %YMM3, %YMM2
|
||
+ /* If K7 != 0, there is a null byte. */
|
||
+ vpcmpb $0, %YMM2, %YMMZERO, %k7
|
||
+ kmovd %k7, %edx
|
||
+# ifdef USE_AS_STRNCPY
|
||
+ sub $(VEC_SIZE * 4), %r8
|
||
+ jbe L(UnalignedLeaveCase2OrCase3)
|
||
+# endif
|
||
+ test %edx, %edx
|
||
+ jnz L(UnalignedFourVecSizeLeave)
|
||
+
|
||
+L(UnalignedFourVecSizeLoop_start):
|
||
+ add $(VEC_SIZE * 4), %rdi
|
||
+ add $(VEC_SIZE * 4), %rsi
|
||
+ VMOVU %YMM4, -(VEC_SIZE * 4)(%rdi)
|
||
+ VMOVA (%rsi), %YMM4
|
||
+ VMOVU %YMM5, -(VEC_SIZE * 3)(%rdi)
|
||
+ VMOVA VEC_SIZE(%rsi), %YMM5
|
||
+ vpminub %YMM5, %YMM4, %YMM2
|
||
+ VMOVU %YMM6, -(VEC_SIZE * 2)(%rdi)
|
||
+ VMOVA (VEC_SIZE * 2)(%rsi), %YMM6
|
||
+ VMOVU %YMM7, -VEC_SIZE(%rdi)
|
||
+ VMOVA (VEC_SIZE * 3)(%rsi), %YMM7
|
||
+ vpminub %YMM7, %YMM6, %YMM3
|
||
+ vpminub %YMM2, %YMM3, %YMM2
|
||
+ /* If K7 != 0, there is a null byte. */
|
||
+ vpcmpb $0, %YMM2, %YMMZERO, %k7
|
||
+ kmovd %k7, %edx
|
||
+# ifdef USE_AS_STRNCPY
|
||
+ sub $(VEC_SIZE * 4), %r8
|
||
+ jbe L(UnalignedLeaveCase2OrCase3)
|
||
+# endif
|
||
+ test %edx, %edx
|
||
+ jz L(UnalignedFourVecSizeLoop_start)
|
||
+
|
||
+L(UnalignedFourVecSizeLeave):
|
||
+ vpcmpb $0, %YMM4, %YMMZERO, %k1
|
||
+ kmovd %k1, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(CopyVecSizeUnaligned_0)
|
||
+
|
||
+ vpcmpb $0, %YMM5, %YMMZERO, %k2
|
||
+ kmovd %k2, %ecx
|
||
+ test %ecx, %ecx
|
||
+ jnz L(CopyVecSizeUnaligned_16)
|
||
+
|
||
+ vpcmpb $0, %YMM6, %YMMZERO, %k3
|
||
+ kmovd %k3, %edx
|
||
+ test %edx, %edx
|
||
+ jnz L(CopyVecSizeUnaligned_32)
|
||
+
|
||
+ vpcmpb $0, %YMM7, %YMMZERO, %k4
|
||
+ kmovd %k4, %ecx
|
||
+ bsf %ecx, %edx
|
||
+ VMOVU %YMM4, (%rdi)
|
||
+ VMOVU %YMM5, VEC_SIZE(%rdi)
|
||
+ VMOVU %YMM6, (VEC_SIZE * 2)(%rdi)
|
||
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea (VEC_SIZE * 3)(%rdi, %rdx), %rax
|
||
+# endif
|
||
+ VMOVU %YMM7, (VEC_SIZE * 3)(%rdi)
|
||
+ add $(VEC_SIZE - 1), %r8
|
||
+ sub %rdx, %r8
|
||
+ lea ((VEC_SIZE * 3) + 1)(%rdi, %rdx), %rdi
|
||
+ jmp L(StrncpyFillTailWithZero)
|
||
+# else
|
||
+ add $(VEC_SIZE * 3), %rsi
|
||
+ add $(VEC_SIZE * 3), %rdi
|
||
+ jmp L(CopyVecSizeExit)
|
||
+# endif
|
||
+
|
||
+/* If the source offset within its (VEC_SIZE * 4)-aligned block is at most (VEC_SIZE * 2) */
|
||
+
|
||
+L(SourceStringAlignmentLessTwoVecSize):
|
||
+ VMOVU (%rsi), %YMM3
|
||
+ VMOVU VEC_SIZE(%rsi), %YMM2
|
||
+ vpcmpb $0, %YMM3, %YMMZERO, %k0
|
||
+ kmovd %k0, %edx
|
||
+
|
||
+# ifdef USE_AS_STRNCPY
|
||
+# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
|
||
+ cmp $VEC_SIZE, %r8
|
||
+# else
|
||
+ cmp $(VEC_SIZE + 1), %r8
|
||
+# endif
|
||
+ jbe L(CopyVecSizeTail1Case2OrCase3)
|
||
+# endif
|
||
+ test %edx, %edx
|
||
+ jnz L(CopyVecSizeTail1)
|
||
+
|
||
+ VMOVU %YMM3, (%rdi)
|
||
+ vpcmpb $0, %YMM2, %YMMZERO, %k0
|
||
+ kmovd %k0, %edx
|
||
+
|
||
+# ifdef USE_AS_STRNCPY
|
||
+# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
|
||
+ cmp $(VEC_SIZE * 2), %r8
|
||
+# else
|
||
+ cmp $((VEC_SIZE * 2) + 1), %r8
|
||
+# endif
|
||
+ jbe L(CopyTwoVecSize1Case2OrCase3)
|
||
+# endif
|
||
+ test %edx, %edx
|
||
+ jnz L(CopyTwoVecSize1)
|
||
+
|
||
+ and $-VEC_SIZE, %rsi
|
||
+ and $(VEC_SIZE - 1), %ecx
|
||
+ jmp L(UnalignVecSizeBoth)
|
||
+
|
||
+/*------End of main part with loops---------------------*/
|
||
+
|
||
+/* Case1 */
|
||
+
|
||
+# if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
|
||
+ .p2align 4
|
||
+L(CopyVecSize):
|
||
+ add %rcx, %rdi
|
||
+# endif
|
||
+L(CopyVecSizeTail):
|
||
+ add %rcx, %rsi
|
||
+L(CopyVecSizeTail1):
|
||
+ bsf %edx, %edx
|
||
+L(CopyVecSizeExit):
|
||
+ cmp $32, %edx
|
||
+ jae L(Exit32_63)
|
||
+ cmp $16, %edx
|
||
+ jae L(Exit16_31)
|
||
+ cmp $8, %edx
|
||
+ jae L(Exit8_15)
|
||
+ cmp $4, %edx
|
||
+ jae L(Exit4_7)
|
||
+ cmp $3, %edx
|
||
+ je L(Exit3)
|
||
+ cmp $1, %edx
|
||
+ ja L(Exit2)
|
||
+ je L(Exit1)
|
||
+ movb $0, (%rdi)
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea (%rdi), %rax
|
||
+# endif
|
||
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
|
||
+ sub $1, %r8
|
||
+ lea 1(%rdi), %rdi
|
||
+ jnz L(StrncpyFillTailWithZero)
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(CopyTwoVecSize1):
|
||
+ add $VEC_SIZE, %rsi
|
||
+ add $VEC_SIZE, %rdi
|
||
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
|
||
+ sub $VEC_SIZE, %r8
|
||
+# endif
|
||
+ jmp L(CopyVecSizeTail1)
|
||
+
|
||
+ .p2align 4
|
||
+L(CopyTwoVecSize):
|
||
+ bsf %edx, %edx
|
||
+ add %rcx, %rsi
|
||
+ add $VEC_SIZE, %edx
|
||
+ sub %ecx, %edx
|
||
+ jmp L(CopyVecSizeExit)
|
||
+
|
||
+ .p2align 4
|
||
+L(CopyVecSizeUnaligned_0):
|
||
+ bsf %edx, %edx
|
||
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea (%rdi, %rdx), %rax
|
||
+# endif
|
||
+ VMOVU %YMM4, (%rdi)
|
||
+ add $((VEC_SIZE * 4) - 1), %r8
|
||
+ sub %rdx, %r8
|
||
+ lea 1(%rdi, %rdx), %rdi
|
||
+ jmp L(StrncpyFillTailWithZero)
|
||
+# else
|
||
+ jmp L(CopyVecSizeExit)
|
||
+# endif
|
||
+
|
||
+ .p2align 4
|
||
+L(CopyVecSizeUnaligned_16):
|
||
+ bsf %ecx, %edx
|
||
+ VMOVU %YMM4, (%rdi)
|
||
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea VEC_SIZE(%rdi, %rdx), %rax
|
||
+# endif
|
||
+ VMOVU %YMM5, VEC_SIZE(%rdi)
|
||
+ add $((VEC_SIZE * 3) - 1), %r8
|
||
+ sub %rdx, %r8
|
||
+ lea (VEC_SIZE + 1)(%rdi, %rdx), %rdi
|
||
+ jmp L(StrncpyFillTailWithZero)
|
||
+# else
|
||
+ add $VEC_SIZE, %rsi
|
||
+ add $VEC_SIZE, %rdi
|
||
+ jmp L(CopyVecSizeExit)
|
||
+# endif
|
||
+
|
||
+ .p2align 4
|
||
+L(CopyVecSizeUnaligned_32):
|
||
+ bsf %edx, %edx
|
||
+ VMOVU %YMM4, (%rdi)
|
||
+ VMOVU %YMM5, VEC_SIZE(%rdi)
|
||
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea (VEC_SIZE * 2)(%rdi, %rdx), %rax
|
||
+# endif
|
||
+ VMOVU %YMM6, (VEC_SIZE * 2)(%rdi)
|
||
+ add $((VEC_SIZE * 2) - 1), %r8
|
||
+ sub %rdx, %r8
|
||
+ lea ((VEC_SIZE * 2) + 1)(%rdi, %rdx), %rdi
|
||
+ jmp L(StrncpyFillTailWithZero)
|
||
+# else
|
||
+ add $(VEC_SIZE * 2), %rsi
|
||
+ add $(VEC_SIZE * 2), %rdi
|
||
+ jmp L(CopyVecSizeExit)
|
||
+# endif
|
||
+
|
||
+# ifdef USE_AS_STRNCPY
|
||
+# ifndef USE_AS_STRCAT
|
||
+ .p2align 4
|
||
+L(CopyVecSizeUnalignedVec6):
|
||
+ VMOVU %YMM6, (%rdi, %rcx)
|
||
+ jmp L(CopyVecSizeVecExit)
|
||
+
|
||
+ .p2align 4
|
||
+L(CopyVecSizeUnalignedVec5):
|
||
+ VMOVU %YMM5, (%rdi, %rcx)
|
||
+ jmp L(CopyVecSizeVecExit)
|
||
+
|
||
+ .p2align 4
|
||
+L(CopyVecSizeUnalignedVec4):
|
||
+ VMOVU %YMM4, (%rdi, %rcx)
|
||
+ jmp L(CopyVecSizeVecExit)
|
||
+
|
||
+ .p2align 4
|
||
+L(CopyVecSizeUnalignedVec3):
|
||
+ VMOVU %YMM3, (%rdi, %rcx)
|
||
+ jmp L(CopyVecSizeVecExit)
|
||
+# endif
|
||
+
|
||
+/* Case2 */
|
||
+
|
||
+ .p2align 4
|
||
+L(CopyVecSizeCase2):
|
||
+ add $VEC_SIZE, %r8
|
||
+ add %rcx, %rdi
|
||
+ add %rcx, %rsi
|
||
+ bsf %edx, %edx
|
||
+ cmp %r8d, %edx
|
||
+ jb L(CopyVecSizeExit)
|
||
+ jmp L(StrncpyExit)
|
||
+
|
||
+ .p2align 4
|
||
+L(CopyTwoVecSizeCase2):
|
||
+ add %rcx, %rsi
|
||
+ bsf %edx, %edx
|
||
+ add $VEC_SIZE, %edx
|
||
+ sub %ecx, %edx
|
||
+ cmp %r8d, %edx
|
||
+ jb L(CopyVecSizeExit)
|
||
+ jmp L(StrncpyExit)
|
||
+
|
||
+L(CopyVecSizeTailCase2):
|
||
+ add %rcx, %rsi
|
||
+ bsf %edx, %edx
|
||
+ cmp %r8d, %edx
|
||
+ jb L(CopyVecSizeExit)
|
||
+ jmp L(StrncpyExit)
|
||
+
|
||
+L(CopyVecSizeTail1Case2):
|
||
+ bsf %edx, %edx
|
||
+ cmp %r8d, %edx
|
||
+ jb L(CopyVecSizeExit)
|
||
+ jmp L(StrncpyExit)
|
||
+
|
||
+/* Case2 or Case3, Case3 */
|
||
+
|
||
+ .p2align 4
|
||
+L(CopyVecSizeCase2OrCase3):
|
||
+ test %rdx, %rdx
|
||
+ jnz L(CopyVecSizeCase2)
|
||
+L(CopyVecSizeCase3):
|
||
+ add $VEC_SIZE, %r8
|
||
+ add %rcx, %rdi
|
||
+ add %rcx, %rsi
|
||
+ jmp L(StrncpyExit)
|
||
+
|
||
+ .p2align 4
|
||
+L(CopyTwoVecSizeCase2OrCase3):
|
||
+ test %rdx, %rdx
|
||
+ jnz L(CopyTwoVecSizeCase2)
|
||
+ add %rcx, %rsi
|
||
+ jmp L(StrncpyExit)
|
||
+
|
||
+ .p2align 4
|
||
+L(CopyVecSizeTailCase2OrCase3):
|
||
+ test %rdx, %rdx
|
||
+ jnz L(CopyVecSizeTailCase2)
|
||
+ add %rcx, %rsi
|
||
+ jmp L(StrncpyExit)
|
||
+
|
||
+ .p2align 4
|
||
+L(CopyTwoVecSize1Case2OrCase3):
|
||
+ add $VEC_SIZE, %rdi
|
||
+ add $VEC_SIZE, %rsi
|
||
+ sub $VEC_SIZE, %r8
|
||
+L(CopyVecSizeTail1Case2OrCase3):
|
||
+ test %rdx, %rdx
|
||
+ jnz L(CopyVecSizeTail1Case2)
|
||
+ jmp L(StrncpyExit)
|
||
+# endif
|
||
+
|
||
+/*------------End of labels for copying 1-VEC_SIZE bytes and 1-(VEC_SIZE*2) bytes----*/
|
||
+
|
||
+ .p2align 4
|
||
+L(Exit1):
|
||
+ movzwl (%rsi), %edx
|
||
+ mov %dx, (%rdi)
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea 1(%rdi), %rax
|
||
+# endif
|
||
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
|
||
+ sub $2, %r8
|
||
+ lea 2(%rdi), %rdi
|
||
+ jnz L(StrncpyFillTailWithZero)
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(Exit2):
|
||
+ movzwl (%rsi), %ecx
|
||
+ mov %cx, (%rdi)
|
||
+ movb $0, 2(%rdi)
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea 2(%rdi), %rax
|
||
+# endif
|
||
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
|
||
+ sub $3, %r8
|
||
+ lea 3(%rdi), %rdi
|
||
+ jnz L(StrncpyFillTailWithZero)
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(Exit3):
|
||
+ mov (%rsi), %edx
|
||
+ mov %edx, (%rdi)
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea 3(%rdi), %rax
|
||
+# endif
|
||
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
|
||
+ sub $4, %r8
|
||
+ lea 4(%rdi), %rdi
|
||
+ jnz L(StrncpyFillTailWithZero)
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(Exit4_7):
|
||
+ mov (%rsi), %ecx
|
||
+ mov %ecx, (%rdi)
|
||
+ mov -3(%rsi, %rdx), %ecx
|
||
+ mov %ecx, -3(%rdi, %rdx)
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea (%rdi, %rdx), %rax
|
||
+# endif
|
||
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
|
||
+ sub %rdx, %r8
|
||
+ sub $1, %r8
|
||
+ lea 1(%rdi, %rdx), %rdi
|
||
+ jnz L(StrncpyFillTailWithZero)
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(Exit8_15):
|
||
+ mov (%rsi), %rcx
|
||
+ mov -7(%rsi, %rdx), %r9
|
||
+ mov %rcx, (%rdi)
|
||
+ mov %r9, -7(%rdi, %rdx)
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea (%rdi, %rdx), %rax
|
||
+# endif
|
||
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
|
||
+ sub %rdx, %r8
|
||
+ sub $1, %r8
|
||
+ lea 1(%rdi, %rdx), %rdi
|
||
+ jnz L(StrncpyFillTailWithZero)
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(Exit16_31):
|
||
+ VMOVU (%rsi), %XMM2
|
||
+ VMOVU -15(%rsi, %rdx), %XMM3
|
||
+ VMOVU %XMM2, (%rdi)
|
||
+ VMOVU %XMM3, -15(%rdi, %rdx)
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea (%rdi, %rdx), %rax
|
||
+# endif
|
||
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
|
||
+ sub %rdx, %r8
|
||
+ sub $1, %r8
|
||
+ lea 1(%rdi, %rdx), %rdi
|
||
+ jnz L(StrncpyFillTailWithZero)
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(Exit32_63):
|
||
+ VMOVU (%rsi), %YMM2
|
||
+ VMOVU -31(%rsi, %rdx), %YMM3
|
||
+ VMOVU %YMM2, (%rdi)
|
||
+ VMOVU %YMM3, -31(%rdi, %rdx)
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea (%rdi, %rdx), %rax
|
||
+# endif
|
||
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
|
||
+ sub %rdx, %r8
|
||
+ sub $1, %r8
|
||
+ lea 1(%rdi, %rdx), %rdi
|
||
+ jnz L(StrncpyFillTailWithZero)
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+# ifdef USE_AS_STRNCPY
|
||
+
|
||
+ .p2align 4
|
||
+L(StrncpyExit1):
|
||
+ movzbl (%rsi), %edx
|
||
+ mov %dl, (%rdi)
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea 1(%rdi), %rax
|
||
+# endif
|
||
+# ifdef USE_AS_STRCAT
|
||
+ movb $0, 1(%rdi)
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(StrncpyExit2):
|
||
+ movzwl (%rsi), %edx
|
||
+ mov %dx, (%rdi)
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea 2(%rdi), %rax
|
||
+# endif
|
||
+# ifdef USE_AS_STRCAT
|
||
+ movb $0, 2(%rdi)
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(StrncpyExit3_4):
|
||
+ movzwl (%rsi), %ecx
|
||
+ movzwl -2(%rsi, %r8), %edx
|
||
+ mov %cx, (%rdi)
|
||
+ mov %dx, -2(%rdi, %r8)
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea (%rdi, %r8), %rax
|
||
+# endif
|
||
+# ifdef USE_AS_STRCAT
|
||
+ movb $0, (%rdi, %r8)
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(StrncpyExit5_8):
|
||
+ mov (%rsi), %ecx
|
||
+ mov -4(%rsi, %r8), %edx
|
||
+ mov %ecx, (%rdi)
|
||
+ mov %edx, -4(%rdi, %r8)
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea (%rdi, %r8), %rax
|
||
+# endif
|
||
+# ifdef USE_AS_STRCAT
|
||
+ movb $0, (%rdi, %r8)
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(StrncpyExit9_16):
|
||
+ mov (%rsi), %rcx
|
||
+ mov -8(%rsi, %r8), %rdx
|
||
+ mov %rcx, (%rdi)
|
||
+ mov %rdx, -8(%rdi, %r8)
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea (%rdi, %r8), %rax
|
||
+# endif
|
||
+# ifdef USE_AS_STRCAT
|
||
+ movb $0, (%rdi, %r8)
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(StrncpyExit17_32):
|
||
+ VMOVU (%rsi), %XMM2
|
||
+ VMOVU -16(%rsi, %r8), %XMM3
|
||
+ VMOVU %XMM2, (%rdi)
|
||
+ VMOVU %XMM3, -16(%rdi, %r8)
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea (%rdi, %r8), %rax
|
||
+# endif
|
||
+# ifdef USE_AS_STRCAT
|
||
+ movb $0, (%rdi, %r8)
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(StrncpyExit33_64):
|
||
+ /* 0/32, (%r8 - VEC_SIZE)/32 */
|
||
+ VMOVU (%rsi), %YMM2
|
||
+ VMOVU -VEC_SIZE(%rsi, %r8), %YMM3
|
||
+ VMOVU %YMM2, (%rdi)
|
||
+ VMOVU %YMM3, -VEC_SIZE(%rdi, %r8)
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea (%rdi, %r8), %rax
|
||
+# endif
|
||
+# ifdef USE_AS_STRCAT
|
||
+ movb $0, (%rdi, %r8)
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(StrncpyExit65):
|
||
+ /* 0/32, 32/32, 64/1 */
|
||
+ VMOVU (%rsi), %YMM2
|
||
+ VMOVU 32(%rsi), %YMM3
|
||
+ mov 64(%rsi), %cl
|
||
+ VMOVU %YMM2, (%rdi)
|
||
+ VMOVU %YMM3, 32(%rdi)
|
||
+ mov %cl, 64(%rdi)
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea 65(%rdi), %rax
|
||
+# endif
|
||
+# ifdef USE_AS_STRCAT
|
||
+ movb $0, 65(%rdi)
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+# ifndef USE_AS_STRCAT
|
||
+
|
||
+ .p2align 4
|
||
+L(Fill1):
|
||
+ mov %dl, (%rdi)
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(Fill2):
|
||
+ mov %dx, (%rdi)
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(Fill3_4):
|
||
+ mov %dx, (%rdi)
|
||
+ mov %dx, -2(%rdi, %r8)
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(Fill5_8):
|
||
+ mov %edx, (%rdi)
|
||
+ mov %edx, -4(%rdi, %r8)
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(Fill9_16):
|
||
+ mov %rdx, (%rdi)
|
||
+ mov %rdx, -8(%rdi, %r8)
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(Fill17_32):
|
||
+ VMOVU %XMMZERO, (%rdi)
|
||
+ VMOVU %XMMZERO, -16(%rdi, %r8)
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(CopyVecSizeUnalignedVec2):
|
||
+ VMOVU %YMM2, (%rdi, %rcx)
|
||
+
|
||
+ .p2align 4
|
||
+L(CopyVecSizeVecExit):
|
||
+ bsf %edx, %edx
|
||
+ add $(VEC_SIZE - 1), %r8
|
||
+ add %rcx, %rdi
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea (%rdi, %rdx), %rax
|
||
+# endif
|
||
+ sub %rdx, %r8
|
||
+ lea 1(%rdi, %rdx), %rdi
|
||
+
|
||
+ .p2align 4
|
||
+L(StrncpyFillTailWithZero):
|
||
+ xor %edx, %edx
|
||
+ sub $VEC_SIZE, %r8
|
||
+ jbe L(StrncpyFillExit)
|
||
+
|
||
+ VMOVU %YMMZERO, (%rdi)
|
||
+ add $VEC_SIZE, %rdi
|
||
+
|
||
+ mov %rdi, %rsi
|
||
+ and $(VEC_SIZE - 1), %esi
|
||
+ sub %rsi, %rdi
|
||
+ add %rsi, %r8
|
||
+ sub $(VEC_SIZE * 4), %r8
|
||
+ jb L(StrncpyFillLessFourVecSize)
|
||
+
|
||
+L(StrncpyFillLoopVmovdqa):
|
||
+ VMOVA %YMMZERO, (%rdi)
|
||
+ VMOVA %YMMZERO, VEC_SIZE(%rdi)
|
||
+ VMOVA %YMMZERO, (VEC_SIZE * 2)(%rdi)
|
||
+ VMOVA %YMMZERO, (VEC_SIZE * 3)(%rdi)
|
||
+ add $(VEC_SIZE * 4), %rdi
|
||
+ sub $(VEC_SIZE * 4), %r8
|
||
+ jae L(StrncpyFillLoopVmovdqa)
|
||
+
|
||
+L(StrncpyFillLessFourVecSize):
|
||
+ add $(VEC_SIZE * 2), %r8
|
||
+ jl L(StrncpyFillLessTwoVecSize)
|
||
+ VMOVA %YMMZERO, (%rdi)
|
||
+ VMOVA %YMMZERO, VEC_SIZE(%rdi)
|
||
+ add $(VEC_SIZE * 2), %rdi
|
||
+ sub $VEC_SIZE, %r8
|
||
+ jl L(StrncpyFillExit)
|
||
+ VMOVA %YMMZERO, (%rdi)
|
||
+ add $VEC_SIZE, %rdi
|
||
+ jmp L(Fill)
|
||
+
|
||
+ .p2align 4
|
||
+L(StrncpyFillLessTwoVecSize):
|
||
+ add $VEC_SIZE, %r8
|
||
+ jl L(StrncpyFillExit)
|
||
+ VMOVA %YMMZERO, (%rdi)
|
||
+ add $VEC_SIZE, %rdi
|
||
+ jmp L(Fill)
|
||
+
|
||
+ .p2align 4
|
||
+L(StrncpyFillExit):
|
||
+ add $VEC_SIZE, %r8
|
||
+L(Fill):
|
||
+ cmp $17, %r8d
|
||
+ jae L(Fill17_32)
|
||
+ cmp $9, %r8d
|
||
+ jae L(Fill9_16)
|
||
+ cmp $5, %r8d
|
||
+ jae L(Fill5_8)
|
||
+ cmp $3, %r8d
|
||
+ jae L(Fill3_4)
|
||
+ cmp $1, %r8d
|
||
+ ja L(Fill2)
|
||
+ je L(Fill1)
|
||
+ ret
|
||
+
|
||
+/* end of ifndef USE_AS_STRCAT */
|
||
+# endif
|
||
+
|
||
+ .p2align 4
|
||
+L(UnalignedLeaveCase2OrCase3):
|
||
+ test %rdx, %rdx
|
||
+ jnz L(UnalignedFourVecSizeLeaveCase2)
|
||
+L(UnalignedFourVecSizeLeaveCase3):
|
||
+ lea (VEC_SIZE * 4)(%r8), %rcx
|
||
+ and $-VEC_SIZE, %rcx
|
||
+ add $(VEC_SIZE * 3), %r8
|
||
+ jl L(CopyVecSizeCase3)
|
||
+ VMOVU %YMM4, (%rdi)
|
||
+ sub $VEC_SIZE, %r8
|
||
+ jb L(CopyVecSizeCase3)
|
||
+ VMOVU %YMM5, VEC_SIZE(%rdi)
|
||
+ sub $VEC_SIZE, %r8
|
||
+ jb L(CopyVecSizeCase3)
|
||
+ VMOVU %YMM6, (VEC_SIZE * 2)(%rdi)
|
||
+ sub $VEC_SIZE, %r8
|
||
+ jb L(CopyVecSizeCase3)
|
||
+ VMOVU %YMM7, (VEC_SIZE * 3)(%rdi)
|
||
+# ifdef USE_AS_STPCPY
|
||
+ lea (VEC_SIZE * 4)(%rdi), %rax
|
||
+# endif
|
||
+# ifdef USE_AS_STRCAT
|
||
+ movb $0, (VEC_SIZE * 4)(%rdi)
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(UnalignedFourVecSizeLeaveCase2):
|
||
+ xor %ecx, %ecx
|
||
+ vpcmpb $0, %YMM4, %YMMZERO, %k1
|
||
+ kmovd %k1, %edx
|
||
+ add $(VEC_SIZE * 3), %r8
|
||
+ jle L(CopyVecSizeCase2OrCase3)
|
||
+ test %edx, %edx
|
||
+# ifndef USE_AS_STRCAT
|
||
+ jnz L(CopyVecSizeUnalignedVec4)
|
||
+# else
|
||
+ jnz L(CopyVecSize)
|
||
+# endif
|
||
+ vpcmpb $0, %YMM5, %YMMZERO, %k2
|
||
+ kmovd %k2, %edx
|
||
+ VMOVU %YMM4, (%rdi)
|
||
+ add $VEC_SIZE, %rcx
|
||
+ sub $VEC_SIZE, %r8
|
||
+ jbe L(CopyVecSizeCase2OrCase3)
|
||
+ test %edx, %edx
|
||
+# ifndef USE_AS_STRCAT
|
||
+ jnz L(CopyVecSizeUnalignedVec5)
|
||
+# else
|
||
+ jnz L(CopyVecSize)
|
||
+# endif
|
||
+
|
||
+ vpcmpb $0, %YMM6, %YMMZERO, %k3
|
||
+ kmovd %k3, %edx
|
||
+ VMOVU %YMM5, VEC_SIZE(%rdi)
|
||
+ add $VEC_SIZE, %rcx
|
||
+ sub $VEC_SIZE, %r8
|
||
+ jbe L(CopyVecSizeCase2OrCase3)
|
||
+ test %edx, %edx
|
||
+# ifndef USE_AS_STRCAT
|
||
+ jnz L(CopyVecSizeUnalignedVec6)
|
||
+# else
|
||
+ jnz L(CopyVecSize)
|
||
+# endif
|
||
+
|
||
+ vpcmpb $0, %YMM7, %YMMZERO, %k4
|
||
+ kmovd %k4, %edx
|
||
+ VMOVU %YMM6, (VEC_SIZE * 2)(%rdi)
|
||
+ lea VEC_SIZE(%rdi, %rcx), %rdi
|
||
+ lea VEC_SIZE(%rsi, %rcx), %rsi
|
||
+ bsf %edx, %edx
|
||
+ cmp %r8d, %edx
|
||
+ jb L(CopyVecSizeExit)
|
||
+L(StrncpyExit):
|
||
+ cmp $65, %r8d
|
||
+ je L(StrncpyExit65)
|
||
+ cmp $33, %r8d
|
||
+ jae L(StrncpyExit33_64)
|
||
+ cmp $17, %r8d
|
||
+ jae L(StrncpyExit17_32)
|
||
+ cmp $9, %r8d
|
||
+ jae L(StrncpyExit9_16)
|
||
+ cmp $5, %r8d
|
||
+ jae L(StrncpyExit5_8)
|
||
+ cmp $3, %r8d
|
||
+ jae L(StrncpyExit3_4)
|
||
+ cmp $1, %r8d
|
||
+ ja L(StrncpyExit2)
|
||
+ je L(StrncpyExit1)
|
||
+# ifdef USE_AS_STPCPY
|
||
+ mov %rdi, %rax
|
||
+# endif
|
||
+# ifdef USE_AS_STRCAT
|
||
+ movb $0, (%rdi)
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(ExitZero):
|
||
+# ifndef USE_AS_STRCAT
|
||
+ mov %rdi, %rax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+# endif
|
||
+
|
||
+# ifndef USE_AS_STRCAT
|
||
+END (STRCPY)
|
||
+# else
|
||
+END (STRCAT)
|
||
+# endif
|
||
+#endif
|
||
diff --git a/sysdeps/x86_64/multiarch/strlen-avx2-rtm.S b/sysdeps/x86_64/multiarch/strlen-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..75b4b7612c
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strlen-avx2-rtm.S
|
||
@@ -0,0 +1,12 @@
|
||
+#ifndef STRLEN
|
||
+# define STRLEN __strlen_avx2_rtm
|
||
+#endif
|
||
+
|
||
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
|
||
+
|
||
+#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
|
||
+
|
||
+#define SECTION(p) p##.avx.rtm
|
||
+
|
||
+#include "strlen-avx2.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S
|
||
index 73421ec1b2..45e08e64d6 100644
|
||
--- a/sysdeps/x86_64/multiarch/strlen-avx2.S
|
||
+++ b/sysdeps/x86_64/multiarch/strlen-avx2.S
|
||
@@ -27,370 +27,531 @@
|
||
# ifdef USE_AS_WCSLEN
|
||
# define VPCMPEQ vpcmpeqd
|
||
# define VPMINU vpminud
|
||
+# define CHAR_SIZE 4
|
||
# else
|
||
# define VPCMPEQ vpcmpeqb
|
||
# define VPMINU vpminub
|
||
+# define CHAR_SIZE 1
|
||
# endif
|
||
|
||
# ifndef VZEROUPPER
|
||
# define VZEROUPPER vzeroupper
|
||
# endif
|
||
|
||
+# ifndef SECTION
|
||
+# define SECTION(p) p##.avx
|
||
+# endif
|
||
+
|
||
# define VEC_SIZE 32
|
||
+# define PAGE_SIZE 4096
|
||
+# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
|
||
|
||
- .section .text.avx,"ax",@progbits
|
||
+ .section SECTION(.text),"ax",@progbits
|
||
ENTRY (STRLEN)
|
||
# ifdef USE_AS_STRNLEN
|
||
- /* Check for zero length. */
|
||
+ /* Check zero length. */
|
||
+# ifdef __ILP32__
|
||
+ /* Clear upper bits. */
|
||
+ and %RSI_LP, %RSI_LP
|
||
+# else
|
||
test %RSI_LP, %RSI_LP
|
||
- jz L(zero)
|
||
-# ifdef USE_AS_WCSLEN
|
||
- shl $2, %RSI_LP
|
||
-# elif defined __ILP32__
|
||
- /* Clear the upper 32 bits. */
|
||
- movl %esi, %esi
|
||
# endif
|
||
+ jz L(zero)
|
||
+ /* Store max len in R8_LP before adjusting if using WCSLEN. */
|
||
mov %RSI_LP, %R8_LP
|
||
# endif
|
||
- movl %edi, %ecx
|
||
+ movl %edi, %eax
|
||
movq %rdi, %rdx
|
||
vpxor %xmm0, %xmm0, %xmm0
|
||
-
|
||
+ /* Clear high bits from edi. Only keeping bits relevant to page
|
||
+ cross check. */
|
||
+ andl $(PAGE_SIZE - 1), %eax
|
||
/* Check if we may cross page boundary with one vector load. */
|
||
- andl $(2 * VEC_SIZE - 1), %ecx
|
||
- cmpl $VEC_SIZE, %ecx
|
||
- ja L(cros_page_boundary)
|
||
+ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
|
||
+ ja L(cross_page_boundary)
|
||
|
||
/* Check the first VEC_SIZE bytes. */
|
||
- VPCMPEQ (%rdi), %ymm0, %ymm1
|
||
+ VPCMPEQ (%rdi), %ymm0, %ymm1
|
||
vpmovmskb %ymm1, %eax
|
||
- testl %eax, %eax
|
||
-
|
||
# ifdef USE_AS_STRNLEN
|
||
- jnz L(first_vec_x0_check)
|
||
- /* Adjust length and check the end of data. */
|
||
- subq $VEC_SIZE, %rsi
|
||
- jbe L(max)
|
||
-# else
|
||
- jnz L(first_vec_x0)
|
||
+ /* If length <= CHAR_PER_VEC handle special. */
|
||
+ cmpq $CHAR_PER_VEC, %rsi
|
||
+ jbe L(first_vec_x0)
|
||
# endif
|
||
-
|
||
- /* Align data for aligned loads in the loop. */
|
||
- addq $VEC_SIZE, %rdi
|
||
- andl $(VEC_SIZE - 1), %ecx
|
||
- andq $-VEC_SIZE, %rdi
|
||
+ /* If empty continue to aligned_more. Otherwise return bit
|
||
+ position of first match. */
|
||
+ testl %eax, %eax
|
||
+ jz L(aligned_more)
|
||
+ tzcntl %eax, %eax
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get wchar_t count. */
|
||
+ shrl $2, %eax
|
||
+# endif
|
||
+ VZEROUPPER_RETURN
|
||
|
||
# ifdef USE_AS_STRNLEN
|
||
- /* Adjust length. */
|
||
- addq %rcx, %rsi
|
||
+L(zero):
|
||
+ xorl %eax, %eax
|
||
+ ret
|
||
|
||
- subq $(VEC_SIZE * 4), %rsi
|
||
- jbe L(last_4x_vec_or_less)
|
||
+ .p2align 4
|
||
+L(first_vec_x0):
|
||
+ /* Set bit for max len so that tzcnt will return min of max len
|
||
+ and position of first match. */
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Multiply length by 4 to get byte count. */
|
||
+ sall $2, %esi
|
||
+# endif
|
||
+ btsq %rsi, %rax
|
||
+ tzcntl %eax, %eax
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get wchar_t count. */
|
||
+ shrl $2, %eax
|
||
+# endif
|
||
+ VZEROUPPER_RETURN
|
||
# endif
|
||
- jmp L(more_4x_vec)
|
||
|
||
.p2align 4
|
||
-L(cros_page_boundary):
|
||
- andl $(VEC_SIZE - 1), %ecx
|
||
- andq $-VEC_SIZE, %rdi
|
||
- VPCMPEQ (%rdi), %ymm0, %ymm1
|
||
- vpmovmskb %ymm1, %eax
|
||
- /* Remove the leading bytes. */
|
||
- sarl %cl, %eax
|
||
- testl %eax, %eax
|
||
- jz L(aligned_more)
|
||
+L(first_vec_x1):
|
||
tzcntl %eax, %eax
|
||
+ /* Safe to use 32 bit instructions as these are only called for
|
||
+ size = [1, 159]. */
|
||
# ifdef USE_AS_STRNLEN
|
||
- /* Check the end of data. */
|
||
- cmpq %rax, %rsi
|
||
- jbe L(max)
|
||
+ /* Use ecx which was computed earlier to compute correct value.
|
||
+ */
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ leal -(VEC_SIZE * 4 + 1)(%rax, %rcx, 4), %eax
|
||
+# else
|
||
+ subl $(VEC_SIZE * 4 + 1), %ecx
|
||
+ addl %ecx, %eax
|
||
+# endif
|
||
+# else
|
||
+ subl %edx, %edi
|
||
+ incl %edi
|
||
+ addl %edi, %eax
|
||
# endif
|
||
- addq %rdi, %rax
|
||
- addq %rcx, %rax
|
||
- subq %rdx, %rax
|
||
# ifdef USE_AS_WCSLEN
|
||
- shrq $2, %rax
|
||
+ /* NB: Divide bytes by 4 to get wchar_t count. */
|
||
+ shrl $2, %eax
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
-L(aligned_more):
|
||
+L(first_vec_x2):
|
||
+ tzcntl %eax, %eax
|
||
+ /* Safe to use 32 bit instructions as these are only called for
|
||
+ size = [1, 159]. */
|
||
# ifdef USE_AS_STRNLEN
|
||
- /* "rcx" is less than VEC_SIZE. Calculate "rdx + rcx - VEC_SIZE"
|
||
- with "rdx - (VEC_SIZE - rcx)" instead of "(rdx + rcx) - VEC_SIZE"
|
||
- to void possible addition overflow. */
|
||
- negq %rcx
|
||
- addq $VEC_SIZE, %rcx
|
||
-
|
||
- /* Check the end of data. */
|
||
- subq %rcx, %rsi
|
||
- jbe L(max)
|
||
+ /* Use ecx which was computed earlier to compute correct value.
|
||
+ */
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ leal -(VEC_SIZE * 3 + 1)(%rax, %rcx, 4), %eax
|
||
+# else
|
||
+ subl $(VEC_SIZE * 3 + 1), %ecx
|
||
+ addl %ecx, %eax
|
||
+# endif
|
||
+# else
|
||
+ subl %edx, %edi
|
||
+ addl $(VEC_SIZE + 1), %edi
|
||
+ addl %edi, %eax
|
||
# endif
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get wchar_t count. */
|
||
+ shrl $2, %eax
|
||
+# endif
|
||
+ VZEROUPPER_RETURN
|
||
|
||
- addq $VEC_SIZE, %rdi
|
||
+ .p2align 4
|
||
+L(first_vec_x3):
|
||
+ tzcntl %eax, %eax
|
||
+ /* Safe to use 32 bit instructions as these are only called for
|
||
+ size = [1, 159]. */
|
||
+# ifdef USE_AS_STRNLEN
|
||
+ /* Use ecx which was computed earlier to compute correct value.
|
||
+ */
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ leal -(VEC_SIZE * 2 + 1)(%rax, %rcx, 4), %eax
|
||
+# else
|
||
+ subl $(VEC_SIZE * 2 + 1), %ecx
|
||
+ addl %ecx, %eax
|
||
+# endif
|
||
+# else
|
||
+ subl %edx, %edi
|
||
+ addl $(VEC_SIZE * 2 + 1), %edi
|
||
+ addl %edi, %eax
|
||
+# endif
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get wchar_t count. */
|
||
+ shrl $2, %eax
|
||
+# endif
|
||
+ VZEROUPPER_RETURN
|
||
|
||
+ .p2align 4
|
||
+L(first_vec_x4):
|
||
+ tzcntl %eax, %eax
|
||
+ /* Safe to use 32 bit instructions as these are only called for
|
||
+ size = [1, 159]. */
|
||
# ifdef USE_AS_STRNLEN
|
||
- subq $(VEC_SIZE * 4), %rsi
|
||
- jbe L(last_4x_vec_or_less)
|
||
+ /* Use ecx which was computed earlier to compute correct value.
|
||
+ */
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ leal -(VEC_SIZE * 1 + 1)(%rax, %rcx, 4), %eax
|
||
+# else
|
||
+ subl $(VEC_SIZE + 1), %ecx
|
||
+ addl %ecx, %eax
|
||
+# endif
|
||
+# else
|
||
+ subl %edx, %edi
|
||
+ addl $(VEC_SIZE * 3 + 1), %edi
|
||
+ addl %edi, %eax
|
||
# endif
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get wchar_t count. */
|
||
+ shrl $2, %eax
|
||
+# endif
|
||
+ VZEROUPPER_RETURN
|
||
|
||
-L(more_4x_vec):
|
||
+ .p2align 5
|
||
+L(aligned_more):
|
||
+ /* Align data to VEC_SIZE - 1. This is the same number of
|
||
+ instructions as using andq with -VEC_SIZE but saves 4 bytes of
|
||
+ code on the x4 check. */
|
||
+ orq $(VEC_SIZE - 1), %rdi
|
||
+L(cross_page_continue):
|
||
/* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time
|
||
since data is only aligned to VEC_SIZE. */
|
||
- VPCMPEQ (%rdi), %ymm0, %ymm1
|
||
- vpmovmskb %ymm1, %eax
|
||
- testl %eax, %eax
|
||
- jnz L(first_vec_x0)
|
||
-
|
||
- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
|
||
+# ifdef USE_AS_STRNLEN
|
||
+ /* + 1 because rdi is aligned to VEC_SIZE - 1. + CHAR_SIZE
|
||
+ because it simplifies the logic in last_4x_vec_or_less. */
|
||
+ leaq (VEC_SIZE * 4 + CHAR_SIZE + 1)(%rdi), %rcx
|
||
+ subq %rdx, %rcx
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
|
||
+ sarl $2, %ecx
|
||
+# endif
|
||
+# endif
|
||
+ /* Load first VEC regardless. */
|
||
+ VPCMPEQ 1(%rdi), %ymm0, %ymm1
|
||
+# ifdef USE_AS_STRNLEN
|
||
+ /* Adjust length. If near end handle specially. */
|
||
+ subq %rcx, %rsi
|
||
+ jb L(last_4x_vec_or_less)
|
||
+# endif
|
||
vpmovmskb %ymm1, %eax
|
||
testl %eax, %eax
|
||
jnz L(first_vec_x1)
|
||
|
||
- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
|
||
+ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
|
||
vpmovmskb %ymm1, %eax
|
||
testl %eax, %eax
|
||
jnz L(first_vec_x2)
|
||
|
||
- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
|
||
+ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1
|
||
vpmovmskb %ymm1, %eax
|
||
testl %eax, %eax
|
||
jnz L(first_vec_x3)
|
||
|
||
- addq $(VEC_SIZE * 4), %rdi
|
||
-
|
||
-# ifdef USE_AS_STRNLEN
|
||
- subq $(VEC_SIZE * 4), %rsi
|
||
- jbe L(last_4x_vec_or_less)
|
||
-# endif
|
||
-
|
||
- /* Align data to 4 * VEC_SIZE. */
|
||
- movq %rdi, %rcx
|
||
- andl $(4 * VEC_SIZE - 1), %ecx
|
||
- andq $-(4 * VEC_SIZE), %rdi
|
||
+ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1
|
||
+ vpmovmskb %ymm1, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(first_vec_x4)
|
||
|
||
+ /* Align data to VEC_SIZE * 4 - 1. */
|
||
# ifdef USE_AS_STRNLEN
|
||
- /* Adjust length. */
|
||
+ /* Before adjusting length check if at last VEC_SIZE * 4. */
|
||
+ cmpq $(CHAR_PER_VEC * 4 - 1), %rsi
|
||
+ jbe L(last_4x_vec_or_less_load)
|
||
+ incq %rdi
|
||
+ movl %edi, %ecx
|
||
+ orq $(VEC_SIZE * 4 - 1), %rdi
|
||
+ andl $(VEC_SIZE * 4 - 1), %ecx
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
|
||
+ sarl $2, %ecx
|
||
+# endif
|
||
+ /* Readjust length. */
|
||
addq %rcx, %rsi
|
||
+# else
|
||
+ incq %rdi
|
||
+ orq $(VEC_SIZE * 4 - 1), %rdi
|
||
# endif
|
||
-
|
||
+ /* Compare 4 * VEC at a time forward. */
|
||
.p2align 4
|
||
L(loop_4x_vec):
|
||
- /* Compare 4 * VEC at a time forward. */
|
||
- vmovdqa (%rdi), %ymm1
|
||
- vmovdqa VEC_SIZE(%rdi), %ymm2
|
||
- vmovdqa (VEC_SIZE * 2)(%rdi), %ymm3
|
||
- vmovdqa (VEC_SIZE * 3)(%rdi), %ymm4
|
||
- VPMINU %ymm1, %ymm2, %ymm5
|
||
- VPMINU %ymm3, %ymm4, %ymm6
|
||
- VPMINU %ymm5, %ymm6, %ymm5
|
||
-
|
||
+# ifdef USE_AS_STRNLEN
|
||
+ /* Break if at end of length. */
|
||
+ subq $(CHAR_PER_VEC * 4), %rsi
|
||
+ jb L(last_4x_vec_or_less_cmpeq)
|
||
+# endif
|
||
+ /* Save some code size by microfusing VPMINU with the load.
|
||
+ Since the matches in ymm2/ymm4 can only be returned if there
|
||
+ were no matches in ymm1/ymm3 respectively there is no issue
|
||
+ with overlap. */
|
||
+ vmovdqa 1(%rdi), %ymm1
|
||
+ VPMINU (VEC_SIZE + 1)(%rdi), %ymm1, %ymm2
|
||
+ vmovdqa (VEC_SIZE * 2 + 1)(%rdi), %ymm3
|
||
+ VPMINU (VEC_SIZE * 3 + 1)(%rdi), %ymm3, %ymm4
|
||
+
|
||
+ VPMINU %ymm2, %ymm4, %ymm5
|
||
VPCMPEQ %ymm5, %ymm0, %ymm5
|
||
- vpmovmskb %ymm5, %eax
|
||
- testl %eax, %eax
|
||
- jnz L(4x_vec_end)
|
||
+ vpmovmskb %ymm5, %ecx
|
||
|
||
- addq $(VEC_SIZE * 4), %rdi
|
||
+ subq $-(VEC_SIZE * 4), %rdi
|
||
+ testl %ecx, %ecx
|
||
+ jz L(loop_4x_vec)
|
||
|
||
-# ifndef USE_AS_STRNLEN
|
||
- jmp L(loop_4x_vec)
|
||
-# else
|
||
- subq $(VEC_SIZE * 4), %rsi
|
||
- ja L(loop_4x_vec)
|
||
|
||
-L(last_4x_vec_or_less):
|
||
- /* Less than 4 * VEC and aligned to VEC_SIZE. */
|
||
- addl $(VEC_SIZE * 2), %esi
|
||
- jle L(last_2x_vec)
|
||
-
|
||
- VPCMPEQ (%rdi), %ymm0, %ymm1
|
||
- vpmovmskb %ymm1, %eax
|
||
- testl %eax, %eax
|
||
- jnz L(first_vec_x0)
|
||
-
|
||
- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
|
||
+ VPCMPEQ %ymm1, %ymm0, %ymm1
|
||
vpmovmskb %ymm1, %eax
|
||
+ subq %rdx, %rdi
|
||
testl %eax, %eax
|
||
- jnz L(first_vec_x1)
|
||
+ jnz L(last_vec_return_x0)
|
||
|
||
- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
|
||
- vpmovmskb %ymm1, %eax
|
||
+ VPCMPEQ %ymm2, %ymm0, %ymm2
|
||
+ vpmovmskb %ymm2, %eax
|
||
testl %eax, %eax
|
||
+ jnz L(last_vec_return_x1)
|
||
|
||
- jnz L(first_vec_x2_check)
|
||
- subl $VEC_SIZE, %esi
|
||
- jle L(max)
|
||
+ /* Combine last 2 VEC. */
|
||
+ VPCMPEQ %ymm3, %ymm0, %ymm3
|
||
+ vpmovmskb %ymm3, %eax
|
||
+ /* rcx has combined result from all 4 VEC. It will only be used
|
||
+ if the first 3 other VEC all did not contain a match. */
|
||
+ salq $32, %rcx
|
||
+ orq %rcx, %rax
|
||
+ tzcntq %rax, %rax
|
||
+ subq $(VEC_SIZE * 2 - 1), %rdi
|
||
+ addq %rdi, %rax
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get wchar_t count. */
|
||
+ shrq $2, %rax
|
||
+# endif
|
||
+ VZEROUPPER_RETURN
|
||
|
||
- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
|
||
- vpmovmskb %ymm1, %eax
|
||
- testl %eax, %eax
|
||
|
||
- jnz L(first_vec_x3_check)
|
||
- movq %r8, %rax
|
||
+# ifdef USE_AS_STRNLEN
|
||
+ .p2align 4
|
||
+L(last_4x_vec_or_less_load):
|
||
+ /* Depending on entry adjust rdi / prepare first VEC in ymm1.
|
||
+ */
|
||
+ subq $-(VEC_SIZE * 4), %rdi
|
||
+L(last_4x_vec_or_less_cmpeq):
|
||
+ VPCMPEQ 1(%rdi), %ymm0, %ymm1
|
||
+L(last_4x_vec_or_less):
|
||
# ifdef USE_AS_WCSLEN
|
||
- shrq $2, %rax
|
||
+ /* NB: Multiply length by 4 to get byte count. */
|
||
+ sall $2, %esi
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
-
|
||
- .p2align 4
|
||
-L(last_2x_vec):
|
||
- addl $(VEC_SIZE * 2), %esi
|
||
- VPCMPEQ (%rdi), %ymm0, %ymm1
|
||
vpmovmskb %ymm1, %eax
|
||
+ /* If remaining length > VEC_SIZE * 2. This works if esi is off
|
||
+ by VEC_SIZE * 4. */
|
||
+ testl $(VEC_SIZE * 2), %esi
|
||
+ jnz L(last_4x_vec)
|
||
+
|
||
+ /* length may have been negative or positive by an offset of
|
||
+ VEC_SIZE * 4 depending on where this was called from. This fixes
|
||
+ that. */
|
||
+ andl $(VEC_SIZE * 4 - 1), %esi
|
||
testl %eax, %eax
|
||
+ jnz L(last_vec_x1_check)
|
||
|
||
- jnz L(first_vec_x0_check)
|
||
subl $VEC_SIZE, %esi
|
||
- jle L(max)
|
||
+ jb L(max)
|
||
|
||
- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
|
||
+ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
|
||
vpmovmskb %ymm1, %eax
|
||
- testl %eax, %eax
|
||
- jnz L(first_vec_x1_check)
|
||
- movq %r8, %rax
|
||
-# ifdef USE_AS_WCSLEN
|
||
- shrq $2, %rax
|
||
-# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
-
|
||
- .p2align 4
|
||
-L(first_vec_x0_check):
|
||
tzcntl %eax, %eax
|
||
/* Check the end of data. */
|
||
- cmpq %rax, %rsi
|
||
- jbe L(max)
|
||
+ cmpl %eax, %esi
|
||
+ jb L(max)
|
||
+ subq %rdx, %rdi
|
||
+ addl $(VEC_SIZE + 1), %eax
|
||
addq %rdi, %rax
|
||
- subq %rdx, %rax
|
||
# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get wchar_t count. */
|
||
shrq $2, %rax
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
+# endif
|
||
|
||
.p2align 4
|
||
-L(first_vec_x1_check):
|
||
+L(last_vec_return_x0):
|
||
tzcntl %eax, %eax
|
||
- /* Check the end of data. */
|
||
- cmpq %rax, %rsi
|
||
- jbe L(max)
|
||
- addq $VEC_SIZE, %rax
|
||
+ subq $(VEC_SIZE * 4 - 1), %rdi
|
||
addq %rdi, %rax
|
||
- subq %rdx, %rax
|
||
-# ifdef USE_AS_WCSLEN
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get wchar_t count. */
|
||
shrq $2, %rax
|
||
-# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+# endif
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
-L(first_vec_x2_check):
|
||
+L(last_vec_return_x1):
|
||
tzcntl %eax, %eax
|
||
- /* Check the end of data. */
|
||
- cmpq %rax, %rsi
|
||
- jbe L(max)
|
||
- addq $(VEC_SIZE * 2), %rax
|
||
+ subq $(VEC_SIZE * 3 - 1), %rdi
|
||
addq %rdi, %rax
|
||
- subq %rdx, %rax
|
||
-# ifdef USE_AS_WCSLEN
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get wchar_t count. */
|
||
shrq $2, %rax
|
||
-# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+# endif
|
||
+ VZEROUPPER_RETURN
|
||
|
||
+# ifdef USE_AS_STRNLEN
|
||
.p2align 4
|
||
-L(first_vec_x3_check):
|
||
+L(last_vec_x1_check):
|
||
+
|
||
tzcntl %eax, %eax
|
||
/* Check the end of data. */
|
||
- cmpq %rax, %rsi
|
||
- jbe L(max)
|
||
- addq $(VEC_SIZE * 3), %rax
|
||
+ cmpl %eax, %esi
|
||
+ jb L(max)
|
||
+ subq %rdx, %rdi
|
||
+ incl %eax
|
||
addq %rdi, %rax
|
||
- subq %rdx, %rax
|
||
# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get wchar_t count. */
|
||
shrq $2, %rax
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
- .p2align 4
|
||
L(max):
|
||
movq %r8, %rax
|
||
+ VZEROUPPER_RETURN
|
||
+
|
||
+ .p2align 4
|
||
+L(last_4x_vec):
|
||
+ /* Test first 2x VEC normally. */
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x1)
|
||
+
|
||
+ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
|
||
+ vpmovmskb %ymm1, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x2)
|
||
+
|
||
+ /* Normalize length. */
|
||
+ andl $(VEC_SIZE * 4 - 1), %esi
|
||
+ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1
|
||
+ vpmovmskb %ymm1, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x3)
|
||
+
|
||
+ subl $(VEC_SIZE * 3), %esi
|
||
+ jb L(max)
|
||
+
|
||
+ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1
|
||
+ vpmovmskb %ymm1, %eax
|
||
+ tzcntl %eax, %eax
|
||
+ /* Check the end of data. */
|
||
+ cmpl %eax, %esi
|
||
+ jb L(max)
|
||
+ subq %rdx, %rdi
|
||
+ addl $(VEC_SIZE * 3 + 1), %eax
|
||
+ addq %rdi, %rax
|
||
# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get wchar_t count. */
|
||
shrq $2, %rax
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
- .p2align 4
|
||
-L(zero):
|
||
- xorl %eax, %eax
|
||
- ret
|
||
-# endif
|
||
|
||
.p2align 4
|
||
-L(first_vec_x0):
|
||
+L(last_vec_x1):
|
||
+ /* essentially duplicates of first_vec_x1 but use 64 bit
|
||
+ instructions. */
|
||
tzcntl %eax, %eax
|
||
+ subq %rdx, %rdi
|
||
+ incl %eax
|
||
addq %rdi, %rax
|
||
- subq %rdx, %rax
|
||
-# ifdef USE_AS_WCSLEN
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get wchar_t count. */
|
||
shrq $2, %rax
|
||
-# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+# endif
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
-L(first_vec_x1):
|
||
+L(last_vec_x2):
|
||
+ /* essentially duplicates of first_vec_x2 but use 64 bit
|
||
+ instructions. */
|
||
tzcntl %eax, %eax
|
||
- addq $VEC_SIZE, %rax
|
||
+ subq %rdx, %rdi
|
||
+ addl $(VEC_SIZE + 1), %eax
|
||
addq %rdi, %rax
|
||
- subq %rdx, %rax
|
||
-# ifdef USE_AS_WCSLEN
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get wchar_t count. */
|
||
shrq $2, %rax
|
||
-# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
+# endif
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
-L(first_vec_x2):
|
||
+L(last_vec_x3):
|
||
tzcntl %eax, %eax
|
||
- addq $(VEC_SIZE * 2), %rax
|
||
+ subl $(VEC_SIZE * 2), %esi
|
||
+ /* Check the end of data. */
|
||
+ cmpl %eax, %esi
|
||
+ jb L(max_end)
|
||
+ subq %rdx, %rdi
|
||
+ addl $(VEC_SIZE * 2 + 1), %eax
|
||
addq %rdi, %rax
|
||
- subq %rdx, %rax
|
||
-# ifdef USE_AS_WCSLEN
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get wchar_t count. */
|
||
shrq $2, %rax
|
||
+# endif
|
||
+ VZEROUPPER_RETURN
|
||
+L(max_end):
|
||
+ movq %r8, %rax
|
||
+ VZEROUPPER_RETURN
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
|
||
+ /* Cold case for crossing page with first load. */
|
||
.p2align 4
|
||
-L(4x_vec_end):
|
||
- VPCMPEQ %ymm1, %ymm0, %ymm1
|
||
+L(cross_page_boundary):
|
||
+ /* Align data to VEC_SIZE - 1. */
|
||
+ orq $(VEC_SIZE - 1), %rdi
|
||
+ VPCMPEQ -(VEC_SIZE - 1)(%rdi), %ymm0, %ymm1
|
||
vpmovmskb %ymm1, %eax
|
||
+ /* Remove the leading bytes. sarxl only uses bits [4:0] of COUNT
|
||
+ so no need to manually mod rdx. */
|
||
+ sarxl %edx, %eax, %eax
|
||
+# ifdef USE_AS_STRNLEN
|
||
testl %eax, %eax
|
||
- jnz L(first_vec_x0)
|
||
- VPCMPEQ %ymm2, %ymm0, %ymm2
|
||
- vpmovmskb %ymm2, %eax
|
||
- testl %eax, %eax
|
||
- jnz L(first_vec_x1)
|
||
- VPCMPEQ %ymm3, %ymm0, %ymm3
|
||
- vpmovmskb %ymm3, %eax
|
||
+ jnz L(cross_page_less_vec)
|
||
+ leaq 1(%rdi), %rcx
|
||
+ subq %rdx, %rcx
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get wchar_t count. */
|
||
+ shrl $2, %ecx
|
||
+# endif
|
||
+ /* Check length. */
|
||
+ cmpq %rsi, %rcx
|
||
+ jb L(cross_page_continue)
|
||
+ movq %r8, %rax
|
||
+# else
|
||
testl %eax, %eax
|
||
- jnz L(first_vec_x2)
|
||
- VPCMPEQ %ymm4, %ymm0, %ymm4
|
||
- vpmovmskb %ymm4, %eax
|
||
-L(first_vec_x3):
|
||
+ jz L(cross_page_continue)
|
||
tzcntl %eax, %eax
|
||
- addq $(VEC_SIZE * 3), %rax
|
||
- addq %rdi, %rax
|
||
- subq %rdx, %rax
|
||
-# ifdef USE_AS_WCSLEN
|
||
- shrq $2, %rax
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide length by 4 to get wchar_t count. */
|
||
+ shrl $2, %eax
|
||
+# endif
|
||
+# endif
|
||
+L(return_vzeroupper):
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN
|
||
+
|
||
+# ifdef USE_AS_STRNLEN
|
||
+ .p2align 4
|
||
+L(cross_page_less_vec):
|
||
+ tzcntl %eax, %eax
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Multiply length by 4 to get byte count. */
|
||
+ sall $2, %esi
|
||
+# endif
|
||
+ cmpq %rax, %rsi
|
||
+ cmovb %esi, %eax
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ shrl $2, %eax
|
||
+# endif
|
||
+ VZEROUPPER_RETURN
|
||
# endif
|
||
- VZEROUPPER
|
||
- ret
|
||
|
||
END (STRLEN)
|
||
#endif
|
||
diff --git a/sysdeps/x86_64/multiarch/strlen-evex.S b/sysdeps/x86_64/multiarch/strlen-evex.S
|
||
new file mode 100644
|
||
index 0000000000..4bf6874b82
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strlen-evex.S
|
||
@@ -0,0 +1,489 @@
|
||
+/* strlen/strnlen/wcslen/wcsnlen optimized with 256-bit EVEX instructions.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#if IS_IN (libc)
|
||
+
|
||
+# include <sysdep.h>
|
||
+
|
||
+# ifndef STRLEN
|
||
+# define STRLEN __strlen_evex
|
||
+# endif
|
||
+
|
||
+# define VMOVA vmovdqa64
|
||
+
|
||
+# ifdef USE_AS_WCSLEN
|
||
+# define VPCMP vpcmpd
|
||
+# define VPMINU vpminud
|
||
+# define SHIFT_REG ecx
|
||
+# define CHAR_SIZE 4
|
||
+# else
|
||
+# define VPCMP vpcmpb
|
||
+# define VPMINU vpminub
|
||
+# define SHIFT_REG edx
|
||
+# define CHAR_SIZE 1
|
||
+# endif
|
||
+
|
||
+# define XMMZERO xmm16
|
||
+# define YMMZERO ymm16
|
||
+# define YMM1 ymm17
|
||
+# define YMM2 ymm18
|
||
+# define YMM3 ymm19
|
||
+# define YMM4 ymm20
|
||
+# define YMM5 ymm21
|
||
+# define YMM6 ymm22
|
||
+
|
||
+# define VEC_SIZE 32
|
||
+# define PAGE_SIZE 4096
|
||
+# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
|
||
+
|
||
+ .section .text.evex,"ax",@progbits
|
||
+ENTRY (STRLEN)
|
||
+# ifdef USE_AS_STRNLEN
|
||
+ /* Check zero length. */
|
||
+ test %RSI_LP, %RSI_LP
|
||
+ jz L(zero)
|
||
+# ifdef __ILP32__
|
||
+ /* Clear the upper 32 bits. */
|
||
+ movl %esi, %esi
|
||
+# endif
|
||
+ mov %RSI_LP, %R8_LP
|
||
+# endif
|
||
+ movl %edi, %eax
|
||
+ vpxorq %XMMZERO, %XMMZERO, %XMMZERO
|
||
+ /* Clear high bits from edi. Only keeping bits relevant to page
|
||
+ cross check. */
|
||
+ andl $(PAGE_SIZE - 1), %eax
|
||
+ /* Check if we may cross page boundary with one vector load. */
|
||
+ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
|
||
+ ja L(cross_page_boundary)
|
||
+
|
||
+ /* Check the first VEC_SIZE bytes. Each bit in K0 represents a
|
||
+ null byte. */
|
||
+ VPCMP $0, (%rdi), %YMMZERO, %k0
|
||
+ kmovd %k0, %eax
|
||
+# ifdef USE_AS_STRNLEN
|
||
+ /* If length <= CHAR_PER_VEC handle special. */
|
||
+ cmpq $CHAR_PER_VEC, %rsi
|
||
+ jbe L(first_vec_x0)
|
||
+# endif
|
||
+ testl %eax, %eax
|
||
+ jz L(aligned_more)
|
||
+ tzcntl %eax, %eax
|
||
+ ret
|
||
+# ifdef USE_AS_STRNLEN
|
||
+L(zero):
|
||
+ xorl %eax, %eax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(first_vec_x0):
|
||
+ /* Set bit for max len so that tzcnt will return min of max len
|
||
+ and position of first match. */
|
||
+ btsq %rsi, %rax
|
||
+ tzcntl %eax, %eax
|
||
+ ret
|
||
+# endif
|
||
+
|
||
+ .p2align 4
|
||
+L(first_vec_x1):
|
||
+ tzcntl %eax, %eax
|
||
+ /* Safe to use 32 bit instructions as these are only called for
|
||
+ size = [1, 159]. */
|
||
+# ifdef USE_AS_STRNLEN
|
||
+ /* Use ecx which was computed earlier to compute correct value.
|
||
+ */
|
||
+ leal -(CHAR_PER_VEC * 4 + 1)(%rcx, %rax), %eax
|
||
+# else
|
||
+ subl %edx, %edi
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
|
||
+ sarl $2, %edi
|
||
+# endif
|
||
+ leal CHAR_PER_VEC(%rdi, %rax), %eax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(first_vec_x2):
|
||
+ tzcntl %eax, %eax
|
||
+ /* Safe to use 32 bit instructions as these are only called for
|
||
+ size = [1, 159]. */
|
||
+# ifdef USE_AS_STRNLEN
|
||
+ /* Use ecx which was computed earlier to compute correct value.
|
||
+ */
|
||
+ leal -(CHAR_PER_VEC * 3 + 1)(%rcx, %rax), %eax
|
||
+# else
|
||
+ subl %edx, %edi
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
|
||
+ sarl $2, %edi
|
||
+# endif
|
||
+ leal (CHAR_PER_VEC * 2)(%rdi, %rax), %eax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(first_vec_x3):
|
||
+ tzcntl %eax, %eax
|
||
+ /* Safe to use 32 bit instructions as these are only called for
|
||
+ size = [1, 159]. */
|
||
+# ifdef USE_AS_STRNLEN
|
||
+ /* Use ecx which was computed earlier to compute correct value.
|
||
+ */
|
||
+ leal -(CHAR_PER_VEC * 2 + 1)(%rcx, %rax), %eax
|
||
+# else
|
||
+ subl %edx, %edi
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
|
||
+ sarl $2, %edi
|
||
+# endif
|
||
+ leal (CHAR_PER_VEC * 3)(%rdi, %rax), %eax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(first_vec_x4):
|
||
+ tzcntl %eax, %eax
|
||
+ /* Safe to use 32 bit instructions as these are only called for
|
||
+ size = [1, 159]. */
|
||
+# ifdef USE_AS_STRNLEN
|
||
+ /* Use ecx which was computed earlier to compute correct value.
|
||
+ */
|
||
+ leal -(CHAR_PER_VEC + 1)(%rcx, %rax), %eax
|
||
+# else
|
||
+ subl %edx, %edi
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
|
||
+ sarl $2, %edi
|
||
+# endif
|
||
+ leal (CHAR_PER_VEC * 4)(%rdi, %rax), %eax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 5
|
||
+L(aligned_more):
|
||
+ movq %rdi, %rdx
|
||
+ /* Align data to VEC_SIZE. */
|
||
+ andq $-(VEC_SIZE), %rdi
|
||
+L(cross_page_continue):
|
||
+ /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time
|
||
+ since data is only aligned to VEC_SIZE. */
|
||
+# ifdef USE_AS_STRNLEN
|
||
+ /* + CHAR_SIZE because it simplifies the logic in
|
||
+ last_4x_vec_or_less. */
|
||
+ leaq (VEC_SIZE * 5 + CHAR_SIZE)(%rdi), %rcx
|
||
+ subq %rdx, %rcx
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
|
||
+ sarl $2, %ecx
|
||
+# endif
|
||
+# endif
|
||
+ /* Load first VEC regardless. */
|
||
+ VPCMP $0, VEC_SIZE(%rdi), %YMMZERO, %k0
|
||
+# ifdef USE_AS_STRNLEN
|
||
+ /* Adjust length. If near end handle specially. */
|
||
+ subq %rcx, %rsi
|
||
+ jb L(last_4x_vec_or_less)
|
||
+# endif
|
||
+ kmovd %k0, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(first_vec_x1)
|
||
+
|
||
+ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0
|
||
+ kmovd %k0, %eax
|
||
+ test %eax, %eax
|
||
+ jnz L(first_vec_x2)
|
||
+
|
||
+ VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMZERO, %k0
|
||
+ kmovd %k0, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(first_vec_x3)
|
||
+
|
||
+ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMZERO, %k0
|
||
+ kmovd %k0, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(first_vec_x4)
|
||
+
|
||
+ addq $VEC_SIZE, %rdi
|
||
+# ifdef USE_AS_STRNLEN
|
||
+ /* Check if at last VEC_SIZE * 4 length. */
|
||
+ cmpq $(CHAR_PER_VEC * 4 - 1), %rsi
|
||
+ jbe L(last_4x_vec_or_less_load)
|
||
+ movl %edi, %ecx
|
||
+ andl $(VEC_SIZE * 4 - 1), %ecx
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
|
||
+ sarl $2, %ecx
|
||
+# endif
|
||
+ /* Readjust length. */
|
||
+ addq %rcx, %rsi
|
||
+# endif
|
||
+ /* Align data to VEC_SIZE * 4. */
|
||
+ andq $-(VEC_SIZE * 4), %rdi
|
||
+
|
||
+ /* Compare 4 * VEC at a time forward. */
|
||
+ .p2align 4
|
||
+L(loop_4x_vec):
|
||
+ /* Load first VEC regardless. */
|
||
+ VMOVA (VEC_SIZE * 4)(%rdi), %YMM1
|
||
+# ifdef USE_AS_STRNLEN
|
||
+ /* Break if at end of length. */
|
||
+ subq $(CHAR_PER_VEC * 4), %rsi
|
||
+ jb L(last_4x_vec_or_less_cmpeq)
|
||
+# endif
|
||
+ /* Save some code size by microfusing VPMINU with the load. Since
|
||
+ the matches in ymm2/ymm4 can only be returned if there were no
|
||
+ matches in ymm1/ymm3 respectively there is no issue with overlap.
|
||
+ */
|
||
+ VPMINU (VEC_SIZE * 5)(%rdi), %YMM1, %YMM2
|
||
+ VMOVA (VEC_SIZE * 6)(%rdi), %YMM3
|
||
+ VPMINU (VEC_SIZE * 7)(%rdi), %YMM3, %YMM4
|
||
+
|
||
+ VPCMP $0, %YMM2, %YMMZERO, %k0
|
||
+ VPCMP $0, %YMM4, %YMMZERO, %k1
|
||
+ subq $-(VEC_SIZE * 4), %rdi
|
||
+ kortestd %k0, %k1
|
||
+ jz L(loop_4x_vec)
|
||
+
|
||
+ /* Check if end was in first half. */
|
||
+ kmovd %k0, %eax
|
||
+ subq %rdx, %rdi
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ shrq $2, %rdi
|
||
+# endif
|
||
+ testl %eax, %eax
|
||
+ jz L(second_vec_return)
|
||
+
|
||
+ VPCMP $0, %YMM1, %YMMZERO, %k2
|
||
+ kmovd %k2, %edx
|
||
+ /* Combine VEC1 matches (edx) with VEC2 matches (eax). */
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ sall $CHAR_PER_VEC, %eax
|
||
+ orl %edx, %eax
|
||
+ tzcntl %eax, %eax
|
||
+# else
|
||
+ salq $CHAR_PER_VEC, %rax
|
||
+ orq %rdx, %rax
|
||
+ tzcntq %rax, %rax
|
||
+# endif
|
||
+ addq %rdi, %rax
|
||
+ ret
|
||
+
|
||
+
|
||
+# ifdef USE_AS_STRNLEN
|
||
+
|
||
+L(last_4x_vec_or_less_load):
|
||
+ /* Depending on entry adjust rdi / prepare first VEC in YMM1. */
|
||
+ VMOVA (VEC_SIZE * 4)(%rdi), %YMM1
|
||
+L(last_4x_vec_or_less_cmpeq):
|
||
+ VPCMP $0, %YMM1, %YMMZERO, %k0
|
||
+ addq $(VEC_SIZE * 3), %rdi
|
||
+L(last_4x_vec_or_less):
|
||
+ kmovd %k0, %eax
|
||
+ /* If remaining length > VEC_SIZE * 2. This works if esi is off by
|
||
+ VEC_SIZE * 4. */
|
||
+ testl $(CHAR_PER_VEC * 2), %esi
|
||
+ jnz L(last_4x_vec)
|
||
+
|
||
+ /* length may have been negative or positive by an offset of
|
||
+ CHAR_PER_VEC * 4 depending on where this was called from. This
|
||
+ fixes that. */
|
||
+ andl $(CHAR_PER_VEC * 4 - 1), %esi
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x1_check)
|
||
+
|
||
+ /* Check the end of data. */
|
||
+ subl $CHAR_PER_VEC, %esi
|
||
+ jb L(max)
|
||
+
|
||
+ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0
|
||
+ kmovd %k0, %eax
|
||
+ tzcntl %eax, %eax
|
||
+ /* Check the end of data. */
|
||
+ cmpl %eax, %esi
|
||
+ jb L(max)
|
||
+
|
||
+ subq %rdx, %rdi
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
|
||
+ sarq $2, %rdi
|
||
+# endif
|
||
+ leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax
|
||
+ ret
|
||
+L(max):
|
||
+ movq %r8, %rax
|
||
+ ret
|
||
+# endif
|
||
+
|
||
+ /* Placed here in strnlen so that the jcc L(last_4x_vec_or_less)
|
||
+ in the 4x VEC loop can use 2 byte encoding. */
|
||
+ .p2align 4
|
||
+L(second_vec_return):
|
||
+ VPCMP $0, %YMM3, %YMMZERO, %k0
|
||
+ /* Combine YMM3 matches (k0) with YMM4 matches (k1). */
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ kunpckbw %k0, %k1, %k0
|
||
+ kmovd %k0, %eax
|
||
+ tzcntl %eax, %eax
|
||
+# else
|
||
+ kunpckdq %k0, %k1, %k0
|
||
+ kmovq %k0, %rax
|
||
+ tzcntq %rax, %rax
|
||
+# endif
|
||
+ leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax
|
||
+ ret
|
||
+
|
||
+
|
||
+# ifdef USE_AS_STRNLEN
|
||
+L(last_vec_x1_check):
|
||
+ tzcntl %eax, %eax
|
||
+ /* Check the end of data. */
|
||
+ cmpl %eax, %esi
|
||
+ jb L(max)
|
||
+ subq %rdx, %rdi
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
|
||
+ sarq $2, %rdi
|
||
+# endif
|
||
+ leaq (CHAR_PER_VEC)(%rdi, %rax), %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_4x_vec):
|
||
+ /* Test first 2x VEC normally. */
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x1)
|
||
+
|
||
+ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0
|
||
+ kmovd %k0, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x2)
|
||
+
|
||
+ /* Normalize length. */
|
||
+ andl $(CHAR_PER_VEC * 4 - 1), %esi
|
||
+ VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMZERO, %k0
|
||
+ kmovd %k0, %eax
|
||
+ testl %eax, %eax
|
||
+ jnz L(last_vec_x3)
|
||
+
|
||
+ /* Check the end of data. */
|
||
+ subl $(CHAR_PER_VEC * 3), %esi
|
||
+ jb L(max)
|
||
+
|
||
+ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMZERO, %k0
|
||
+ kmovd %k0, %eax
|
||
+ tzcntl %eax, %eax
|
||
+ /* Check the end of data. */
|
||
+ cmpl %eax, %esi
|
||
+ jb L(max_end)
|
||
+
|
||
+ subq %rdx, %rdi
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
|
||
+ sarq $2, %rdi
|
||
+# endif
|
||
+ leaq (CHAR_PER_VEC * 4)(%rdi, %rax), %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_vec_x1):
|
||
+ tzcntl %eax, %eax
|
||
+ subq %rdx, %rdi
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
|
||
+ sarq $2, %rdi
|
||
+# endif
|
||
+ leaq (CHAR_PER_VEC)(%rdi, %rax), %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_vec_x2):
|
||
+ tzcntl %eax, %eax
|
||
+ subq %rdx, %rdi
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
|
||
+ sarq $2, %rdi
|
||
+# endif
|
||
+ leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(last_vec_x3):
|
||
+ tzcntl %eax, %eax
|
||
+ subl $(CHAR_PER_VEC * 2), %esi
|
||
+ /* Check the end of data. */
|
||
+ cmpl %eax, %esi
|
||
+ jb L(max_end)
|
||
+ subq %rdx, %rdi
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide bytes by 4 to get the wchar_t count. */
|
||
+ sarq $2, %rdi
|
||
+# endif
|
||
+ leaq (CHAR_PER_VEC * 3)(%rdi, %rax), %rax
|
||
+ ret
|
||
+L(max_end):
|
||
+ movq %r8, %rax
|
||
+ ret
|
||
+# endif
|
||
+
|
||
+ /* Cold case: the first load would cross a page boundary. */
|
||
+ .p2align 4
|
||
+L(cross_page_boundary):
|
||
+ movq %rdi, %rdx
|
||
+ /* Align data to VEC_SIZE. */
|
||
+ andq $-VEC_SIZE, %rdi
|
||
+ VPCMP $0, (%rdi), %YMMZERO, %k0
|
||
+ kmovd %k0, %eax
|
||
+ /* Shift out the mask bits for data before the string start. */
|
||
+# ifdef USE_AS_WCSLEN
|
||
+ /* NB: Divide shift count by 4 since each bit in K0 represents 4
|
||
+ bytes. */
|
||
+ movl %edx, %ecx
|
||
+ shrl $2, %ecx
|
||
+ andl $(CHAR_PER_VEC - 1), %ecx
|
||
+# endif
|
||
+ /* SHIFT_REG is ecx for USE_AS_WCSLEN and edx otherwise. */
|
||
+ sarxl %SHIFT_REG, %eax, %eax
|
||
+ testl %eax, %eax
|
||
+# ifndef USE_AS_STRNLEN
|
||
+ jz L(cross_page_continue)
|
||
+ tzcntl %eax, %eax
|
||
+ ret
|
||
+# else
|
||
+ jnz L(cross_page_less_vec)
|
||
+# ifndef USE_AS_WCSLEN
|
||
+ movl %edx, %ecx
|
||
+ andl $(CHAR_PER_VEC - 1), %ecx
|
||
+# endif
|
||
+ movl $CHAR_PER_VEC, %eax
|
||
+ subl %ecx, %eax
|
||
+ /* Check the end of data: return length if it ends in this VEC. */
|
||
+ cmpq %rax, %rsi
|
||
+ ja L(cross_page_continue)
|
||
+ movl %esi, %eax
|
||
+ ret
|
||
+L(cross_page_less_vec):
|
||
+ tzcntl %eax, %eax
|
||
+ /* Select min of length and position of first null. */
|
||
+ cmpq %rax, %rsi
|
||
+ cmovb %esi, %eax
|
||
+ ret
|
||
+# endif
|
||
+
|
||
+END (STRLEN)
|
||
+#endif
|
||
diff --git a/sysdeps/x86_64/multiarch/strlen-sse2.S b/sysdeps/x86_64/multiarch/strlen-sse2.S
|
||
index 055fbbc690..812af73c13 100644
|
||
--- a/sysdeps/x86_64/multiarch/strlen-sse2.S
|
||
+++ b/sysdeps/x86_64/multiarch/strlen-sse2.S
|
||
@@ -20,4 +20,4 @@
|
||
# define strlen __strlen_sse2
|
||
#endif
|
||
|
||
-#include "../strlen.S"
|
||
+#include "strlen-vec.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/strlen-vec.S b/sysdeps/x86_64/multiarch/strlen-vec.S
|
||
new file mode 100644
|
||
index 0000000000..439e486a43
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strlen-vec.S
|
||
@@ -0,0 +1,270 @@
|
||
+/* SSE2 version of strlen and SSE4.1 version of wcslen.
|
||
+ Copyright (C) 2012-2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#include <sysdep.h>
|
||
+
|
||
+#ifdef AS_WCSLEN
|
||
+# define PMINU pminud
|
||
+# define PCMPEQ pcmpeqd
|
||
+# define SHIFT_RETURN shrq $2, %rax
|
||
+#else
|
||
+# define PMINU pminub
|
||
+# define PCMPEQ pcmpeqb
|
||
+# define SHIFT_RETURN
|
||
+#endif
|
||
+
|
||
+/* Long-lived registers in strlen (s) and strnlen (s, n) are:
|
||
+
|
||
+ %xmm3 - zero
|
||
+ %rdi - s
|
||
+ %r10 (s+n) & (~(64-1))
|
||
+ %r11 s+n
|
||
+*/
|
||
+
|
||
+
|
||
+.text
|
||
+ENTRY(strlen)
|
||
+
|
||
+/* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx. */
|
||
+#define FIND_ZERO \
|
||
+ PCMPEQ (%rax), %xmm0; \
|
||
+ PCMPEQ 16(%rax), %xmm1; \
|
||
+ PCMPEQ 32(%rax), %xmm2; \
|
||
+ PCMPEQ 48(%rax), %xmm3; \
|
||
+ pmovmskb %xmm0, %esi; \
|
||
+ pmovmskb %xmm1, %edx; \
|
||
+ pmovmskb %xmm2, %r8d; \
|
||
+ pmovmskb %xmm3, %ecx; \
|
||
+ salq $16, %rdx; \
|
||
+ salq $16, %rcx; \
|
||
+ orq %rsi, %rdx; \
|
||
+ orq %r8, %rcx; \
|
||
+ salq $32, %rcx; \
|
||
+ orq %rcx, %rdx;
|
||
+
|
||
+#ifdef AS_STRNLEN
|
||
+/* Do not read anything when n==0. */
|
||
+ test %RSI_LP, %RSI_LP
|
||
+ jne L(n_nonzero)
|
||
+ xor %rax, %rax
|
||
+ ret
|
||
+L(n_nonzero):
|
||
+# ifdef AS_WCSLEN
|
||
+/* Check for overflow from maxlen * sizeof(wchar_t). If it would
|
||
+ overflow the only way this program doesn't have undefined behavior
|
||
+ is if there is a null terminator in valid memory so wcslen will
|
||
+ suffice. */
|
||
+ mov %RSI_LP, %R10_LP
|
||
+ sar $62, %R10_LP
|
||
+ test %R10_LP, %R10_LP
|
||
+ jnz __wcslen_sse4_1
|
||
+ sal $2, %RSI_LP
|
||
+# endif
|
||
+
|
||
+
|
||
+/* Initialize long lived registers. */
|
||
+
|
||
+ add %RDI_LP, %RSI_LP
|
||
+# ifdef AS_WCSLEN
|
||
+/* Check for overflow again from s + maxlen * sizeof(wchar_t). */
|
||
+ jbe __wcslen_sse4_1
|
||
+# endif
|
||
+ mov %RSI_LP, %R10_LP
|
||
+ and $-64, %R10_LP
|
||
+ mov %RSI_LP, %R11_LP
|
||
+#endif
|
||
+
|
||
+ pxor %xmm0, %xmm0
|
||
+ pxor %xmm1, %xmm1
|
||
+ pxor %xmm2, %xmm2
|
||
+ pxor %xmm3, %xmm3
|
||
+ movq %rdi, %rax
|
||
+ movq %rdi, %rcx
|
||
+ andq $4095, %rcx
|
||
+/* Offsets 4032-4047 align down to 4032, so a 64-byte read fits in the page. */
|
||
+ cmpq $4047, %rcx
|
||
+/* We cannot unify this branching as it would be ~6 cycles slower. */
|
||
+ ja L(cross_page)
|
||
+
|
||
+#ifdef AS_STRNLEN
|
||
+/* Test if end is among first 64 bytes. */
|
||
+# define STRNLEN_PROLOG \
|
||
+ mov %r11, %rsi; \
|
||
+ subq %rax, %rsi; \
|
||
+ andq $-64, %rax; \
|
||
+ testq $-64, %rsi; \
|
||
+ je L(strnlen_ret)
|
||
+#else
|
||
+# define STRNLEN_PROLOG andq $-64, %rax;
|
||
+#endif
|
||
+
|
||
+/* Ignore bits in mask that come before start of string. */
|
||
+#define PROLOG(lab) \
|
||
+ movq %rdi, %rcx; \
|
||
+ xorq %rax, %rcx; \
|
||
+ STRNLEN_PROLOG; \
|
||
+ sarq %cl, %rdx; \
|
||
+ test %rdx, %rdx; \
|
||
+ je L(lab); \
|
||
+ bsfq %rdx, %rax; \
|
||
+ SHIFT_RETURN; \
|
||
+ ret
|
||
+
|
||
+#ifdef AS_STRNLEN
|
||
+ andq $-16, %rax
|
||
+ FIND_ZERO
|
||
+#else
|
||
+ /* Test first 16 bytes unaligned. */
|
||
+ movdqu (%rax), %xmm4
|
||
+ PCMPEQ %xmm0, %xmm4
|
||
+ pmovmskb %xmm4, %edx
|
||
+ test %edx, %edx
|
||
+ je L(next48_bytes)
|
||
+ bsf %edx, %eax /* If eax is zeroed 16bit bsf can be used. */
|
||
+ SHIFT_RETURN
|
||
+ ret
|
||
+
|
||
+L(next48_bytes):
|
||
+/* Same as FIND_ZERO except we do not check first 16 bytes. */
|
||
+ andq $-16, %rax
|
||
+ PCMPEQ 16(%rax), %xmm1
|
||
+ PCMPEQ 32(%rax), %xmm2
|
||
+ PCMPEQ 48(%rax), %xmm3
|
||
+ pmovmskb %xmm1, %edx
|
||
+ pmovmskb %xmm2, %r8d
|
||
+ pmovmskb %xmm3, %ecx
|
||
+ salq $16, %rdx
|
||
+ salq $16, %rcx
|
||
+ orq %r8, %rcx
|
||
+ salq $32, %rcx
|
||
+ orq %rcx, %rdx
|
||
+#endif
|
||
+
|
||
+ /* When no zero byte is found xmm1-3 are zero so we do not have to
|
||
+ zero them. */
|
||
+ PROLOG(loop)
|
||
+
|
||
+ .p2align 4
|
||
+L(cross_page):
|
||
+ andq $-64, %rax
|
||
+ FIND_ZERO
|
||
+ PROLOG(loop_init)
|
||
+
|
||
+#ifdef AS_STRNLEN
|
||
+/* We must do this check to correctly handle strnlen (s, -1). */
|
||
+L(strnlen_ret):
|
||
+ bts %rsi, %rdx
|
||
+ sarq %cl, %rdx
|
||
+ test %rdx, %rdx
|
||
+ je L(loop_init)
|
||
+ bsfq %rdx, %rax
|
||
+ SHIFT_RETURN
|
||
+ ret
|
||
+#endif
|
||
+ .p2align 4
|
||
+L(loop_init):
|
||
+ pxor %xmm1, %xmm1
|
||
+ pxor %xmm2, %xmm2
|
||
+ pxor %xmm3, %xmm3
|
||
+#ifdef AS_STRNLEN
|
||
+ .p2align 4
|
||
+L(loop):
|
||
+
|
||
+ addq $64, %rax
|
||
+ cmpq %rax, %r10
|
||
+ je L(exit_end)
|
||
+
|
||
+ movdqa (%rax), %xmm0
|
||
+ PMINU 16(%rax), %xmm0
|
||
+ PMINU 32(%rax), %xmm0
|
||
+ PMINU 48(%rax), %xmm0
|
||
+ PCMPEQ %xmm3, %xmm0
|
||
+ pmovmskb %xmm0, %edx
|
||
+ testl %edx, %edx
|
||
+ jne L(exit)
|
||
+ jmp L(loop)
|
||
+
|
||
+ .p2align 4
|
||
+L(exit_end):
|
||
+ cmp %rax, %r11
|
||
+ je L(first) /* Do not read when end is at page boundary. */
|
||
+ pxor %xmm0, %xmm0
|
||
+ FIND_ZERO
|
||
+
|
||
+L(first):
|
||
+ bts %r11, %rdx
|
||
+ bsfq %rdx, %rdx
|
||
+ addq %rdx, %rax
|
||
+ subq %rdi, %rax
|
||
+ SHIFT_RETURN
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(exit):
|
||
+ pxor %xmm0, %xmm0
|
||
+ FIND_ZERO
|
||
+
|
||
+ bsfq %rdx, %rdx
|
||
+ addq %rdx, %rax
|
||
+ subq %rdi, %rax
|
||
+ SHIFT_RETURN
|
||
+ ret
|
||
+
|
||
+#else
|
||
+
|
||
+ /* Main loop. Unrolled twice to improve L2 cache performance on core2. */
|
||
+ .p2align 4
|
||
+L(loop):
|
||
+
|
||
+ movdqa 64(%rax), %xmm0
|
||
+ PMINU 80(%rax), %xmm0
|
||
+ PMINU 96(%rax), %xmm0
|
||
+ PMINU 112(%rax), %xmm0
|
||
+ PCMPEQ %xmm3, %xmm0
|
||
+ pmovmskb %xmm0, %edx
|
||
+ testl %edx, %edx
|
||
+ jne L(exit64)
|
||
+
|
||
+ subq $-128, %rax
|
||
+
|
||
+ movdqa (%rax), %xmm0
|
||
+ PMINU 16(%rax), %xmm0
|
||
+ PMINU 32(%rax), %xmm0
|
||
+ PMINU 48(%rax), %xmm0
|
||
+ PCMPEQ %xmm3, %xmm0
|
||
+ pmovmskb %xmm0, %edx
|
||
+ testl %edx, %edx
|
||
+ jne L(exit0)
|
||
+ jmp L(loop)
|
||
+
|
||
+ .p2align 4
|
||
+L(exit64):
|
||
+ addq $64, %rax
|
||
+L(exit0):
|
||
+ pxor %xmm0, %xmm0
|
||
+ FIND_ZERO
|
||
+
|
||
+ bsfq %rdx, %rdx
|
||
+ addq %rdx, %rax
|
||
+ subq %rdi, %rax
|
||
+ SHIFT_RETURN
|
||
+ ret
|
||
+
|
||
+#endif
|
||
+
|
||
+END(strlen)
|
||
diff --git a/sysdeps/x86_64/multiarch/strncat-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncat-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..0dcea18dbb
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strncat-avx2-rtm.S
|
||
@@ -0,0 +1,3 @@
|
||
+#define USE_AS_STRNCAT
|
||
+#define STRCAT __strncat_avx2_rtm
|
||
+#include "strcat-avx2-rtm.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/strncat-evex.S b/sysdeps/x86_64/multiarch/strncat-evex.S
|
||
new file mode 100644
|
||
index 0000000000..8884f02371
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strncat-evex.S
|
||
@@ -0,0 +1,3 @@
|
||
+#define USE_AS_STRNCAT
|
||
+#define STRCAT __strncat_evex
|
||
+#include "strcat-evex.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..37d1224bb9
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
|
||
@@ -0,0 +1,3 @@
|
||
+#define STRCMP __strncmp_avx2_rtm
|
||
+#define USE_AS_STRNCMP 1
|
||
+#include "strcmp-avx2-rtm.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/strncmp-evex.S b/sysdeps/x86_64/multiarch/strncmp-evex.S
|
||
new file mode 100644
|
||
index 0000000000..a1d53e8c9f
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strncmp-evex.S
|
||
@@ -0,0 +1,3 @@
|
||
+#define STRCMP __strncmp_evex
|
||
+#define USE_AS_STRNCMP 1
|
||
+#include "strcmp-evex.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c
|
||
index 3c94b3ffd9..7accba2b7c 100644
|
||
--- a/sysdeps/x86_64/multiarch/strncmp.c
|
||
+++ b/sysdeps/x86_64/multiarch/strncmp.c
|
||
@@ -30,16 +30,29 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
|
||
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
|
||
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
||
|
||
static inline void *
|
||
IFUNC_SELECTOR (void)
|
||
{
|
||
const struct cpu_features* cpu_features = __get_cpu_features ();
|
||
|
||
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
|
||
- && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
||
- return OPTIMIZE (avx2);
|
||
+ {
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
||
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
|
||
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
|
||
+ && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP))
|
||
+ return OPTIMIZE (evex);
|
||
+
|
||
+ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
||
+ return OPTIMIZE (avx2_rtm);
|
||
+
|
||
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
||
+ return OPTIMIZE (avx2);
|
||
+ }
|
||
|
||
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
|
||
&& !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
|
||
diff --git a/sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..79e7083299
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S
|
||
@@ -0,0 +1,3 @@
|
||
+#define USE_AS_STRNCPY
|
||
+#define STRCPY __strncpy_avx2_rtm
|
||
+#include "strcpy-avx2-rtm.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/strncpy-evex.S b/sysdeps/x86_64/multiarch/strncpy-evex.S
|
||
new file mode 100644
|
||
index 0000000000..40e391f0da
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strncpy-evex.S
|
||
@@ -0,0 +1,3 @@
|
||
+#define USE_AS_STRNCPY
|
||
+#define STRCPY __strncpy_evex
|
||
+#include "strcpy-evex.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S b/sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..04f1626a5c
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S
|
||
@@ -0,0 +1,4 @@
|
||
+#define STRLEN __strnlen_avx2_rtm
|
||
+#define USE_AS_STRNLEN 1
|
||
+
|
||
+#include "strlen-avx2-rtm.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/strnlen-evex.S b/sysdeps/x86_64/multiarch/strnlen-evex.S
|
||
new file mode 100644
|
||
index 0000000000..722022f303
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strnlen-evex.S
|
||
@@ -0,0 +1,4 @@
|
||
+#define STRLEN __strnlen_evex
|
||
+#define USE_AS_STRNLEN 1
|
||
+
|
||
+#include "strlen-evex.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..5def14ec1c
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S
|
||
@@ -0,0 +1,12 @@
|
||
+#ifndef STRRCHR
|
||
+# define STRRCHR __strrchr_avx2_rtm
|
||
+#endif
|
||
+
|
||
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
|
||
+
|
||
+#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
|
||
+
|
||
+#define SECTION(p) p##.avx.rtm
|
||
+
|
||
+#include "strrchr-avx2.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2.S b/sysdeps/x86_64/multiarch/strrchr-avx2.S
|
||
index 146bdd51d0..ad91fab991 100644
|
||
--- a/sysdeps/x86_64/multiarch/strrchr-avx2.S
|
||
+++ b/sysdeps/x86_64/multiarch/strrchr-avx2.S
|
||
@@ -36,9 +36,13 @@
|
||
# define VZEROUPPER vzeroupper
|
||
# endif
|
||
|
||
+# ifndef SECTION
|
||
+# define SECTION(p) p##.avx
|
||
+# endif
|
||
+
|
||
# define VEC_SIZE 32
|
||
|
||
- .section .text.avx,"ax",@progbits
|
||
+ .section SECTION(.text),"ax",@progbits
|
||
ENTRY (STRRCHR)
|
||
movd %esi, %xmm4
|
||
movl %edi, %ecx
|
||
@@ -166,8 +170,8 @@ L(return_value):
|
||
# endif
|
||
bsrl %eax, %eax
|
||
leaq -VEC_SIZE(%rdi, %rax), %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+L(return_vzeroupper):
|
||
+ ZERO_UPPER_VEC_REGISTERS_RETURN
|
||
|
||
.p2align 4
|
||
L(match):
|
||
@@ -198,8 +202,7 @@ L(find_nul):
|
||
jz L(return_value)
|
||
bsrl %eax, %eax
|
||
leaq -VEC_SIZE(%rdi, %rax), %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(char_and_nul):
|
||
@@ -222,14 +225,12 @@ L(char_and_nul_in_first_vec):
|
||
jz L(return_null)
|
||
bsrl %eax, %eax
|
||
leaq -VEC_SIZE(%rdi, %rax), %rax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
.p2align 4
|
||
L(return_null):
|
||
xorl %eax, %eax
|
||
- VZEROUPPER
|
||
- ret
|
||
+ VZEROUPPER_RETURN
|
||
|
||
END (STRRCHR)
|
||
#endif
|
||
diff --git a/sysdeps/x86_64/multiarch/strrchr-evex.S b/sysdeps/x86_64/multiarch/strrchr-evex.S
|
||
new file mode 100644
|
||
index 0000000000..f920b5a584
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/strrchr-evex.S
|
||
@@ -0,0 +1,265 @@
|
||
+/* strrchr/wcsrchr optimized with 256-bit EVEX instructions.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#if IS_IN (libc)
|
||
+
|
||
+# include <sysdep.h>
|
||
+
|
||
+# ifndef STRRCHR
|
||
+# define STRRCHR __strrchr_evex
|
||
+# endif
|
||
+
|
||
+# define VMOVU vmovdqu64
|
||
+# define VMOVA vmovdqa64
|
||
+
|
||
+# ifdef USE_AS_WCSRCHR
|
||
+# define VPBROADCAST vpbroadcastd
|
||
+# define VPCMP vpcmpd
|
||
+# define SHIFT_REG r8d
|
||
+# else
|
||
+# define VPBROADCAST vpbroadcastb
|
||
+# define VPCMP vpcmpb
|
||
+# define SHIFT_REG ecx
|
||
+# endif
|
||
+
|
||
+# define XMMZERO xmm16
|
||
+# define YMMZERO ymm16
|
||
+# define YMMMATCH ymm17
|
||
+# define YMM1 ymm18
|
||
+
|
||
+# define VEC_SIZE 32
|
||
+
|
||
+ .section .text.evex,"ax",@progbits
|
||
+ENTRY (STRRCHR)
|
||
+ movl %edi, %ecx
|
||
+ /* Broadcast CHAR to YMMMATCH. */
|
||
+ VPBROADCAST %esi, %YMMMATCH
|
||
+
|
||
+ vpxorq %XMMZERO, %XMMZERO, %XMMZERO
|
||
+
|
||
+ /* Check if we may cross page boundary with one vector load. */
|
||
+ andl $(2 * VEC_SIZE - 1), %ecx
|
||
+ cmpl $VEC_SIZE, %ecx
|
||
+ ja L(cros_page_boundary)
|
||
+
|
||
+ VMOVU (%rdi), %YMM1
|
||
+
|
||
+ /* Each bit in K0 represents a null byte in YMM1. */
|
||
+ VPCMP $0, %YMMZERO, %YMM1, %k0
|
||
+ /* Each bit in K1 represents a CHAR in YMM1. */
|
||
+ VPCMP $0, %YMMMATCH, %YMM1, %k1
|
||
+ kmovd %k0, %ecx
|
||
+ kmovd %k1, %eax
|
||
+
|
||
+ addq $VEC_SIZE, %rdi
|
||
+
|
||
+ testl %eax, %eax
|
||
+ jnz L(first_vec)
|
||
+
|
||
+ testl %ecx, %ecx
|
||
+ jnz L(return_null)
|
||
+
|
||
+ andq $-VEC_SIZE, %rdi
|
||
+ xorl %edx, %edx
|
||
+ jmp L(aligned_loop)
|
||
+
|
||
+ .p2align 4
|
||
+L(first_vec):
|
||
+ /* Check if there is a null byte. */
|
||
+ testl %ecx, %ecx
|
||
+ jnz L(char_and_nul_in_first_vec)
|
||
+
|
||
+ /* Remember the match and keep searching. */
|
||
+ movl %eax, %edx
|
||
+ movq %rdi, %rsi
|
||
+ andq $-VEC_SIZE, %rdi
|
||
+ jmp L(aligned_loop)
|
||
+
|
||
+ .p2align 4
|
||
+L(cros_page_boundary):
|
||
+ andl $(VEC_SIZE - 1), %ecx
|
||
+ andq $-VEC_SIZE, %rdi
|
||
+
|
||
+# ifdef USE_AS_WCSRCHR
|
||
+ /* NB: Divide shift count by 4 since each bit in K1 represents 4
|
||
+ bytes. */
|
||
+ movl %ecx, %SHIFT_REG
|
||
+ sarl $2, %SHIFT_REG
|
||
+# endif
|
||
+
|
||
+ VMOVA (%rdi), %YMM1
|
||
+
|
||
+ /* Each bit in K0 represents a null byte in YMM1. */
|
||
+ VPCMP $0, %YMMZERO, %YMM1, %k0
|
||
+ /* Each bit in K1 represents a CHAR in YMM1. */
|
||
+ VPCMP $0, %YMMMATCH, %YMM1, %k1
|
||
+ kmovd %k0, %edx
|
||
+ kmovd %k1, %eax
|
||
+
|
||
+ shrxl %SHIFT_REG, %edx, %edx
|
||
+ shrxl %SHIFT_REG, %eax, %eax
|
||
+ addq $VEC_SIZE, %rdi
|
||
+
|
||
+ /* Check if there is a CHAR. */
|
||
+ testl %eax, %eax
|
||
+ jnz L(found_char)
|
||
+
|
||
+ testl %edx, %edx
|
||
+ jnz L(return_null)
|
||
+
|
||
+ jmp L(aligned_loop)
|
||
+
|
||
+ .p2align 4
|
||
+L(found_char):
|
||
+ testl %edx, %edx
|
||
+ jnz L(char_and_nul)
|
||
+
|
||
+ /* Remember the match and keep searching. */
|
||
+ movl %eax, %edx
|
||
+ leaq (%rdi, %rcx), %rsi
|
||
+
|
||
+ .p2align 4
|
||
+L(aligned_loop):
|
||
+ VMOVA (%rdi), %YMM1
|
||
+ addq $VEC_SIZE, %rdi
|
||
+
|
||
+ /* Each bit in K0 represents a null byte in YMM1. */
|
||
+ VPCMP $0, %YMMZERO, %YMM1, %k0
|
||
+ /* Each bit in K1 represents a CHAR in YMM1. */
|
||
+ VPCMP $0, %YMMMATCH, %YMM1, %k1
|
||
+ kmovd %k0, %ecx
|
||
+ kmovd %k1, %eax
|
||
+ orl %eax, %ecx
|
||
+ jnz L(char_nor_null)
|
||
+
|
||
+ VMOVA (%rdi), %YMM1
|
||
+ add $VEC_SIZE, %rdi
|
||
+
|
||
+ /* Each bit in K0 represents a null byte in YMM1. */
|
||
+ VPCMP $0, %YMMZERO, %YMM1, %k0
|
||
+ /* Each bit in K1 represents a CHAR in YMM1. */
|
||
+ VPCMP $0, %YMMMATCH, %YMM1, %k1
|
||
+ kmovd %k0, %ecx
|
||
+ kmovd %k1, %eax
|
||
+ orl %eax, %ecx
|
||
+ jnz L(char_nor_null)
|
||
+
|
||
+ VMOVA (%rdi), %YMM1
|
||
+ addq $VEC_SIZE, %rdi
|
||
+
|
||
+ /* Each bit in K0 represents a null byte in YMM1. */
|
||
+ VPCMP $0, %YMMZERO, %YMM1, %k0
|
||
+ /* Each bit in K1 represents a CHAR in YMM1. */
|
||
+ VPCMP $0, %YMMMATCH, %YMM1, %k1
|
||
+ kmovd %k0, %ecx
|
||
+ kmovd %k1, %eax
|
||
+ orl %eax, %ecx
|
||
+ jnz L(char_nor_null)
|
||
+
|
||
+ VMOVA (%rdi), %YMM1
|
||
+ addq $VEC_SIZE, %rdi
|
||
+
|
||
+ /* Each bit in K0 represents a null byte in YMM1. */
|
||
+ VPCMP $0, %YMMZERO, %YMM1, %k0
|
||
+ /* Each bit in K1 represents a CHAR in YMM1. */
|
||
+ VPCMP $0, %YMMMATCH, %YMM1, %k1
|
||
+ kmovd %k0, %ecx
|
||
+ kmovd %k1, %eax
|
||
+ orl %eax, %ecx
|
||
+ jz L(aligned_loop)
|
||
+
|
||
+ .p2align 4
|
||
+L(char_nor_null):
|
||
+ /* Find a CHAR or a null byte in a loop. */
|
||
+ testl %eax, %eax
|
||
+ jnz L(match)
|
||
+L(return_value):
|
||
+ testl %edx, %edx
|
||
+ jz L(return_null)
|
||
+ movl %edx, %eax
|
||
+ movq %rsi, %rdi
|
||
+ bsrl %eax, %eax
|
||
+# ifdef USE_AS_WCSRCHR
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ leaq -VEC_SIZE(%rdi, %rax, 4), %rax
|
||
+# else
|
||
+ leaq -VEC_SIZE(%rdi, %rax), %rax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(match):
|
||
+ /* Find a CHAR. Check if there is a null byte. */
|
||
+ kmovd %k0, %ecx
|
||
+ testl %ecx, %ecx
|
||
+ jnz L(find_nul)
|
||
+
|
||
+ /* Remember the match and keep searching. */
|
||
+ movl %eax, %edx
|
||
+ movq %rdi, %rsi
|
||
+ jmp L(aligned_loop)
|
||
+
|
||
+ .p2align 4
|
||
+L(find_nul):
|
||
+ /* Mask out any matching bits after the null byte. */
|
||
+ movl %ecx, %r8d
|
||
+ subl $1, %r8d
|
||
+ xorl %ecx, %r8d
|
||
+ andl %r8d, %eax
|
||
+ testl %eax, %eax
|
||
+ /* If there is no CHAR here, return the remembered one. */
|
||
+ jz L(return_value)
|
||
+ bsrl %eax, %eax
|
||
+# ifdef USE_AS_WCSRCHR
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ leaq -VEC_SIZE(%rdi, %rax, 4), %rax
|
||
+# else
|
||
+ leaq -VEC_SIZE(%rdi, %rax), %rax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(char_and_nul):
|
||
+ /* Find both a CHAR and a null byte. */
|
||
+ addq %rcx, %rdi
|
||
+ movl %edx, %ecx
|
||
+L(char_and_nul_in_first_vec):
|
||
+ /* Mask out any matching bits after the null byte. */
|
||
+ movl %ecx, %r8d
|
||
+ subl $1, %r8d
|
||
+ xorl %ecx, %r8d
|
||
+ andl %r8d, %eax
|
||
+ testl %eax, %eax
|
||
+ /* Return null pointer if the null byte comes first. */
|
||
+ jz L(return_null)
|
||
+ bsrl %eax, %eax
|
||
+# ifdef USE_AS_WCSRCHR
|
||
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
|
||
+ leaq -VEC_SIZE(%rdi, %rax, 4), %rax
|
||
+# else
|
||
+ leaq -VEC_SIZE(%rdi, %rax), %rax
|
||
+# endif
|
||
+ ret
|
||
+
|
||
+ .p2align 4
|
||
+L(return_null):
|
||
+ xorl %eax, %eax
|
||
+ ret
|
||
+
|
||
+END (STRRCHR)
|
||
+#endif
|
||
diff --git a/sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..d49dbbf0b4
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S
|
||
@@ -0,0 +1,3 @@
|
||
+#define STRCHR __wcschr_avx2_rtm
|
||
+#define USE_AS_WCSCHR 1
|
||
+#include "strchr-avx2-rtm.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/wcschr-evex.S b/sysdeps/x86_64/multiarch/wcschr-evex.S
|
||
new file mode 100644
|
||
index 0000000000..7cb8f1e41a
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/wcschr-evex.S
|
||
@@ -0,0 +1,3 @@
|
||
+#define STRCHR __wcschr_evex
|
||
+#define USE_AS_WCSCHR 1
|
||
+#include "strchr-evex.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..d6ca2b8064
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S
|
||
@@ -0,0 +1,4 @@
|
||
+#define STRCMP __wcscmp_avx2_rtm
|
||
+#define USE_AS_WCSCMP 1
|
||
+
|
||
+#include "strcmp-avx2-rtm.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/wcscmp-evex.S b/sysdeps/x86_64/multiarch/wcscmp-evex.S
|
||
new file mode 100644
|
||
index 0000000000..42e73e51eb
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/wcscmp-evex.S
|
||
@@ -0,0 +1,4 @@
|
||
+#define STRCMP __wcscmp_evex
|
||
+#define USE_AS_WCSCMP 1
|
||
+
|
||
+#include "strcmp-evex.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..35658d7365
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S
|
||
@@ -0,0 +1,4 @@
|
||
+#define STRLEN __wcslen_avx2_rtm
|
||
+#define USE_AS_WCSLEN 1
|
||
+
|
||
+#include "strlen-avx2-rtm.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/wcslen-evex.S b/sysdeps/x86_64/multiarch/wcslen-evex.S
|
||
new file mode 100644
|
||
index 0000000000..bdafa83bd5
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/wcslen-evex.S
|
||
@@ -0,0 +1,4 @@
|
||
+#define STRLEN __wcslen_evex
|
||
+#define USE_AS_WCSLEN 1
|
||
+
|
||
+#include "strlen-evex.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
|
||
new file mode 100644
|
||
index 0000000000..7e62621afc
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
|
||
@@ -0,0 +1,4 @@
|
||
+#define AS_WCSLEN
|
||
+#define strlen __wcslen_sse4_1
|
||
+
|
||
+#include "strlen-vec.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/wcslen.c b/sysdeps/x86_64/multiarch/wcslen.c
|
||
index bb97438c7f..26b5fdffd6 100644
|
||
--- a/sysdeps/x86_64/multiarch/wcslen.c
|
||
+++ b/sysdeps/x86_64/multiarch/wcslen.c
|
||
@@ -24,7 +24,7 @@
|
||
# undef __wcslen
|
||
|
||
# define SYMBOL_NAME wcslen
|
||
-# include "ifunc-avx2.h"
|
||
+# include "ifunc-wcslen.h"
|
||
|
||
libc_ifunc_redirected (__redirect_wcslen, __wcslen, IFUNC_SELECTOR ());
|
||
weak_alias (__wcslen, wcslen);
|
||
diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..4e88c70cc6
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
|
||
@@ -0,0 +1,5 @@
|
||
+#define STRCMP __wcsncmp_avx2_rtm
|
||
+#define USE_AS_STRNCMP 1
|
||
+#define USE_AS_WCSCMP 1
|
||
+
|
||
+#include "strcmp-avx2-rtm.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/wcsncmp-evex.S b/sysdeps/x86_64/multiarch/wcsncmp-evex.S
|
||
new file mode 100644
|
||
index 0000000000..8a8e310713
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/wcsncmp-evex.S
|
||
@@ -0,0 +1,5 @@
|
||
+#define STRCMP __wcsncmp_evex
|
||
+#define USE_AS_STRNCMP 1
|
||
+#define USE_AS_WCSCMP 1
|
||
+
|
||
+#include "strcmp-evex.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..7437ebee2d
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S
|
||
@@ -0,0 +1,5 @@
|
||
+#define STRLEN __wcsnlen_avx2_rtm
|
||
+#define USE_AS_WCSLEN 1
|
||
+#define USE_AS_STRNLEN 1
|
||
+
|
||
+#include "strlen-avx2-rtm.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-evex.S b/sysdeps/x86_64/multiarch/wcsnlen-evex.S
|
||
new file mode 100644
|
||
index 0000000000..24773bb4e2
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/wcsnlen-evex.S
|
||
@@ -0,0 +1,5 @@
|
||
+#define STRLEN __wcsnlen_evex
|
||
+#define USE_AS_WCSLEN 1
|
||
+#define USE_AS_STRNLEN 1
|
||
+
|
||
+#include "strlen-evex.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
|
||
index a8cab0cb00..5fa51fe07c 100644
|
||
--- a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
|
||
+++ b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
|
||
@@ -2,4 +2,4 @@
|
||
#define AS_STRNLEN
|
||
#define strlen __wcsnlen_sse4_1
|
||
|
||
-#include "../strlen.S"
|
||
+#include "strlen-vec.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
|
||
index 52e7e5d4f3..f15c1b328b 100644
|
||
--- a/sysdeps/x86_64/multiarch/wcsnlen.c
|
||
+++ b/sysdeps/x86_64/multiarch/wcsnlen.c
|
||
@@ -24,27 +24,7 @@
|
||
# undef __wcsnlen
|
||
|
||
# define SYMBOL_NAME wcsnlen
|
||
-# include <init-arch.h>
|
||
-
|
||
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
|
||
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
|
||
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
|
||
-
|
||
-static inline void *
|
||
-IFUNC_SELECTOR (void)
|
||
-{
|
||
- const struct cpu_features* cpu_features = __get_cpu_features ();
|
||
-
|
||
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
|
||
- && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
||
- && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
||
- return OPTIMIZE (avx2);
|
||
-
|
||
- if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
|
||
- return OPTIMIZE (sse4_1);
|
||
-
|
||
- return OPTIMIZE (sse2);
|
||
-}
|
||
+# include "ifunc-wcslen.h"
|
||
|
||
libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());
|
||
weak_alias (__wcsnlen, wcsnlen);
|
||
diff --git a/sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..9bf760833f
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S
|
||
@@ -0,0 +1,3 @@
|
||
+#define STRRCHR __wcsrchr_avx2_rtm
|
||
+#define USE_AS_WCSRCHR 1
|
||
+#include "strrchr-avx2-rtm.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/wcsrchr-evex.S b/sysdeps/x86_64/multiarch/wcsrchr-evex.S
|
||
new file mode 100644
|
||
index 0000000000..c64602f7dc
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/wcsrchr-evex.S
|
||
@@ -0,0 +1,3 @@
|
||
+#define STRRCHR __wcsrchr_evex
|
||
+#define USE_AS_WCSRCHR 1
|
||
+#include "strrchr-evex.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..58ed21db01
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S
|
||
@@ -0,0 +1,4 @@
|
||
+#define MEMCHR __wmemchr_avx2_rtm
|
||
+#define USE_AS_WMEMCHR 1
|
||
+
|
||
+#include "memchr-avx2-rtm.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/wmemchr-evex.S b/sysdeps/x86_64/multiarch/wmemchr-evex.S
|
||
new file mode 100644
|
||
index 0000000000..06cd0f9f5a
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/wmemchr-evex.S
|
||
@@ -0,0 +1,4 @@
|
||
+#define MEMCHR __wmemchr_evex
|
||
+#define USE_AS_WMEMCHR 1
|
||
+
|
||
+#include "memchr-evex.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S b/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S
|
||
new file mode 100644
|
||
index 0000000000..31104d1215
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S
|
||
@@ -0,0 +1,4 @@
|
||
+#define MEMCMP __wmemcmp_avx2_movbe_rtm
|
||
+#define USE_AS_WMEMCMP 1
|
||
+
|
||
+#include "memcmp-avx2-movbe-rtm.S"
|
||
diff --git a/sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S
|
||
new file mode 100644
|
||
index 0000000000..4726d74aa1
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S
|
||
@@ -0,0 +1,4 @@
|
||
+#define MEMCMP __wmemcmp_evex_movbe
|
||
+#define USE_AS_WMEMCMP 1
|
||
+
|
||
+#include "memcmp-evex-movbe.S"
|
||
diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
|
||
index 2e226d0d55..8422c15cc8 100644
|
||
--- a/sysdeps/x86_64/strlen.S
|
||
+++ b/sysdeps/x86_64/strlen.S
|
||
@@ -1,5 +1,5 @@
|
||
-/* SSE2 version of strlen/wcslen.
|
||
- Copyright (C) 2012-2020 Free Software Foundation, Inc.
|
||
+/* SSE2 version of strlen.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
This file is part of the GNU C Library.
|
||
|
||
The GNU C Library is free software; you can redistribute it and/or
|
||
@@ -16,243 +16,6 @@
|
||
License along with the GNU C Library; if not, see
|
||
<https://www.gnu.org/licenses/>. */
|
||
|
||
-#include <sysdep.h>
|
||
+#include "multiarch/strlen-vec.S"
|
||
|
||
-#ifdef AS_WCSLEN
|
||
-# define PMINU pminud
|
||
-# define PCMPEQ pcmpeqd
|
||
-# define SHIFT_RETURN shrq $2, %rax
|
||
-#else
|
||
-# define PMINU pminub
|
||
-# define PCMPEQ pcmpeqb
|
||
-# define SHIFT_RETURN
|
||
-#endif
|
||
-
|
||
-/* Long lived register in strlen(s), strnlen(s, n) are:
|
||
-
|
||
- %xmm3 - zero
|
||
- %rdi - s
|
||
- %r10 (s+n) & (~(64-1))
|
||
- %r11 s+n
|
||
-*/
|
||
-
|
||
-
|
||
-.text
|
||
-ENTRY(strlen)
|
||
-
|
||
-/* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx. */
|
||
-#define FIND_ZERO \
|
||
- PCMPEQ (%rax), %xmm0; \
|
||
- PCMPEQ 16(%rax), %xmm1; \
|
||
- PCMPEQ 32(%rax), %xmm2; \
|
||
- PCMPEQ 48(%rax), %xmm3; \
|
||
- pmovmskb %xmm0, %esi; \
|
||
- pmovmskb %xmm1, %edx; \
|
||
- pmovmskb %xmm2, %r8d; \
|
||
- pmovmskb %xmm3, %ecx; \
|
||
- salq $16, %rdx; \
|
||
- salq $16, %rcx; \
|
||
- orq %rsi, %rdx; \
|
||
- orq %r8, %rcx; \
|
||
- salq $32, %rcx; \
|
||
- orq %rcx, %rdx;
|
||
-
|
||
-#ifdef AS_STRNLEN
|
||
-/* Do not read anything when n==0. */
|
||
- test %RSI_LP, %RSI_LP
|
||
- jne L(n_nonzero)
|
||
- xor %rax, %rax
|
||
- ret
|
||
-L(n_nonzero):
|
||
-# ifdef AS_WCSLEN
|
||
- shl $2, %RSI_LP
|
||
-# endif
|
||
-
|
||
-/* Initialize long lived registers. */
|
||
-
|
||
- add %RDI_LP, %RSI_LP
|
||
- mov %RSI_LP, %R10_LP
|
||
- and $-64, %R10_LP
|
||
- mov %RSI_LP, %R11_LP
|
||
-#endif
|
||
-
|
||
- pxor %xmm0, %xmm0
|
||
- pxor %xmm1, %xmm1
|
||
- pxor %xmm2, %xmm2
|
||
- pxor %xmm3, %xmm3
|
||
- movq %rdi, %rax
|
||
- movq %rdi, %rcx
|
||
- andq $4095, %rcx
|
||
-/* Offsets 4032-4047 will be aligned into 4032 thus fit into page. */
|
||
- cmpq $4047, %rcx
|
||
-/* We cannot unify this branching as it would be ~6 cycles slower. */
|
||
- ja L(cross_page)
|
||
-
|
||
-#ifdef AS_STRNLEN
|
||
-/* Test if end is among first 64 bytes. */
|
||
-# define STRNLEN_PROLOG \
|
||
- mov %r11, %rsi; \
|
||
- subq %rax, %rsi; \
|
||
- andq $-64, %rax; \
|
||
- testq $-64, %rsi; \
|
||
- je L(strnlen_ret)
|
||
-#else
|
||
-# define STRNLEN_PROLOG andq $-64, %rax;
|
||
-#endif
|
||
-
|
||
-/* Ignore bits in mask that come before start of string. */
|
||
-#define PROLOG(lab) \
|
||
- movq %rdi, %rcx; \
|
||
- xorq %rax, %rcx; \
|
||
- STRNLEN_PROLOG; \
|
||
- sarq %cl, %rdx; \
|
||
- test %rdx, %rdx; \
|
||
- je L(lab); \
|
||
- bsfq %rdx, %rax; \
|
||
- SHIFT_RETURN; \
|
||
- ret
|
||
-
|
||
-#ifdef AS_STRNLEN
|
||
- andq $-16, %rax
|
||
- FIND_ZERO
|
||
-#else
|
||
- /* Test first 16 bytes unaligned. */
|
||
- movdqu (%rax), %xmm4
|
||
- PCMPEQ %xmm0, %xmm4
|
||
- pmovmskb %xmm4, %edx
|
||
- test %edx, %edx
|
||
- je L(next48_bytes)
|
||
- bsf %edx, %eax /* If eax is zeroed 16bit bsf can be used. */
|
||
- SHIFT_RETURN
|
||
- ret
|
||
-
|
||
-L(next48_bytes):
|
||
-/* Same as FIND_ZERO except we do not check first 16 bytes. */
|
||
- andq $-16, %rax
|
||
- PCMPEQ 16(%rax), %xmm1
|
||
- PCMPEQ 32(%rax), %xmm2
|
||
- PCMPEQ 48(%rax), %xmm3
|
||
- pmovmskb %xmm1, %edx
|
||
- pmovmskb %xmm2, %r8d
|
||
- pmovmskb %xmm3, %ecx
|
||
- salq $16, %rdx
|
||
- salq $16, %rcx
|
||
- orq %r8, %rcx
|
||
- salq $32, %rcx
|
||
- orq %rcx, %rdx
|
||
-#endif
|
||
-
|
||
- /* When no zero byte is found xmm1-3 are zero so we do not have to
|
||
- zero them. */
|
||
- PROLOG(loop)
|
||
-
|
||
- .p2align 4
|
||
-L(cross_page):
|
||
- andq $-64, %rax
|
||
- FIND_ZERO
|
||
- PROLOG(loop_init)
|
||
-
|
||
-#ifdef AS_STRNLEN
|
||
-/* We must do this check to correctly handle strnlen (s, -1). */
|
||
-L(strnlen_ret):
|
||
- bts %rsi, %rdx
|
||
- sarq %cl, %rdx
|
||
- test %rdx, %rdx
|
||
- je L(loop_init)
|
||
- bsfq %rdx, %rax
|
||
- SHIFT_RETURN
|
||
- ret
|
||
-#endif
|
||
- .p2align 4
|
||
-L(loop_init):
|
||
- pxor %xmm1, %xmm1
|
||
- pxor %xmm2, %xmm2
|
||
- pxor %xmm3, %xmm3
|
||
-#ifdef AS_STRNLEN
|
||
- .p2align 4
|
||
-L(loop):
|
||
-
|
||
- addq $64, %rax
|
||
- cmpq %rax, %r10
|
||
- je L(exit_end)
|
||
-
|
||
- movdqa (%rax), %xmm0
|
||
- PMINU 16(%rax), %xmm0
|
||
- PMINU 32(%rax), %xmm0
|
||
- PMINU 48(%rax), %xmm0
|
||
- PCMPEQ %xmm3, %xmm0
|
||
- pmovmskb %xmm0, %edx
|
||
- testl %edx, %edx
|
||
- jne L(exit)
|
||
- jmp L(loop)
|
||
-
|
||
- .p2align 4
|
||
-L(exit_end):
|
||
- cmp %rax, %r11
|
||
- je L(first) /* Do not read when end is at page boundary. */
|
||
- pxor %xmm0, %xmm0
|
||
- FIND_ZERO
|
||
-
|
||
-L(first):
|
||
- bts %r11, %rdx
|
||
- bsfq %rdx, %rdx
|
||
- addq %rdx, %rax
|
||
- subq %rdi, %rax
|
||
- SHIFT_RETURN
|
||
- ret
|
||
-
|
||
- .p2align 4
|
||
-L(exit):
|
||
- pxor %xmm0, %xmm0
|
||
- FIND_ZERO
|
||
-
|
||
- bsfq %rdx, %rdx
|
||
- addq %rdx, %rax
|
||
- subq %rdi, %rax
|
||
- SHIFT_RETURN
|
||
- ret
|
||
-
|
||
-#else
|
||
-
|
||
- /* Main loop. Unrolled twice to improve L2 cache performance on core2. */
|
||
- .p2align 4
|
||
-L(loop):
|
||
-
|
||
- movdqa 64(%rax), %xmm0
|
||
- PMINU 80(%rax), %xmm0
|
||
- PMINU 96(%rax), %xmm0
|
||
- PMINU 112(%rax), %xmm0
|
||
- PCMPEQ %xmm3, %xmm0
|
||
- pmovmskb %xmm0, %edx
|
||
- testl %edx, %edx
|
||
- jne L(exit64)
|
||
-
|
||
- subq $-128, %rax
|
||
-
|
||
- movdqa (%rax), %xmm0
|
||
- PMINU 16(%rax), %xmm0
|
||
- PMINU 32(%rax), %xmm0
|
||
- PMINU 48(%rax), %xmm0
|
||
- PCMPEQ %xmm3, %xmm0
|
||
- pmovmskb %xmm0, %edx
|
||
- testl %edx, %edx
|
||
- jne L(exit0)
|
||
- jmp L(loop)
|
||
-
|
||
- .p2align 4
|
||
-L(exit64):
|
||
- addq $64, %rax
|
||
-L(exit0):
|
||
- pxor %xmm0, %xmm0
|
||
- FIND_ZERO
|
||
-
|
||
- bsfq %rdx, %rdx
|
||
- addq %rdx, %rax
|
||
- subq %rdi, %rax
|
||
- SHIFT_RETURN
|
||
- ret
|
||
-
|
||
-#endif
|
||
-
|
||
-END(strlen)
|
||
libc_hidden_builtin_def (strlen)
|
||
diff --git a/sysdeps/x86_64/sysdep.h b/sysdeps/x86_64/sysdep.h
|
||
index 0b73674f68..c8ad778fee 100644
|
||
--- a/sysdeps/x86_64/sysdep.h
|
||
+++ b/sysdeps/x86_64/sysdep.h
|
||
@@ -95,6 +95,28 @@ lose: \
|
||
#define R14_LP r14
|
||
#define R15_LP r15
|
||
|
||
+/* Zero upper vector registers and return with xtest. NB: Use VZEROALL
|
||
+ to avoid an RTM abort triggered by executing VZEROUPPER inside a transaction. */
|
||
+#define ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST \
|
||
+ xtest; \
|
||
+ jz 1f; \
|
||
+ vzeroall; \
|
||
+ ret; \
|
||
+1: \
|
||
+ vzeroupper; \
|
||
+ ret
|
||
+
|
||
+/* Zero upper vector registers and return. */
|
||
+#ifndef ZERO_UPPER_VEC_REGISTERS_RETURN
|
||
+# define ZERO_UPPER_VEC_REGISTERS_RETURN \
|
||
+ VZEROUPPER; \
|
||
+ ret
|
||
+#endif
|
||
+
|
||
+#ifndef VZEROUPPER_RETURN
|
||
+# define VZEROUPPER_RETURN VZEROUPPER; ret
|
||
+#endif
|
||
+
|
||
#else /* __ASSEMBLER__ */
|
||
|
||
/* Long and pointer size in bytes. */
|
||
diff --git a/sysdeps/x86_64/tst-rsi-strlen.c b/sysdeps/x86_64/tst-rsi-strlen.c
|
||
new file mode 100644
|
||
index 0000000000..a80c4f85c2
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/tst-rsi-strlen.c
|
||
@@ -0,0 +1,81 @@
|
||
+/* Test strlen with 0 in the RSI register.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#ifdef WIDE
|
||
+# define TEST_NAME "wcslen"
|
||
+#else
|
||
+# define TEST_NAME "strlen"
|
||
+#endif /* WIDE */
|
||
+
|
||
+#define TEST_MAIN
|
||
+#include <string/test-string.h>
|
||
+
|
||
+#ifdef WIDE
|
||
+# include <wchar.h>
|
||
+# define STRLEN wcslen
|
||
+# define CHAR wchar_t
|
||
+#else
|
||
+# define STRLEN strlen
|
||
+# define CHAR char
|
||
+#endif /* WIDE */
|
||
+
|
||
+IMPL (STRLEN, 1)
|
||
+
|
||
+typedef size_t (*proto_t) (const CHAR *);
|
||
+
|
||
+typedef struct
|
||
+{
|
||
+ void (*fn) (void);
|
||
+} parameter_t;
|
||
+
|
||
+size_t
|
||
+__attribute__ ((weak, noinline, noclone))
|
||
+do_strlen (parameter_t *a, int zero, const CHAR *str)
|
||
+{
|
||
+ return CALL (a, str);
|
||
+}
|
||
+
|
||
+static int
|
||
+test_main (void)
|
||
+{
|
||
+ test_init ();
|
||
+
|
||
+ size_t size = page_size / sizeof (CHAR) - 1;
|
||
+ CHAR *buf = (CHAR *) buf2;
|
||
+ buf[size] = 0;
|
||
+
|
||
+ parameter_t a;
|
||
+
|
||
+ int ret = 0;
|
||
+ FOR_EACH_IMPL (impl, 0)
|
||
+ {
|
||
+ a.fn = impl->fn;
|
||
+ /* NB: Pass 0 in RSI. */
|
||
+ size_t res = do_strlen (&a, 0, buf);
|
||
+ if (res != size)
|
||
+ {
|
||
+ error (0, 0, "Wrong result in function %s: %zu != %zu",
|
||
+ impl->name, res, size);
|
||
+ ret = 1;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ return ret ? EXIT_FAILURE : EXIT_SUCCESS;
|
||
+}
|
||
+
|
||
+#include <support/test-driver.c>
|
||
diff --git a/sysdeps/x86_64/tst-rsi-wcslen.c b/sysdeps/x86_64/tst-rsi-wcslen.c
|
||
new file mode 100644
|
||
index 0000000000..f45a7dfb51
|
||
--- /dev/null
|
||
+++ b/sysdeps/x86_64/tst-rsi-wcslen.c
|
||
@@ -0,0 +1,20 @@
|
||
+/* Test wcslen with 0 in the RSI register.
|
||
+ Copyright (C) 2021 Free Software Foundation, Inc.
|
||
+ This file is part of the GNU C Library.
|
||
+
|
||
+ The GNU C Library is free software; you can redistribute it and/or
|
||
+ modify it under the terms of the GNU Lesser General Public
|
||
+ License as published by the Free Software Foundation; either
|
||
+ version 2.1 of the License, or (at your option) any later version.
|
||
+
|
||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
+ Lesser General Public License for more details.
|
||
+
|
||
+ You should have received a copy of the GNU Lesser General Public
|
||
+ License along with the GNU C Library; if not, see
|
||
+ <https://www.gnu.org/licenses/>. */
|
||
+
|
||
+#define WIDE 1
|
||
+#include "tst-rsi-strlen.c"
|
||
diff --git a/sysvipc/test-sysvsem.c b/sysvipc/test-sysvsem.c
|
||
index 01dbff343a..b7284e0b48 100644
|
||
--- a/sysvipc/test-sysvsem.c
|
||
+++ b/sysvipc/test-sysvsem.c
|
||
@@ -20,6 +20,7 @@
|
||
#include <stdlib.h>
|
||
#include <errno.h>
|
||
#include <string.h>
|
||
+#include <stdbool.h>
|
||
#include <sys/types.h>
|
||
#include <sys/ipc.h>
|
||
#include <sys/sem.h>
|
||
diff --git a/version.h b/version.h
|
||
index 83cd196798..e6ca7a8857 100644
|
||
--- a/version.h
|
||
+++ b/version.h
|
||
@@ -1,4 +1,4 @@
|
||
/* This file just defines the current version number of libc. */
|
||
|
||
-#define RELEASE "release"
|
||
+#define RELEASE "stable"
|
||
#define VERSION "2.32"
|
||
diff -pruN glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/64/configure glibc-2.32/sysdeps/unix/sysv/linux/x86_64/64/configure
|
||
--- glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/64/configure 2021-09-18 21:02:32.741186019 +1000
|
||
+++ glibc-2.32/sysdeps/unix/sysv/linux/x86_64/64/configure 2021-09-18 21:03:05.314302356 +1000
|
||
@@ -4,10 +4,10 @@
|
||
test -n "$libc_cv_slibdir" ||
|
||
case "$prefix" in
|
||
/usr | /usr/)
|
||
- libc_cv_slibdir='/lib64'
|
||
- libc_cv_rtlddir='/lib64'
|
||
+ libc_cv_slibdir='/lib'
|
||
+ libc_cv_rtlddir='/lib'
|
||
if test "$libdir" = '${exec_prefix}/lib'; then
|
||
- libdir='${exec_prefix}/lib64';
|
||
+ libdir='${exec_prefix}/lib';
|
||
# Locale data can be shared between 32-bit and 64-bit libraries.
|
||
libc_cv_complocaledir='${exec_prefix}/lib/locale'
|
||
fi
|
||
diff -pruN glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/ldconfig.h glibc-2.32/sysdeps/unix/sysv/linux/x86_64/ldconfig.h
|
||
--- glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/ldconfig.h 2021-09-18 21:02:32.742186053 +1000
|
||
+++ glibc-2.32/sysdeps/unix/sysv/linux/x86_64/ldconfig.h 2021-09-18 21:03:05.314302356 +1000
|
||
@@ -18,9 +18,9 @@
|
||
#include <sysdeps/generic/ldconfig.h>
|
||
|
||
#define SYSDEP_KNOWN_INTERPRETER_NAMES \
|
||
- { "/lib/ld-linux.so.2", FLAG_ELF_LIBC6 }, \
|
||
+ { "/lib32/ld-linux.so.2", FLAG_ELF_LIBC6 }, \
|
||
{ "/libx32/ld-linux-x32.so.2", FLAG_ELF_LIBC6 }, \
|
||
- { "/lib64/ld-linux-x86-64.so.2", FLAG_ELF_LIBC6 },
|
||
+ { "/lib/ld-linux-x86-64.so.2", FLAG_ELF_LIBC6 },
|
||
#define SYSDEP_KNOWN_LIBRARY_NAMES \
|
||
{ "libc.so.6", FLAG_ELF_LIBC6 }, \
|
||
{ "libm.so.6", FLAG_ELF_LIBC6 },
|