glibc/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S

377 lines
8.3 KiB
ArmAsm
Raw Normal View History

/* memcpy with AVX
Copyright (C) 2014-2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
Remove NOT_IN_libc Replace with !IS_IN (libc). This completes the transition from the IS_IN/NOT_IN macros to the IN_MODULE macro set. The generated code is unchanged on x86_64. * stdlib/isomac.c (fmt): Replace NOT_IN_libc with IN_MODULE. (get_null_defines): Adjust. * sunrpc/Makefile: Adjust comment. * Makerules (CPPFLAGS-nonlib): Remove NOT_IN_libc. * elf/Makefile (CPPFLAGS-sotruss-lib): Likewise. (CFLAGS-interp.c): Likewise. (CFLAGS-ldconfig.c): Likewise. (CPPFLAGS-.os): Likewise. * elf/rtld-Rules (rtld-CPPFLAGS): Likewise. * extra-lib.mk (CPPFLAGS-$(lib)): Likewise. * extra-modules.mk (extra-modules.mk): Likewise. * iconv/Makefile (CPPFLAGS-iconvprogs): Likewise. * locale/Makefile (CPPFLAGS-locale_programs): Likewise. * malloc/Makefile (CPPFLAGS-memusagestat): Likewise. * nscd/Makefile (CPPFLAGS-nscd): Likewise. * nss/Makefile (CPPFLAGS-nss_test1): Likewise. * stdlib/Makefile (CFLAGS-tst-putenvmod.c): Likewise. * sysdeps/gnu/Makefile ($(objpfx)errlist-compat.c): Likewise. * sysdeps/unix/sysv/linux/Makefile (CPPFLAGS-lddlibc4): Likewise. * iconvdata/Makefile (CPPFLAGS): Likewise. (cpp-srcs-left): Add libof for all iconvdata routines. * bits/stdio-lock.h: Replace NOT_IN_libc with IS_IN. * include/assert.h: Likewise. * include/ctype.h: Likewise. * include/errno.h: Likewise. * include/libc-symbols.h: Likewise. * include/math.h: Likewise. * include/netdb.h: Likewise. * include/resolv.h: Likewise. * include/stdio.h: Likewise. * include/stdlib.h: Likewise. * include/string.h: Likewise. * include/sys/stat.h: Likewise. * include/wctype.h: Likewise. * intl/l10nflist.c: Likewise. * libidn/idn-stub.c: Likewise. * libio/libioP.h: Likewise. * nptl/libc_multiple_threads.c: Likewise. * nptl/pthreadP.h: Likewise. * posix/regex_internal.h: Likewise. * resolv/res_hconf.c: Likewise. * sysdeps/arm/armv7/multiarch/memcpy.S: Likewise. * sysdeps/arm/memmove.S: Likewise. * sysdeps/arm/sysdep.h: Likewise. * sysdeps/generic/_itoa.h: Likewise. * sysdeps/generic/symbol-hacks.h: Likewise. 
* sysdeps/gnu/errlist.awk: Likewise. * sysdeps/gnu/errlist.c: Likewise. * sysdeps/i386/i586/memcpy.S: Likewise. * sysdeps/i386/i586/memset.S: Likewise. * sysdeps/i386/i686/memcpy.S: Likewise. * sysdeps/i386/i686/memmove.S: Likewise. * sysdeps/i386/i686/mempcpy.S: Likewise. * sysdeps/i386/i686/memset.S: Likewise. * sysdeps/i386/i686/multiarch/bcopy.S: Likewise. * sysdeps/i386/i686/multiarch/bzero.S: Likewise. * sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S: Likewise. * sysdeps/i386/i686/multiarch/memchr-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/memchr.S: Likewise. * sysdeps/i386/i686/multiarch/memcmp-sse4.S: Likewise. * sysdeps/i386/i686/multiarch/memcmp-ssse3.S: Likewise. * sysdeps/i386/i686/multiarch/memcmp.S: Likewise. * sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S: Likewise. * sysdeps/i386/i686/multiarch/memcpy-ssse3.S: Likewise. * sysdeps/i386/i686/multiarch/memcpy.S: Likewise. * sysdeps/i386/i686/multiarch/memcpy_chk.S: Likewise. * sysdeps/i386/i686/multiarch/memmove.S: Likewise. * sysdeps/i386/i686/multiarch/memmove_chk.S: Likewise. * sysdeps/i386/i686/multiarch/mempcpy.S: Likewise. * sysdeps/i386/i686/multiarch/mempcpy_chk.S: Likewise. * sysdeps/i386/i686/multiarch/memrchr-c.c: Likewise. * sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S: Likewise. * sysdeps/i386/i686/multiarch/memrchr-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/memrchr.S: Likewise. * sysdeps/i386/i686/multiarch/memset-sse2-rep.S: Likewise. * sysdeps/i386/i686/multiarch/memset-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/memset.S: Likewise. * sysdeps/i386/i686/multiarch/memset_chk.S: Likewise. * sysdeps/i386/i686/multiarch/rawmemchr.S: Likewise. * sysdeps/i386/i686/multiarch/strcat-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/strcat-ssse3.S: Likewise. * sysdeps/i386/i686/multiarch/strcat.S: Likewise. * sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S: Likewise. * sysdeps/i386/i686/multiarch/strchr-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/strchr.S: Likewise. 
* sysdeps/i386/i686/multiarch/strcmp-sse4.S: Likewise. * sysdeps/i386/i686/multiarch/strcmp-ssse3.S: Likewise. * sysdeps/i386/i686/multiarch/strcmp.S: Likewise. * sysdeps/i386/i686/multiarch/strcpy-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/strcpy-ssse3.S: Likewise. * sysdeps/i386/i686/multiarch/strcpy.S: Likewise. * sysdeps/i386/i686/multiarch/strcspn.S: Likewise. * sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S: Likewise. * sysdeps/i386/i686/multiarch/strlen-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/strlen.S: Likewise. * sysdeps/i386/i686/multiarch/strnlen.S: Likewise. * sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S: Likewise. * sysdeps/i386/i686/multiarch/strrchr-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/strrchr.S: Likewise. * sysdeps/i386/i686/multiarch/strspn.S: Likewise. * sysdeps/i386/i686/multiarch/wcschr-c.c: Likewise. * sysdeps/i386/i686/multiarch/wcschr-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/wcschr.S: Likewise. * sysdeps/i386/i686/multiarch/wcscmp-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/wcscmp.S: Likewise. * sysdeps/i386/i686/multiarch/wcscpy-c.c: Likewise. * sysdeps/i386/i686/multiarch/wcscpy-ssse3.S: Likewise. * sysdeps/i386/i686/multiarch/wcscpy.S: Likewise. * sysdeps/i386/i686/multiarch/wcslen-c.c: Likewise. * sysdeps/i386/i686/multiarch/wcslen-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/wcslen.S: Likewise. * sysdeps/i386/i686/multiarch/wcsrchr-c.c: Likewise. * sysdeps/i386/i686/multiarch/wcsrchr-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/wcsrchr.S: Likewise. * sysdeps/i386/i686/multiarch/wmemcmp-c.c: Likewise. * sysdeps/i386/i686/multiarch/wmemcmp.S: Likewise. * sysdeps/ia64/fpu/libm-symbols.h: Likewise. * sysdeps/nptl/bits/libc-lock.h: Likewise. * sysdeps/nptl/bits/libc-lockP.h: Likewise. * sysdeps/nptl/bits/stdio-lock.h: Likewise. * sysdeps/posix/closedir.c: Likewise. * sysdeps/posix/opendir.c: Likewise. * sysdeps/posix/readdir.c: Likewise. * sysdeps/posix/rewinddir.c: Likewise. 
* sysdeps/powerpc/novmx-sigjmp.c: Likewise. * sysdeps/powerpc/powerpc32/__longjmp.S: Likewise. * sysdeps/powerpc/powerpc32/bsd-_setjmp.S: Likewise. * sysdeps/powerpc/powerpc32/fpu/__longjmp.S: Likewise. * sysdeps/powerpc/powerpc32/fpu/setjmp.S: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/bzero.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memchr.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memcmp-ppc32.S: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memcmp.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-ppc32.S: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memmove.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memrchr.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memset.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strchr.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strlen-ppc32.S: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strlen.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strncase.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strncmp-ppc32.S: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strncmp.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strnlen.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-ppc32.c: Likewise. 
* sysdeps/powerpc/powerpc32/power4/multiarch/wcschr.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-ppc32.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/wordcopy.c: Likewise. * sysdeps/powerpc/powerpc32/power6/memset.S: Likewise. * sysdeps/powerpc/powerpc32/setjmp.S: Likewise. * sysdeps/powerpc/powerpc64/__longjmp.S: Likewise. * sysdeps/powerpc/powerpc64/multiarch/bzero.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memchr.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memcmp-ppc64.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memcmp.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memcpy-ppc64.S: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memcpy.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memmove-ppc64.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memmove.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/mempcpy.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memrchr.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memset.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/rawmemchr.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.S: Likewise. * sysdeps/powerpc/powerpc64/multiarch/stpcpy.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/stpncpy.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strcat.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strchr.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strchrnul.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strcmp-ppc64.S: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strcmp.c: Likewise. 
* sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.S: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strcpy.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strcspn.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strlen-ppc64.S: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strlen.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strncase.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strncase_l.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strncat.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strncmp-ppc64.S: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strncmp.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strncpy-ppc64.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strncpy.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strnlen.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strpbrk.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strrchr-ppc64.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strrchr.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strspn.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/wcschr.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/wcscpy.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/wcsrchr.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/wordcopy.c: Likewise. * sysdeps/powerpc/powerpc64/setjmp.S: Likewise. * sysdeps/s390/s390-32/multiarch/ifunc-resolve.c: Likewise. * sysdeps/s390/s390-32/multiarch/memcmp.S: Likewise. * sysdeps/s390/s390-32/multiarch/memcpy.S: Likewise. * sysdeps/s390/s390-32/multiarch/memset.S: Likewise. * sysdeps/s390/s390-64/multiarch/ifunc-resolve.c: Likewise. * sysdeps/s390/s390-64/multiarch/memcmp.S: Likewise. * sysdeps/s390/s390-64/multiarch/memcpy.S: Likewise. * sysdeps/s390/s390-64/multiarch/memset.S: Likewise. * sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S: Likewise. * sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S: Likewise. 
* sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S: Likewise. * sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S: Likewise. * sysdeps/sparc/sparc64/multiarch/memcpy.S: Likewise. * sysdeps/sparc/sparc64/multiarch/memset-niagara1.S: Likewise. * sysdeps/sparc/sparc64/multiarch/memset-niagara4.S: Likewise. * sysdeps/sparc/sparc64/multiarch/memset.S: Likewise. * sysdeps/unix/alpha/sysdep.S: Likewise. * sysdeps/unix/alpha/sysdep.h: Likewise. * sysdeps/unix/make-syscalls.sh: Likewise. * sysdeps/unix/sysv/linux/aarch64/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/aarch64/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/alpha/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/alpha/vfork.S: Likewise. * sysdeps/unix/sysv/linux/arm/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/arm/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/getpid.c: Likewise. * sysdeps/unix/sysv/linux/hppa/nptl/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/hppa/nptl/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/i386/i486/lowlevellock.S: Likewise. * sysdeps/unix/sysv/linux/i386/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/i386/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/i386/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/ia64/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/ia64/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/ia64/sysdep.S: Likewise. * sysdeps/unix/sysv/linux/ia64/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/lowlevellock-futex.h: Likewise. * sysdeps/unix/sysv/linux/m68k/bits/m68k-vdso.h: Likewise. * sysdeps/unix/sysv/linux/m68k/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/m68k/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/microblaze/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/microblaze/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/mips/mips64/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/mips/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/not-cancel.h: Likewise. 
* sysdeps/unix/sysv/linux/powerpc/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/s390/longjmp_chk.c: Likewise. * sysdeps/unix/sysv/linux/s390/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/s390/s390-32/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/s390/s390-32/sysdep.S: Likewise. * sysdeps/unix/sysv/linux/s390/s390-32/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/s390/s390-32/vfork.S: Likewise. * sysdeps/unix/sysv/linux/s390/s390-64/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/s390/s390-64/sysdep.S: Likewise. * sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/s390/s390-64/vfork.S: Likewise. * sysdeps/unix/sysv/linux/sh/lowlevellock.S: Likewise. * sysdeps/unix/sysv/linux/sh/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/sh/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/sh/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/sh/vfork.S: Likewise. * sysdeps/unix/sysv/linux/sparc/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/sparc/sparc32/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/sparc/sparc64/brk.S: Likewise. * sysdeps/unix/sysv/linux/sparc/sparc64/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/tile/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/tile/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/tile/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/tile/waitpid.S: Likewise. * sysdeps/unix/sysv/linux/x86_64/lowlevellock.S: Likewise. * sysdeps/unix/sysv/linux/x86_64/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/x86_64/sysdep.h: Likewise. * sysdeps/wordsize-32/symbol-hacks.h: Likewise. * sysdeps/x86_64/memcpy.S: Likewise. 
* sysdeps/x86_64/memmove.c: Likewise. * sysdeps/x86_64/memset.S: Likewise. * sysdeps/x86_64/multiarch/init-arch.h: Likewise. * sysdeps/x86_64/multiarch/memcmp-sse4.S: Likewise. * sysdeps/x86_64/multiarch/memcmp-ssse3.S: Likewise. * sysdeps/x86_64/multiarch/memcmp.S: Likewise. * sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S: Likewise. * sysdeps/x86_64/multiarch/memcpy-ssse3-back.S: Likewise. * sysdeps/x86_64/multiarch/memcpy-ssse3.S: Likewise. * sysdeps/x86_64/multiarch/memcpy.S: Likewise. * sysdeps/x86_64/multiarch/memcpy_chk.S: Likewise. * sysdeps/x86_64/multiarch/memmove.c: Likewise. * sysdeps/x86_64/multiarch/mempcpy.S: Likewise. * sysdeps/x86_64/multiarch/mempcpy_chk.S: Likewise. * sysdeps/x86_64/multiarch/memset-avx2.S: Likewise. * sysdeps/x86_64/multiarch/memset.S: Likewise. * sysdeps/x86_64/multiarch/memset_chk.S: Likewise. * sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S: Likewise. * sysdeps/x86_64/multiarch/strcat-ssse3.S: Likewise. * sysdeps/x86_64/multiarch/strcat.S: Likewise. * sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S: Likewise. * sysdeps/x86_64/multiarch/strchr.S: Likewise. * sysdeps/x86_64/multiarch/strcmp-ssse3.S: Likewise. * sysdeps/x86_64/multiarch/strcmp.S: Likewise. * sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S: Likewise. * sysdeps/x86_64/multiarch/strcpy-ssse3.S: Likewise. * sysdeps/x86_64/multiarch/strcpy.S: Likewise. * sysdeps/x86_64/multiarch/strcspn.S: Likewise. * sysdeps/x86_64/multiarch/strspn.S: Likewise. * sysdeps/x86_64/multiarch/wcscpy-c.c: Likewise. * sysdeps/x86_64/multiarch/wcscpy-ssse3.S: Likewise. * sysdeps/x86_64/multiarch/wcscpy.S: Likewise. * sysdeps/x86_64/multiarch/wmemcmp-c.c: Likewise. * sysdeps/x86_64/multiarch/wmemcmp.S: Likewise. * sysdeps/x86_64/strcmp.S: Likewise.
2014-11-24 15:03:45 +05:30
/* Everything up to the final #endif of this file is assembled only
   when IS_IN (libc) is true and at least one of the following holds:
   SHARED is defined, USE_AS_MEMMOVE is defined, or USE_MULTIARCH is
   not defined.  */
#if IS_IN (libc) \
&& (defined SHARED \
|| defined USE_AS_MEMMOVE \
|| !defined USE_MULTIARCH)
#include "asm-syntax.h"
/* Default entry-point names, used unless MEMCPY was already defined
   before this point.  */
#ifndef MEMCPY
# define MEMCPY __memcpy_avx_unaligned
# define MEMCPY_CHK __memcpy_chk_avx_unaligned
#endif
/* The code below is emitted into its own executable section,
   .text.avx.  */
.section .text.avx,"ax",@progbits
#if !defined USE_AS_BCOPY
/* Fortified entry point (not built when USE_AS_BCOPY is defined).
   ABI:  System V AMD64.
   In:   %rdi = dst, %rsi = src, %rdx = n, %rcx = size of the
         destination object.
   Jumps to __chk_fail when the destination size is smaller than n.
   There is no ret: when the check passes, execution falls through
   END (MEMCPY_CHK) into the entry point that immediately follows.  */
ENTRY (MEMCPY_CHK)
# ifdef __ILP32__
/* x32: size_t is 32 bits wide and the upper halves of %rdx and %rcx
   are undefined on entry, so only the low 32 bits may be compared.  */
cmpl %edx, %ecx
# else
cmpq %rdx, %rcx
# endif
/* Unsigned compare: taken when destination size < n.  */
jb HIDDEN_JUMPTARGET (__chk_fail)
END (MEMCPY_CHK)
#endif
/* void *MEMCPY (void *dst, const void *src, size_t n)
   ABI:   System V AMD64.
   In:    %rdi = dst, %rsi = src, %rdx = n.
   Out:   %rax = dst, or dst + n when USE_AS_MEMPCPY is defined.
   Clobb: %rcx, %rdx, %rsi, %rdi, %r8, %r10, %r11, %xmm0-%xmm15, flags.
   Sizes below 256 bytes are copied by straight-line code that loads
   every needed source byte into registers before the first store;
   larger sizes branch to L(256bytesormore).  */
ENTRY (MEMCPY)
#ifdef __ILP32__
/* x32 passes the 32-bit length in %edx and leaves the upper half of
   %rdx undefined.  Writing %edx zero-extends into %rdx, so the 64-bit
   compares and address arithmetic below see the real length.  */
movl %edx, %edx
#endif
/* Set up the return value first: the original destination pointer.  */
mov %rdi, %rax
#ifdef USE_AS_MEMPCPY
/* With USE_AS_MEMPCPY the return value is dst + n instead.  */
add %rdx, %rax
#endif
cmp $256, %rdx
jae L(256bytesormore)
/* n < 256 from here on, so %dl holds the whole length and byte-sized
   compares are sufficient.  */
cmp $16, %dl
jb L(less_16bytes)
cmp $128, %dl
jb L(less_128bytes)
/* 128 <= n < 256: load the first 128 bytes of the source into
   xmm0-xmm7 and the last 128 bytes into xmm8-xmm15.  The two windows
   overlap when n < 256, so together they cover every byte.  */
vmovdqu (%rsi), %xmm0
/* %rcx = src + n, one past the end of the source.  */
lea (%rsi, %rdx), %rcx
vmovdqu 0x10(%rsi), %xmm1
vmovdqu 0x20(%rsi), %xmm2
vmovdqu 0x30(%rsi), %xmm3
vmovdqu 0x40(%rsi), %xmm4
vmovdqu 0x50(%rsi), %xmm5
vmovdqu 0x60(%rsi), %xmm6
vmovdqu 0x70(%rsi), %xmm7
vmovdqu -0x80(%rcx), %xmm8
vmovdqu -0x70(%rcx), %xmm9
vmovdqu -0x60(%rcx), %xmm10
vmovdqu -0x50(%rcx), %xmm11
vmovdqu -0x40(%rcx), %xmm12
vmovdqu -0x30(%rcx), %xmm13
vmovdqu -0x20(%rcx), %xmm14
vmovdqu -0x10(%rcx), %xmm15
/* %rdx = dst + n; the length itself is no longer needed.  */
lea (%rdi, %rdx), %rdx
/* All loads are complete, so the stores below cannot corrupt source
   bytes that are still to be read even if the buffers overlap.  */
vmovdqu %xmm0, (%rdi)
vmovdqu %xmm1, 0x10(%rdi)
vmovdqu %xmm2, 0x20(%rdi)
vmovdqu %xmm3, 0x30(%rdi)
vmovdqu %xmm4, 0x40(%rdi)
vmovdqu %xmm5, 0x50(%rdi)
vmovdqu %xmm6, 0x60(%rdi)
vmovdqu %xmm7, 0x70(%rdi)
vmovdqu %xmm8, -0x80(%rdx)
vmovdqu %xmm9, -0x70(%rdx)
vmovdqu %xmm10, -0x60(%rdx)
vmovdqu %xmm11, -0x50(%rdx)
vmovdqu %xmm12, -0x40(%rdx)
vmovdqu %xmm13, -0x30(%rdx)
vmovdqu %xmm14, -0x20(%rdx)
vmovdqu %xmm15, -0x10(%rdx)
ret
.p2align 4
/* Reached with 16 <= n < 128 (n in %dl), dst in %rdi, src in %rsi.
   Copies 64..127 bytes here; smaller sizes are dispatched onward.  */
L(less_128bytes):
cmp $64, %dl
jb L(less_64bytes)
/* Load the first 64 bytes (xmm0-xmm3) and the last 64 bytes
   (xmm4-xmm7) of the source; the windows overlap when n < 128.  */
vmovdqu (%rsi), %xmm0
/* %rcx = src + n, one past the end of the source.  */
lea (%rsi, %rdx), %rcx
vmovdqu 0x10(%rsi), %xmm1
vmovdqu 0x20(%rsi), %xmm2
/* %rdx = dst + n; the length itself is not needed past this point.  */
lea (%rdi, %rdx), %rdx
vmovdqu 0x30(%rsi), %xmm3
vmovdqu -0x40(%rcx), %xmm4
vmovdqu -0x30(%rcx), %xmm5
vmovdqu -0x20(%rcx), %xmm6
vmovdqu -0x10(%rcx), %xmm7
/* Every load precedes the first store.  */
vmovdqu %xmm0, (%rdi)
vmovdqu %xmm1, 0x10(%rdi)
vmovdqu %xmm2, 0x20(%rdi)
vmovdqu %xmm3, 0x30(%rdi)
vmovdqu %xmm4, -0x40(%rdx)
vmovdqu %xmm5, -0x30(%rdx)
vmovdqu %xmm6, -0x20(%rdx)
vmovdqu %xmm7, -0x10(%rdx)
ret
.p2align 4
/* Reached with 16 <= n < 64.  Copies 32..63 bytes here; smaller sizes
   go to L(less_32bytes).  */
L(less_64bytes):
cmp $32, %dl
jb L(less_32bytes)
/* First 32 bytes in xmm0/xmm1, last 32 bytes in xmm6/xmm7 (the two
   windows overlap when n < 64); all loads precede the stores.  */
vmovdqu (%rsi), %xmm0
vmovdqu 0x10(%rsi), %xmm1
vmovdqu -0x20(%rsi, %rdx), %xmm6
vmovdqu -0x10(%rsi, %rdx), %xmm7
vmovdqu %xmm0, (%rdi)
vmovdqu %xmm1, 0x10(%rdi)
vmovdqu %xmm6, -0x20(%rdi, %rdx)
vmovdqu %xmm7, -0x10(%rdi, %rdx)
ret
.p2align 4
/* Reached with 16 <= n < 32: copy the first 16 and the last 16 bytes.
   The two 16-byte windows overlap when n < 32; both loads are done
   before either store.  */
L(less_32bytes):
vmovdqu (%rsi), %xmm0
vmovdqu -0x10(%rsi, %rdx), %xmm7
vmovdqu %xmm0, (%rdi)
vmovdqu %xmm7, -0x10(%rdi, %rdx)
ret
.p2align 4
/* Reached with n < 16.  Copies 8..15 bytes here with two (possibly
   overlapping) 8-byte moves; smaller sizes go to L(less_8bytes).  */
L(less_16bytes):
cmp $8, %dl
jb L(less_8bytes)
/* Load the last 8 bytes first, because the next instruction replaces
   the source pointer in %rsi with the first 8 bytes of data.  */
movq -0x08(%rsi, %rdx), %rcx
movq (%rsi), %rsi
movq %rsi, (%rdi)
movq %rcx, -0x08(%rdi, %rdx)
ret
.p2align 4
/* Reached with n < 8.  Copies 4..7 bytes here with two (possibly
   overlapping) 4-byte moves; smaller sizes go to L(less_4bytes).  */
L(less_8bytes):
cmp $4, %dl
jb L(less_4bytes)
/* Tail is loaded first; %esi is then reused to hold the head data.  */
mov -0x04(%rsi, %rdx), %ecx
mov (%rsi), %esi
mov %esi, (%rdi)
mov %ecx, -0x04(%rdi, %rdx)
ret
/* Reached with n < 4.  Copies 2..3 bytes here with two (possibly
   overlapping) 2-byte moves; n <= 1 goes to L(less_2bytes).  */
L(less_4bytes):
cmp $1, %dl
jbe L(less_2bytes)
/* Tail is loaded first; %si is then reused to hold the head data.  */
mov -0x02(%rsi, %rdx), %cx
mov (%rsi), %si
mov %si, (%rdi)
mov %cx, -0x02(%rdi, %rdx)
ret
/* Reached with n <= 1.  The flags are still those of the
   "cmp $1, %dl" above (the jbe did not change them), so the jb below
   is taken exactly when n == 0.  */
L(less_2bytes):
jb L(less_0bytes)
/* n == 1: copy the single byte.  */
mov (%rsi), %cl
mov %cl, (%rdi)
/* n == 0: nothing to copy.  */
L(less_0bytes):
ret
.p2align 4
/* Reached with n >= 256, dst in %rdi, src in %rsi, n in %rdx and the
   return value already in %rax.  Chooses between the backward copy
   (memmove only), the paths for n >= 2048, and the 128-byte forward
   loop below for 256 <= n < 2048.  */
L(256bytesormore):
#ifdef USE_AS_MEMMOVE
/* %rcx = dst - src.  If that is below n as an unsigned value, dst
   lies inside [src, src + n) and a forward copy would overwrite
   source bytes before reading them, so copy backward instead.  */
mov %rdi, %rcx
sub %rsi, %rcx
cmp %rdx, %rcx
jc L(copy_backward)
#endif
cmp $2048, %rdx
jae L(gobble_data_movsb)
/* %rax is reused below as the constant 0x80, so park the return
   value in %r8 until the end.  */
mov %rax, %r8
/* %rcx = src + n (end of source); %r10 = original dst.  */
lea (%rsi, %rdx), %rcx
mov %rdi, %r10
/* xmm5-xmm12 = last 128 bytes of the source, loaded before any store
   and interleaved with the pointer arithmetic.  */
vmovdqu -0x80(%rcx), %xmm5
vmovdqu -0x70(%rcx), %xmm6
/* %rax = 128, the number of bytes copied per loop iteration.  */
mov $0x80, %rax
/* Round %rdi up to the next 32-byte boundary; this always advances
   by 1..32 bytes.  The skipped head is covered by %ymm4 below.  */
and $-32, %rdi
add $32, %rdi
vmovdqu -0x60(%rcx), %xmm7
vmovdqu -0x50(%rcx), %xmm8
/* %r11 = number of bytes skipped at the start of the destination.  */
mov %rdi, %r11
sub %r10, %r11
vmovdqu -0x40(%rcx), %xmm9
vmovdqu -0x30(%rcx), %xmm10
/* %rdx = bytes left to copy starting at the aligned destination.  */
sub %r11, %rdx
vmovdqu -0x20(%rcx), %xmm11
vmovdqu -0x10(%rcx), %xmm12
/* %ymm4 = first 32 bytes of the source (the head).  */
vmovdqu (%rsi), %ymm4
/* Advance the source by the same amount as the destination.  */
add %r11, %rsi
/* Bias the counter by -128.  32-bit arithmetic is enough because
   n < 2048 on this path.  */
sub %eax, %edx
/* Copy 128 bytes per iteration: unaligned loads, 32-byte aligned
   stores.  */
L(goble_128_loop):
vmovdqu (%rsi), %ymm0
vmovdqu 0x20(%rsi), %ymm1
vmovdqu 0x40(%rsi), %ymm2
vmovdqu 0x60(%rsi), %ymm3
add %rax, %rsi
vmovdqa %ymm0, (%rdi)
vmovdqa %ymm1, 0x20(%rdi)
vmovdqa %ymm2, 0x40(%rdi)
vmovdqa %ymm3, 0x60(%rdi)
add %rax, %rdi
/* Repeat while the subtraction does not borrow, i.e. while at least
   another 128 bytes remain.  */
sub %eax, %edx
jae L(goble_128_loop)
/* Undo the bias: %edx = bytes still uncopied (fewer than 128), then
   %rdx = %rdi + that count = dst + n, one past the end of the
   destination.  */
add %eax, %edx
add %rdi, %rdx
/* Store the saved head at the original (unaligned) destination.  */
vmovdqu %ymm4, (%r10)
/* Clear the upper halves of the ymm registers before returning.  */
vzeroupper
/* Store the saved last 128 bytes; this covers whatever the loop left
   uncopied and may rewrite bytes the loop already stored.  */
vmovdqu %xmm5, -0x80(%rdx)
vmovdqu %xmm6, -0x70(%rdx)
vmovdqu %xmm7, -0x60(%rdx)
vmovdqu %xmm8, -0x50(%rdx)
vmovdqu %xmm9, -0x40(%rdx)
vmovdqu %xmm10, -0x30(%rdx)
vmovdqu %xmm11, -0x20(%rdx)
vmovdqu %xmm12, -0x10(%rdx)
/* Restore the return value parked in %r8.  */
mov %r8, %rax
ret
.p2align 4
/* Forward copy for n >= 2048.  Reached with dst in %rdi, src in %rsi,
   n in %rdx and the return value already in %rax, which this path
   never modifies.  */
L(gobble_data_movsb):
/* %rcx = half of the shared cache size: a build-time constant when
   SHARED_CACHE_SIZE_HALF is defined, otherwise read from the variable
   __x86_shared_cache_size_half.  */
#ifdef SHARED_CACHE_SIZE_HALF
mov $SHARED_CACHE_SIZE_HALF, %rcx
#else
mov __x86_shared_cache_size_half(%rip), %rcx
#endif
/* Threshold = 8 * that value.  Copies at least this large take the
   non-temporal path.  */
shl $3, %rcx
cmp %rcx, %rdx
jae L(gobble_big_data_fwd)
/* Below the threshold: rep movsb copies %rcx = n bytes from (%rsi)
   to (%rdi).  The length is moved into %rcx once; the original
   issued this same instruction twice in a row.  */
mov %rdx, %rcx
rep movsb
ret
.p2align 4
/* Forward copy for sizes at or above the cache-derived threshold,
   using non-temporal stores.  Reached with dst in %rdi, src in %rsi,
   n in %rdx; %rax (the return value) is not touched here.  */
L(gobble_big_data_fwd):
/* %rcx = src + n, one past the end of the source.  */
lea (%rsi, %rdx), %rcx
/* %ymm4 = first 32 bytes of the source (head); xmm5-xmm12 = last 128
   bytes (tail).  -0x80(%rsi,%rdx) is the same address as
   -0x80(%rcx).  All of these are loaded before any store.  */
vmovdqu (%rsi), %ymm4
vmovdqu -0x80(%rsi,%rdx), %xmm5
vmovdqu -0x70(%rcx), %xmm6
vmovdqu -0x60(%rcx), %xmm7
vmovdqu -0x50(%rcx), %xmm8
vmovdqu -0x40(%rcx), %xmm9
vmovdqu -0x30(%rcx), %xmm10
vmovdqu -0x20(%rcx), %xmm11
vmovdqu -0x10(%rcx), %xmm12
/* %r8 = original dst.  Round %rdi up to the next 32-byte boundary
   (always advancing by 1..32 bytes); %r10 = bytes skipped.  */
mov %rdi, %r8
and $-32, %rdi
add $32, %rdi
mov %rdi, %r10
sub %r8, %r10
/* Shorten the count and advance the source by the skipped amount.  */
sub %r10, %rdx
add %r10, %rsi
/* %rcx = aligned dst + remaining count = dst + n, one past the end
   of the destination.  */
lea (%rdi, %rdx), %rcx
/* Bias the counter by -128 for the loop test below.  */
add $-0x80, %rdx
/* Copy 128 bytes per iteration with unaligned loads and non-temporal
   stores to the 32-byte aligned destination.  */
L(gobble_mem_fwd_loop):
/* Prefetch source data 0x1c0 and 0x280 bytes ahead.  */
prefetchnta 0x1c0(%rsi)
prefetchnta 0x280(%rsi)
vmovdqu (%rsi), %ymm0
vmovdqu 0x20(%rsi), %ymm1
vmovdqu 0x40(%rsi), %ymm2
vmovdqu 0x60(%rsi), %ymm3
/* Subtracting -0x80 advances the pointer by 128.  */
sub $-0x80, %rsi
vmovntdq %ymm0, (%rdi)
vmovntdq %ymm1, 0x20(%rdi)
vmovntdq %ymm2, 0x40(%rdi)
vmovntdq %ymm3, 0x60(%rdi)
sub $-0x80, %rdi
/* The add carries while %rdx was at least 0x80 before it, so the
   loop repeats while 128 or more bytes remain beyond the block just
   copied.  */
add $-0x80, %rdx
jb L(gobble_mem_fwd_loop)
/* Order the non-temporal stores before the ordinary stores below.  */
sfence
/* Store the saved head at the original (unaligned) destination.  */
vmovdqu %ymm4, (%r8)
/* Clear the upper halves of the ymm registers before returning.  */
vzeroupper
/* Store the saved last 128 bytes, ending at dst + n.  */
vmovdqu %xmm5, -0x80(%rcx)
vmovdqu %xmm6, -0x70(%rcx)
vmovdqu %xmm7, -0x60(%rcx)
vmovdqu %xmm8, -0x50(%rcx)
vmovdqu %xmm9, -0x40(%rcx)
vmovdqu %xmm10, -0x30(%rcx)
vmovdqu %xmm11, -0x20(%rcx)
vmovdqu %xmm12, -0x10(%rcx)
ret
#ifdef USE_AS_MEMMOVE
.p2align 4
/* Backward copy, built only when USE_AS_MEMMOVE is defined.  Reached
   from L(256bytesormore) when dst lies inside [src, src + n), with
   n >= 256, dst in %rdi, src in %rsi, n in %rdx and the return value
   in %rax.  The first 128 source bytes and the last 32 source bytes
   are saved in registers up front; the aligned loop then copies from
   the end toward the start.  */
L(copy_backward):
/* %rcx = 8 * (half of the shared cache size), the threshold for
   switching to non-temporal stores.  */
#ifdef SHARED_CACHE_SIZE_HALF
mov $SHARED_CACHE_SIZE_HALF, %rcx
#else
mov __x86_shared_cache_size_half(%rip), %rcx
#endif
shl $3, %rcx
/* xmm5-xmm12 = first 128 bytes of the source, interleaved with the
   pointer arithmetic below.  */
vmovdqu (%rsi), %xmm5
vmovdqu 0x10(%rsi), %xmm6
/* %rdi = dst + n, one past the end of the destination.  */
add %rdx, %rdi
vmovdqu 0x20(%rsi), %xmm7
vmovdqu 0x30(%rsi), %xmm8
/* %r10 = address of the last 32 bytes of the destination.  */
lea -0x20(%rdi), %r10
/* %r11 = misalignment of the destination end within a 32-byte
   block.  */
mov %rdi, %r11
vmovdqu 0x40(%rsi), %xmm9
vmovdqu 0x50(%rsi), %xmm10
and $0x1f, %r11
vmovdqu 0x60(%rsi), %xmm11
vmovdqu 0x70(%rsi), %xmm12
/* Clear the low five bits: %rdi is rounded down to a 32-byte
   boundary.  */
xor %r11, %rdi
/* %rsi = src + n; %ymm4 = last 32 bytes of the source.  */
add %rdx, %rsi
vmovdqu -0x20(%rsi), %ymm4
/* Move the source end back by the same amount as the destination
   end and shorten the count accordingly.  */
sub %r11, %rsi
sub %r11, %rdx
/* Above the threshold: use the non-temporal variant.  */
cmp %rcx, %rdx
ja L(gobble_big_data_bwd)
/* Bias the counter by -128 for the loop test below.  */
add $-0x80, %rdx
/* Copy 128 bytes per iteration, descending, with unaligned loads and
   32-byte aligned stores.  */
L(gobble_mem_bwd_llc):
vmovdqu -0x20(%rsi), %ymm0
vmovdqu -0x40(%rsi), %ymm1
vmovdqu -0x60(%rsi), %ymm2
vmovdqu -0x80(%rsi), %ymm3
lea -0x80(%rsi), %rsi
vmovdqa %ymm0, -0x20(%rdi)
vmovdqa %ymm1, -0x40(%rdi)
vmovdqa %ymm2, -0x60(%rdi)
vmovdqa %ymm3, -0x80(%rdi)
lea -0x80(%rdi), %rdi
/* The add carries while %rdx was at least 0x80 before it, so the
   loop repeats while 128 or more bytes remain below the block just
   copied.  */
add $-0x80, %rdx
jb L(gobble_mem_bwd_llc)
/* Store the saved last 32 bytes at the end of the destination.  */
vmovdqu %ymm4, (%r10)
/* Clear the upper halves of the ymm registers before returning.  */
vzeroupper
/* Store the saved first 128 bytes at the start of the destination.
   %rax still holds dst from the entry code; this assumes
   USE_AS_MEMPCPY is not defined together with USE_AS_MEMMOVE.  */
vmovdqu %xmm5, (%rax)
vmovdqu %xmm6, 0x10(%rax)
vmovdqu %xmm7, 0x20(%rax)
vmovdqu %xmm8, 0x30(%rax)
vmovdqu %xmm9, 0x40(%rax)
vmovdqu %xmm10, 0x50(%rax)
vmovdqu %xmm11, 0x60(%rax)
vmovdqu %xmm12, 0x70(%rax)
ret
.p2align 4
/* Same backward copy as above, but with source prefetching and
   non-temporal stores.  Entered with the register state prepared by
   L(copy_backward).  */
L(gobble_big_data_bwd):
/* Bias the counter by -128 for the loop test below.  */
add $-0x80, %rdx
L(gobble_mem_bwd_loop):
/* Prefetch source data 0x1c0 and 0x280 bytes below the current
   position.  */
prefetchnta -0x1c0(%rsi)
prefetchnta -0x280(%rsi)
vmovdqu -0x20(%rsi), %ymm0
vmovdqu -0x40(%rsi), %ymm1
vmovdqu -0x60(%rsi), %ymm2
vmovdqu -0x80(%rsi), %ymm3
lea -0x80(%rsi), %rsi
vmovntdq %ymm0, -0x20(%rdi)
vmovntdq %ymm1, -0x40(%rdi)
vmovntdq %ymm2, -0x60(%rdi)
vmovntdq %ymm3, -0x80(%rdi)
lea -0x80(%rdi), %rdi
/* Repeat while the add carries, i.e. while 128 or more bytes remain
   below the block just copied.  */
add $-0x80, %rdx
jb L(gobble_mem_bwd_loop)
/* Order the non-temporal stores before the ordinary stores below.  */
sfence
/* Store the saved last 32 bytes at the end of the destination.  */
vmovdqu %ymm4, (%r10)
/* Clear the upper halves of the ymm registers before returning.  */
vzeroupper
/* Store the saved first 128 bytes at the start of the destination;
   %rax still holds dst (same assumption as in the loop above).  */
vmovdqu %xmm5, (%rax)
vmovdqu %xmm6, 0x10(%rax)
vmovdqu %xmm7, 0x20(%rax)
vmovdqu %xmm8, 0x30(%rax)
vmovdqu %xmm9, 0x40(%rax)
vmovdqu %xmm10, 0x50(%rax)
vmovdqu %xmm11, 0x60(%rax)
vmovdqu %xmm12, 0x70(%rax)
ret
#endif
/* End of the MEMCPY function opened by ENTRY (MEMCPY).  */
END (MEMCPY)
/* Closes the "#if IS_IN (libc) && (...)" guard that precedes the
   asm-syntax.h include near the top of the file.  */
#endif