4f41c682f3
Replace with !IS_IN (libc). This completes the transition from the IS_IN/NOT_IN macros to the IN_MODULE macro set. The generated code is unchanged on x86_64. * stdlib/isomac.c (fmt): Replace NOT_IN_libc with IN_MODULE. (get_null_defines): Adjust. * sunrpc/Makefile: Adjust comment. * Makerules (CPPFLAGS-nonlib): Remove NOT_IN_libc. * elf/Makefile (CPPFLAGS-sotruss-lib): Likewise. (CFLAGS-interp.c): Likewise. (CFLAGS-ldconfig.c): Likewise. (CPPFLAGS-.os): Likewise. * elf/rtld-Rules (rtld-CPPFLAGS): Likewise. * extra-lib.mk (CPPFLAGS-$(lib)): Likewise. * extra-modules.mk (extra-modules.mk): Likewise. * iconv/Makefile (CPPFLAGS-iconvprogs): Likewise. * locale/Makefile (CPPFLAGS-locale_programs): Likewise. * malloc/Makefile (CPPFLAGS-memusagestat): Likewise. * nscd/Makefile (CPPFLAGS-nscd): Likewise. * nss/Makefile (CPPFLAGS-nss_test1): Likewise. * stdlib/Makefile (CFLAGS-tst-putenvmod.c): Likewise. * sysdeps/gnu/Makefile ($(objpfx)errlist-compat.c): Likewise. * sysdeps/unix/sysv/linux/Makefile (CPPFLAGS-lddlibc4): Likewise. * iconvdata/Makefile (CPPFLAGS): Likewise. (cpp-srcs-left): Add libof for all iconvdata routines. * bits/stdio-lock.h: Replace NOT_IN_libc with IS_IN. * include/assert.h: Likewise. * include/ctype.h: Likewise. * include/errno.h: Likewise. * include/libc-symbols.h: Likewise. * include/math.h: Likewise. * include/netdb.h: Likewise. * include/resolv.h: Likewise. * include/stdio.h: Likewise. * include/stdlib.h: Likewise. * include/string.h: Likewise. * include/sys/stat.h: Likewise. * include/wctype.h: Likewise. * intl/l10nflist.c: Likewise. * libidn/idn-stub.c: Likewise. * libio/libioP.h: Likewise. * nptl/libc_multiple_threads.c: Likewise. * nptl/pthreadP.h: Likewise. * posix/regex_internal.h: Likewise. * resolv/res_hconf.c: Likewise. * sysdeps/arm/armv7/multiarch/memcpy.S: Likewise. * sysdeps/arm/memmove.S: Likewise. * sysdeps/arm/sysdep.h: Likewise. * sysdeps/generic/_itoa.h: Likewise. * sysdeps/generic/symbol-hacks.h: Likewise. 
* sysdeps/gnu/errlist.awk: Likewise. * sysdeps/gnu/errlist.c: Likewise. * sysdeps/i386/i586/memcpy.S: Likewise. * sysdeps/i386/i586/memset.S: Likewise. * sysdeps/i386/i686/memcpy.S: Likewise. * sysdeps/i386/i686/memmove.S: Likewise. * sysdeps/i386/i686/mempcpy.S: Likewise. * sysdeps/i386/i686/memset.S: Likewise. * sysdeps/i386/i686/multiarch/bcopy.S: Likewise. * sysdeps/i386/i686/multiarch/bzero.S: Likewise. * sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S: Likewise. * sysdeps/i386/i686/multiarch/memchr-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/memchr.S: Likewise. * sysdeps/i386/i686/multiarch/memcmp-sse4.S: Likewise. * sysdeps/i386/i686/multiarch/memcmp-ssse3.S: Likewise. * sysdeps/i386/i686/multiarch/memcmp.S: Likewise. * sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S: Likewise. * sysdeps/i386/i686/multiarch/memcpy-ssse3.S: Likewise. * sysdeps/i386/i686/multiarch/memcpy.S: Likewise. * sysdeps/i386/i686/multiarch/memcpy_chk.S: Likewise. * sysdeps/i386/i686/multiarch/memmove.S: Likewise. * sysdeps/i386/i686/multiarch/memmove_chk.S: Likewise. * sysdeps/i386/i686/multiarch/mempcpy.S: Likewise. * sysdeps/i386/i686/multiarch/mempcpy_chk.S: Likewise. * sysdeps/i386/i686/multiarch/memrchr-c.c: Likewise. * sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S: Likewise. * sysdeps/i386/i686/multiarch/memrchr-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/memrchr.S: Likewise. * sysdeps/i386/i686/multiarch/memset-sse2-rep.S: Likewise. * sysdeps/i386/i686/multiarch/memset-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/memset.S: Likewise. * sysdeps/i386/i686/multiarch/memset_chk.S: Likewise. * sysdeps/i386/i686/multiarch/rawmemchr.S: Likewise. * sysdeps/i386/i686/multiarch/strcat-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/strcat-ssse3.S: Likewise. * sysdeps/i386/i686/multiarch/strcat.S: Likewise. * sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S: Likewise. * sysdeps/i386/i686/multiarch/strchr-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/strchr.S: Likewise. 
* sysdeps/i386/i686/multiarch/strcmp-sse4.S: Likewise. * sysdeps/i386/i686/multiarch/strcmp-ssse3.S: Likewise. * sysdeps/i386/i686/multiarch/strcmp.S: Likewise. * sysdeps/i386/i686/multiarch/strcpy-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/strcpy-ssse3.S: Likewise. * sysdeps/i386/i686/multiarch/strcpy.S: Likewise. * sysdeps/i386/i686/multiarch/strcspn.S: Likewise. * sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S: Likewise. * sysdeps/i386/i686/multiarch/strlen-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/strlen.S: Likewise. * sysdeps/i386/i686/multiarch/strnlen.S: Likewise. * sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S: Likewise. * sysdeps/i386/i686/multiarch/strrchr-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/strrchr.S: Likewise. * sysdeps/i386/i686/multiarch/strspn.S: Likewise. * sysdeps/i386/i686/multiarch/wcschr-c.c: Likewise. * sysdeps/i386/i686/multiarch/wcschr-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/wcschr.S: Likewise. * sysdeps/i386/i686/multiarch/wcscmp-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/wcscmp.S: Likewise. * sysdeps/i386/i686/multiarch/wcscpy-c.c: Likewise. * sysdeps/i386/i686/multiarch/wcscpy-ssse3.S: Likewise. * sysdeps/i386/i686/multiarch/wcscpy.S: Likewise. * sysdeps/i386/i686/multiarch/wcslen-c.c: Likewise. * sysdeps/i386/i686/multiarch/wcslen-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/wcslen.S: Likewise. * sysdeps/i386/i686/multiarch/wcsrchr-c.c: Likewise. * sysdeps/i386/i686/multiarch/wcsrchr-sse2.S: Likewise. * sysdeps/i386/i686/multiarch/wcsrchr.S: Likewise. * sysdeps/i386/i686/multiarch/wmemcmp-c.c: Likewise. * sysdeps/i386/i686/multiarch/wmemcmp.S: Likewise. * sysdeps/ia64/fpu/libm-symbols.h: Likewise. * sysdeps/nptl/bits/libc-lock.h: Likewise. * sysdeps/nptl/bits/libc-lockP.h: Likewise. * sysdeps/nptl/bits/stdio-lock.h: Likewise. * sysdeps/posix/closedir.c: Likewise. * sysdeps/posix/opendir.c: Likewise. * sysdeps/posix/readdir.c: Likewise. * sysdeps/posix/rewinddir.c: Likewise. 
* sysdeps/powerpc/novmx-sigjmp.c: Likewise. * sysdeps/powerpc/powerpc32/__longjmp.S: Likewise. * sysdeps/powerpc/powerpc32/bsd-_setjmp.S: Likewise. * sysdeps/powerpc/powerpc32/fpu/__longjmp.S: Likewise. * sysdeps/powerpc/powerpc32/fpu/setjmp.S: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/bzero.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memchr.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memcmp-ppc32.S: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memcmp.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memcpy-ppc32.S: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memcpy.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memmove.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/mempcpy.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memrchr-ppc32.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memrchr.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/memset.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/rawmemchr.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strcasecmp_l.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strchr.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strchrnul.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strlen-ppc32.S: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strlen.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strncase.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strncase_l.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strncmp-ppc32.S: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strncmp.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/strnlen.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/wcschr-ppc32.c: Likewise. 
* sysdeps/powerpc/powerpc32/power4/multiarch/wcschr.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy-ppc32.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/wcscpy.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr-ppc32.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/wcsrchr.c: Likewise. * sysdeps/powerpc/powerpc32/power4/multiarch/wordcopy.c: Likewise. * sysdeps/powerpc/powerpc32/power6/memset.S: Likewise. * sysdeps/powerpc/powerpc32/setjmp.S: Likewise. * sysdeps/powerpc/powerpc64/__longjmp.S: Likewise. * sysdeps/powerpc/powerpc64/multiarch/bzero.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memchr.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memcmp-ppc64.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memcmp.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memcpy-ppc64.S: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memcpy.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memmove-ppc64.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memmove.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/mempcpy.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memrchr.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S: Likewise. * sysdeps/powerpc/powerpc64/multiarch/memset.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/rawmemchr.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.S: Likewise. * sysdeps/powerpc/powerpc64/multiarch/stpcpy.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/stpncpy.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strcasecmp_l.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strcat.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strchr.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strchrnul.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strcmp-ppc64.S: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strcmp.c: Likewise. 
* sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.S: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strcpy.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strcspn.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strlen-ppc64.S: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strlen.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strncase.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strncase_l.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strncat.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strncmp-ppc64.S: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strncmp.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strncpy-ppc64.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strncpy.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strnlen.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strpbrk.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strrchr-ppc64.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strrchr.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strspn-ppc64.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strspn.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/wcschr.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/wcscpy.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/wcsrchr.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/wordcopy.c: Likewise. * sysdeps/powerpc/powerpc64/setjmp.S: Likewise. * sysdeps/s390/s390-32/multiarch/ifunc-resolve.c: Likewise. * sysdeps/s390/s390-32/multiarch/memcmp.S: Likewise. * sysdeps/s390/s390-32/multiarch/memcpy.S: Likewise. * sysdeps/s390/s390-32/multiarch/memset.S: Likewise. * sysdeps/s390/s390-64/multiarch/ifunc-resolve.c: Likewise. * sysdeps/s390/s390-64/multiarch/memcmp.S: Likewise. * sysdeps/s390/s390-64/multiarch/memcpy.S: Likewise. * sysdeps/s390/s390-64/multiarch/memset.S: Likewise. * sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S: Likewise. * sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S: Likewise. 
* sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S: Likewise. * sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S: Likewise. * sysdeps/sparc/sparc64/multiarch/memcpy.S: Likewise. * sysdeps/sparc/sparc64/multiarch/memset-niagara1.S: Likewise. * sysdeps/sparc/sparc64/multiarch/memset-niagara4.S: Likewise. * sysdeps/sparc/sparc64/multiarch/memset.S: Likewise. * sysdeps/unix/alpha/sysdep.S: Likewise. * sysdeps/unix/alpha/sysdep.h: Likewise. * sysdeps/unix/make-syscalls.sh: Likewise. * sysdeps/unix/sysv/linux/aarch64/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/aarch64/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/alpha/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/alpha/vfork.S: Likewise. * sysdeps/unix/sysv/linux/arm/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/arm/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/getpid.c: Likewise. * sysdeps/unix/sysv/linux/hppa/nptl/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/hppa/nptl/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/i386/i486/lowlevellock.S: Likewise. * sysdeps/unix/sysv/linux/i386/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/i386/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/i386/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/ia64/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/ia64/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/ia64/sysdep.S: Likewise. * sysdeps/unix/sysv/linux/ia64/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/lowlevellock-futex.h: Likewise. * sysdeps/unix/sysv/linux/m68k/bits/m68k-vdso.h: Likewise. * sysdeps/unix/sysv/linux/m68k/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/m68k/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/microblaze/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/microblaze/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/mips/mips64/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/mips/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/not-cancel.h: Likewise. 
* sysdeps/unix/sysv/linux/powerpc/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/s390/longjmp_chk.c: Likewise. * sysdeps/unix/sysv/linux/s390/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/s390/s390-32/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/s390/s390-32/sysdep.S: Likewise. * sysdeps/unix/sysv/linux/s390/s390-32/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/s390/s390-32/vfork.S: Likewise. * sysdeps/unix/sysv/linux/s390/s390-64/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/s390/s390-64/sysdep.S: Likewise. * sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/s390/s390-64/vfork.S: Likewise. * sysdeps/unix/sysv/linux/sh/lowlevellock.S: Likewise. * sysdeps/unix/sysv/linux/sh/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/sh/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/sh/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/sh/vfork.S: Likewise. * sysdeps/unix/sysv/linux/sparc/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/sparc/sparc32/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/sparc/sparc64/brk.S: Likewise. * sysdeps/unix/sysv/linux/sparc/sparc64/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/tile/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/tile/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/tile/sysdep.h: Likewise. * sysdeps/unix/sysv/linux/tile/waitpid.S: Likewise. * sysdeps/unix/sysv/linux/x86_64/lowlevellock.S: Likewise. * sysdeps/unix/sysv/linux/x86_64/lowlevellock.h: Likewise. * sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h: Likewise. * sysdeps/unix/sysv/linux/x86_64/sysdep.h: Likewise. * sysdeps/wordsize-32/symbol-hacks.h: Likewise. * sysdeps/x86_64/memcpy.S: Likewise. 
* sysdeps/x86_64/memmove.c: Likewise. * sysdeps/x86_64/memset.S: Likewise. * sysdeps/x86_64/multiarch/init-arch.h: Likewise. * sysdeps/x86_64/multiarch/memcmp-sse4.S: Likewise. * sysdeps/x86_64/multiarch/memcmp-ssse3.S: Likewise. * sysdeps/x86_64/multiarch/memcmp.S: Likewise. * sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S: Likewise. * sysdeps/x86_64/multiarch/memcpy-ssse3-back.S: Likewise. * sysdeps/x86_64/multiarch/memcpy-ssse3.S: Likewise. * sysdeps/x86_64/multiarch/memcpy.S: Likewise. * sysdeps/x86_64/multiarch/memcpy_chk.S: Likewise. * sysdeps/x86_64/multiarch/memmove.c: Likewise. * sysdeps/x86_64/multiarch/mempcpy.S: Likewise. * sysdeps/x86_64/multiarch/mempcpy_chk.S: Likewise. * sysdeps/x86_64/multiarch/memset-avx2.S: Likewise. * sysdeps/x86_64/multiarch/memset.S: Likewise. * sysdeps/x86_64/multiarch/memset_chk.S: Likewise. * sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S: Likewise. * sysdeps/x86_64/multiarch/strcat-ssse3.S: Likewise. * sysdeps/x86_64/multiarch/strcat.S: Likewise. * sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S: Likewise. * sysdeps/x86_64/multiarch/strchr.S: Likewise. * sysdeps/x86_64/multiarch/strcmp-ssse3.S: Likewise. * sysdeps/x86_64/multiarch/strcmp.S: Likewise. * sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S: Likewise. * sysdeps/x86_64/multiarch/strcpy-ssse3.S: Likewise. * sysdeps/x86_64/multiarch/strcpy.S: Likewise. * sysdeps/x86_64/multiarch/strcspn.S: Likewise. * sysdeps/x86_64/multiarch/strspn.S: Likewise. * sysdeps/x86_64/multiarch/wcscpy-c.c: Likewise. * sysdeps/x86_64/multiarch/wcscpy-ssse3.S: Likewise. * sysdeps/x86_64/multiarch/wcscpy.S: Likewise. * sysdeps/x86_64/multiarch/wmemcmp-c.c: Likewise. * sysdeps/x86_64/multiarch/wmemcmp.S: Likewise. * sysdeps/x86_64/strcmp.S: Likewise.
3902 lines
70 KiB
ArmAsm
3902 lines
70 KiB
ArmAsm
/* strcpy with SSSE3
|
|
Copyright (C) 2011-2014 Free Software Foundation, Inc.
|
|
Contributed by Intel Corporation.
|
|
This file is part of the GNU C Library.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
|
|
#if IS_IN (libc)

# ifndef USE_AS_STRCAT

# include <sysdep.h>

/* CFI bookkeeping for a 4-byte push: the CFA moves by 4 and the saved
   register now lives at offset 0 from the new stack top, so unwinders
   can step through this hand-written prologue.  */
# define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)

/* Inverse of CFI_PUSH for the matching pop.  */
# define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)

/* Push/pop a callee-saved register together with its CFI annotation.  */
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)

/* Default entry-point name for the plain strcpy build; the strncpy,
   stpcpy and stpncpy variants include this file with STRCPY
   predefined to their own IFUNC implementation name.  */
# ifndef STRCPY
# define STRCPY __strcpy_ssse3
# endif

/* strncpy keeps the byte count in %ebx, so it pushes %ebx on entry;
   that extra push moves the incoming arguments to 8(%esp) instead of
   4(%esp).  The CFI_PUSH after each `ret' re-opens the just-closed CFI
   state because these RETURN macros are expanded at several exit
   points inside one function and the unwind info must remain valid
   for the code that follows each expansion.  */
# ifdef USE_AS_STRNCPY
# define PARMS 8
# define ENTRANCE PUSH (%ebx)
# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx);
# define RETURN1 POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
# else
# define PARMS 4
# define ENTRANCE
# define RETURN ret
# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
# endif

/* Result to return in %eax: stpcpy returns a pointer to the copied
   terminating byte (n past the current destination pointer in %edx),
   while strcpy returns the original destination (kept in %edi on the
   main path and still in %edx on the short tail paths).  */
# ifdef USE_AS_STPCPY
# define SAVE_RESULT(n) lea n(%edx), %eax
# define SAVE_RESULT_TAIL(n) lea n(%edx), %eax
# else
# define SAVE_RESULT(n) movl %edi, %eax
# define SAVE_RESULT_TAIL(n) movl %edx, %eax
# endif

/* Stack offsets of the incoming arguments relative to %esp after
   ENTRANCE: destination, source, and (strncpy only) length.  */
# define STR1 PARMS
# define STR2 STR1+4
# define LEN STR2+4

/* In this code following instructions are used for copying:
movb - 1 byte
movw - 2 byte
movl - 4 byte
movlpd - 8 byte
movaps - 16 byte - requires 16 byte alignment
of source and destination addresses.
*/
|
|
|
|
.text
|
|
ENTRY (STRCPY)
|
|
ENTRANCE
|
|
mov STR1(%esp), %edx
|
|
mov STR2(%esp), %ecx
|
|
# ifdef USE_AS_STRNCPY
|
|
movl LEN(%esp), %ebx
|
|
cmp $8, %ebx
|
|
jbe L(StrncpyExit8Bytes)
|
|
# endif
|
|
cmpb $0, (%ecx)
|
|
jz L(ExitTail1)
|
|
cmpb $0, 1(%ecx)
|
|
jz L(ExitTail2)
|
|
cmpb $0, 2(%ecx)
|
|
jz L(ExitTail3)
|
|
cmpb $0, 3(%ecx)
|
|
jz L(ExitTail4)
|
|
cmpb $0, 4(%ecx)
|
|
jz L(ExitTail5)
|
|
cmpb $0, 5(%ecx)
|
|
jz L(ExitTail6)
|
|
cmpb $0, 6(%ecx)
|
|
jz L(ExitTail7)
|
|
cmpb $0, 7(%ecx)
|
|
jz L(ExitTail8)
|
|
# ifdef USE_AS_STRNCPY
|
|
cmp $16, %ebx
|
|
jb L(StrncpyExit15Bytes)
|
|
# endif
|
|
cmpb $0, 8(%ecx)
|
|
jz L(ExitTail9)
|
|
cmpb $0, 9(%ecx)
|
|
jz L(ExitTail10)
|
|
cmpb $0, 10(%ecx)
|
|
jz L(ExitTail11)
|
|
cmpb $0, 11(%ecx)
|
|
jz L(ExitTail12)
|
|
cmpb $0, 12(%ecx)
|
|
jz L(ExitTail13)
|
|
cmpb $0, 13(%ecx)
|
|
jz L(ExitTail14)
|
|
cmpb $0, 14(%ecx)
|
|
jz L(ExitTail15)
|
|
# ifdef USE_AS_STRNCPY
|
|
cmp $16, %ebx
|
|
je L(ExitTail16)
|
|
# endif
|
|
cmpb $0, 15(%ecx)
|
|
jz L(ExitTail16)
|
|
|
|
PUSH (%edi)
|
|
mov %edx, %edi
|
|
# endif
|
|
PUSH (%esi)
|
|
# ifdef USE_AS_STRNCPY
|
|
mov %ecx, %esi
|
|
sub $16, %ebx
|
|
and $0xf, %esi
|
|
|
|
/* add 16 bytes ecx_offset to ebx */
|
|
|
|
add %esi, %ebx
|
|
# endif
|
|
lea 16(%ecx), %esi
|
|
and $-16, %esi
|
|
pxor %xmm0, %xmm0
|
|
movlpd (%ecx), %xmm1
|
|
movlpd %xmm1, (%edx)
|
|
|
|
pcmpeqb (%esi), %xmm0
|
|
movlpd 8(%ecx), %xmm1
|
|
movlpd %xmm1, 8(%edx)
|
|
|
|
pmovmskb %xmm0, %eax
|
|
sub %ecx, %esi
|
|
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
mov %edx, %eax
|
|
lea 16(%edx), %edx
|
|
and $-16, %edx
|
|
sub %edx, %eax
|
|
|
|
# ifdef USE_AS_STRNCPY
|
|
add %eax, %esi
|
|
lea -1(%esi), %esi
|
|
and $1<<31, %esi
|
|
test %esi, %esi
|
|
jnz L(ContinueCopy)
|
|
lea 16(%ebx), %ebx
|
|
|
|
L(ContinueCopy):
|
|
# endif
|
|
sub %eax, %ecx
|
|
mov %ecx, %eax
|
|
and $0xf, %eax
|
|
mov $0, %esi
|
|
|
|
/* case: ecx_offset == edx_offset */
|
|
|
|
jz L(Align16Both)
|
|
|
|
cmp $8, %eax
|
|
jae L(ShlHigh8)
|
|
cmp $1, %eax
|
|
je L(Shl1)
|
|
cmp $2, %eax
|
|
je L(Shl2)
|
|
cmp $3, %eax
|
|
je L(Shl3)
|
|
cmp $4, %eax
|
|
je L(Shl4)
|
|
cmp $5, %eax
|
|
je L(Shl5)
|
|
cmp $6, %eax
|
|
je L(Shl6)
|
|
jmp L(Shl7)
|
|
|
|
L(ShlHigh8):
|
|
je L(Shl8)
|
|
cmp $9, %eax
|
|
je L(Shl9)
|
|
cmp $10, %eax
|
|
je L(Shl10)
|
|
cmp $11, %eax
|
|
je L(Shl11)
|
|
cmp $12, %eax
|
|
je L(Shl12)
|
|
cmp $13, %eax
|
|
je L(Shl13)
|
|
cmp $14, %eax
|
|
je L(Shl14)
|
|
jmp L(Shl15)
|
|
|
|
L(Align16Both):
|
|
movaps (%ecx), %xmm1
|
|
movaps 16(%ecx), %xmm2
|
|
movaps %xmm1, (%edx)
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%esi), %esi
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
movaps 16(%ecx, %esi), %xmm3
|
|
movaps %xmm2, (%edx, %esi)
|
|
pcmpeqb %xmm3, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%esi), %esi
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
movaps 16(%ecx, %esi), %xmm4
|
|
movaps %xmm3, (%edx, %esi)
|
|
pcmpeqb %xmm4, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%esi), %esi
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
movaps 16(%ecx, %esi), %xmm1
|
|
movaps %xmm4, (%edx, %esi)
|
|
pcmpeqb %xmm1, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%esi), %esi
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
movaps 16(%ecx, %esi), %xmm2
|
|
movaps %xmm1, (%edx, %esi)
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%esi), %esi
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
movaps 16(%ecx, %esi), %xmm3
|
|
movaps %xmm2, (%edx, %esi)
|
|
pcmpeqb %xmm3, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%esi), %esi
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
movaps %xmm3, (%edx, %esi)
|
|
mov %ecx, %eax
|
|
lea 16(%ecx, %esi), %ecx
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
sub %eax, %edx
|
|
# ifdef USE_AS_STRNCPY
|
|
lea 112(%ebx, %eax), %ebx
|
|
# endif
|
|
mov $-0x40, %esi
|
|
|
|
L(Aligned64Loop):
|
|
movaps (%ecx), %xmm2
|
|
movaps 32(%ecx), %xmm3
|
|
movaps %xmm2, %xmm4
|
|
movaps 16(%ecx), %xmm5
|
|
movaps %xmm3, %xmm6
|
|
movaps 48(%ecx), %xmm7
|
|
pminub %xmm5, %xmm2
|
|
pminub %xmm7, %xmm3
|
|
pminub %xmm2, %xmm3
|
|
lea 64(%edx), %edx
|
|
pcmpeqb %xmm0, %xmm3
|
|
lea 64(%ecx), %ecx
|
|
pmovmskb %xmm3, %eax
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $64, %ebx
|
|
jbe L(StrncpyLeaveCase2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Aligned64Leave)
|
|
movaps %xmm4, -64(%edx)
|
|
movaps %xmm5, -48(%edx)
|
|
movaps %xmm6, -32(%edx)
|
|
movaps %xmm7, -16(%edx)
|
|
jmp L(Aligned64Loop)
|
|
|
|
L(Aligned64Leave):
|
|
# ifdef USE_AS_STRNCPY
|
|
lea 48(%ebx), %ebx
|
|
# endif
|
|
pcmpeqb %xmm4, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
pcmpeqb %xmm5, %xmm0
|
|
# ifdef USE_AS_STRNCPY
|
|
lea -16(%ebx), %ebx
|
|
# endif
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm4, -64(%edx)
|
|
test %eax, %eax
|
|
lea 16(%esi), %esi
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
pcmpeqb %xmm6, %xmm0
|
|
# ifdef USE_AS_STRNCPY
|
|
lea -16(%ebx), %ebx
|
|
# endif
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm5, -48(%edx)
|
|
test %eax, %eax
|
|
lea 16(%esi), %esi
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
movaps %xmm6, -32(%edx)
|
|
pcmpeqb %xmm7, %xmm0
|
|
# ifdef USE_AS_STRNCPY
|
|
lea -16(%ebx), %ebx
|
|
# endif
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%esi), %esi
|
|
jmp L(CopyFrom1To16Bytes)
|
|
|
|
.p2align 4
|
|
L(Shl1):
|
|
movaps -1(%ecx), %xmm1
|
|
movaps 15(%ecx), %xmm2
|
|
L(Shl1Start):
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit1Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl1LoopExit)
|
|
|
|
palignr $1, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 31(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit1Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl1LoopExit)
|
|
|
|
palignr $1, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 31(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit1Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl1LoopExit)
|
|
|
|
palignr $1, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 31(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit1Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl1LoopExit)
|
|
|
|
palignr $1, %xmm3, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
lea 31(%ecx), %ecx
|
|
lea 16(%edx), %edx
|
|
|
|
mov %ecx, %eax
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
lea -15(%ecx), %ecx
|
|
sub %eax, %edx
|
|
# ifdef USE_AS_STRNCPY
|
|
add %eax, %ebx
|
|
# endif
|
|
movaps -1(%ecx), %xmm1
|
|
|
|
L(Shl1LoopStart):
|
|
movaps 15(%ecx), %xmm2
|
|
movaps 31(%ecx), %xmm3
|
|
movaps %xmm3, %xmm6
|
|
movaps 47(%ecx), %xmm4
|
|
movaps %xmm4, %xmm7
|
|
movaps 63(%ecx), %xmm5
|
|
pminub %xmm2, %xmm6
|
|
pminub %xmm5, %xmm7
|
|
pminub %xmm6, %xmm7
|
|
pcmpeqb %xmm0, %xmm7
|
|
pmovmskb %xmm7, %eax
|
|
movaps %xmm5, %xmm7
|
|
palignr $1, %xmm4, %xmm5
|
|
test %eax, %eax
|
|
palignr $1, %xmm3, %xmm4
|
|
jnz L(Shl1Start)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $64, %ebx
|
|
jbe L(StrncpyLeave1)
|
|
# endif
|
|
palignr $1, %xmm2, %xmm3
|
|
lea 64(%ecx), %ecx
|
|
palignr $1, %xmm1, %xmm2
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
movaps %xmm4, 32(%edx)
|
|
movaps %xmm3, 16(%edx)
|
|
movaps %xmm2, (%edx)
|
|
lea 64(%edx), %edx
|
|
jmp L(Shl1LoopStart)
|
|
|
|
L(Shl1LoopExit):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
movlpd 7(%ecx), %xmm0
|
|
movlpd %xmm0, 7(%edx)
|
|
mov $15, %esi
|
|
jmp L(CopyFrom1To16Bytes)
|
|
|
|
.p2align 4
|
|
L(Shl2):
|
|
movaps -2(%ecx), %xmm1
|
|
movaps 14(%ecx), %xmm2
|
|
L(Shl2Start):
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit2Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl2LoopExit)
|
|
|
|
palignr $2, %xmm1, %xmm2
|
|
movaps %xmm3, %xmm1
|
|
movaps %xmm2, (%edx)
|
|
movaps 30(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit2Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl2LoopExit)
|
|
|
|
palignr $2, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 30(%ecx), %xmm2
|
|
movaps %xmm3, %xmm1
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
movaps %xmm2, %xmm3
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit2Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl2LoopExit)
|
|
|
|
palignr $2, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 30(%ecx), %xmm2
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
lea 16(%edx), %edx
|
|
pmovmskb %xmm0, %eax
|
|
lea 16(%ecx), %ecx
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit2Case2OrCase3)
|
|
# endif
|
|
test %eax, %eax
|
|
jnz L(Shl2LoopExit)
|
|
|
|
palignr $2, %xmm3, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
lea 30(%ecx), %ecx
|
|
lea 16(%edx), %edx
|
|
|
|
mov %ecx, %eax
|
|
and $-0x40, %ecx
|
|
sub %ecx, %eax
|
|
lea -14(%ecx), %ecx
|
|
sub %eax, %edx
|
|
# ifdef USE_AS_STRNCPY
|
|
add %eax, %ebx
|
|
# endif
|
|
movaps -2(%ecx), %xmm1
|
|
|
|
L(Shl2LoopStart):
|
|
movaps 14(%ecx), %xmm2
|
|
movaps 30(%ecx), %xmm3
|
|
movaps %xmm3, %xmm6
|
|
movaps 46(%ecx), %xmm4
|
|
movaps %xmm4, %xmm7
|
|
movaps 62(%ecx), %xmm5
|
|
pminub %xmm2, %xmm6
|
|
pminub %xmm5, %xmm7
|
|
pminub %xmm6, %xmm7
|
|
pcmpeqb %xmm0, %xmm7
|
|
pmovmskb %xmm7, %eax
|
|
movaps %xmm5, %xmm7
|
|
palignr $2, %xmm4, %xmm5
|
|
test %eax, %eax
|
|
palignr $2, %xmm3, %xmm4
|
|
jnz L(Shl2Start)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $64, %ebx
|
|
jbe L(StrncpyLeave2)
|
|
# endif
|
|
palignr $2, %xmm2, %xmm3
|
|
lea 64(%ecx), %ecx
|
|
palignr $2, %xmm1, %xmm2
|
|
movaps %xmm7, %xmm1
|
|
movaps %xmm5, 48(%edx)
|
|
movaps %xmm4, 32(%edx)
|
|
movaps %xmm3, 16(%edx)
|
|
movaps %xmm2, (%edx)
|
|
lea 64(%edx), %edx
|
|
jmp L(Shl2LoopStart)
|
|
|
|
L(Shl2LoopExit):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd 6(%ecx), %xmm1
|
|
movlpd %xmm0, (%edx)
|
|
movlpd %xmm1, 6(%edx)
|
|
mov $14, %esi
|
|
jmp L(CopyFrom1To16Bytes)
|
|
|
|
/* Shift-3 copy: source is 16-byte aligned minus 3.  Layout (shared by
   all L(ShlN) sections): four probe/copy stages of 16 bytes each to
   reach a 64-byte source boundary, then an unrolled 64-byte loop.
   Registers: %ecx = src (biased), %edx = dst, %xmm0 = zero comparator,
   %xmm1 = previous aligned chunk for palignr, %ebx = remaining count
   when built as strncpy.  */
.p2align 4
L(Shl3):
movaps -3(%ecx), %xmm1		/* prime %xmm1 with the chunk before the data */
movaps 13(%ecx), %xmm2
L(Shl3Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax		/* %eax = null-byte mask of this chunk */
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit3Case2OrCase3)	/* count ends inside these 16 bytes */
# endif
test %eax, %eax
jnz L(Shl3LoopExit)		/* terminator inside these 16 bytes */

/* Stage 1: re-align and store 16 bytes, fetch the next chunk.  */
palignr $3, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 29(%ecx), %xmm2

/* Stage 2.  */
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit3Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl3LoopExit)

palignr $3, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 29(%ecx), %xmm2
movaps %xmm3, %xmm1

/* Stage 3.  */
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit3Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl3LoopExit)

palignr $3, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 29(%ecx), %xmm2

/* Stage 4.  */
pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit3Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl3LoopExit)

palignr $3, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 29(%ecx), %ecx
lea 16(%edx), %edx

/* Round the source down to a 64-byte boundary for the unrolled loop;
   shift dst (and, for strncpy, the count) back by the same amount.  */
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax			/* %eax = bytes rounded off */
lea -13(%ecx), %ecx
sub %eax, %edx
# ifdef USE_AS_STRNCPY
add %eax, %ebx
# endif
movaps -3(%ecx), %xmm1

/* 64-bytes-per-iteration loop; pminub folds locate a null byte anywhere
   in the window with a single pcmpeqb/pmovmskb.  */
L(Shl3LoopStart):
movaps 13(%ecx), %xmm2
movaps 29(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 45(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 61(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $3, %xmm4, %xmm5
test %eax, %eax
palignr $3, %xmm3, %xmm4
jnz L(Shl3Start)		/* null in the window: re-scan 16 at a time */
# ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave3)
# endif
palignr $3, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $3, %xmm1, %xmm2
movaps %xmm7, %xmm1		/* carry last chunk into the next iteration */
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl3LoopStart)

/* Null in the current 16 bytes: copy the 13 leading bytes with two
   overlapping 8-byte moves, then finish in the common tail.  */
L(Shl3LoopExit):
movlpd (%ecx), %xmm0
movlpd 5(%ecx), %xmm1
movlpd %xmm0, (%edx)
movlpd %xmm1, 5(%edx)
mov $13, %esi			/* bytes already copied; tail advances by %esi */
jmp L(CopyFrom1To16Bytes)
|
|
|
|
/* Shift-4 copy: source is 16-byte aligned minus 4.  Structure identical
   to L(Shl3); see the comments there.  */
.p2align 4
L(Shl4):
movaps -4(%ecx), %xmm1
movaps 12(%ecx), %xmm2
L(Shl4Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit4Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl4LoopExit)

palignr $4, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit4Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl4LoopExit)

palignr $4, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
movaps %xmm3, %xmm1

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit4Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl4LoopExit)

palignr $4, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit4Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl4LoopExit)

palignr $4, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 28(%ecx), %ecx
lea 16(%edx), %edx

/* Round source down to a 64-byte boundary for the unrolled loop.  */
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -12(%ecx), %ecx
sub %eax, %edx
# ifdef USE_AS_STRNCPY
add %eax, %ebx
# endif
movaps -4(%ecx), %xmm1

L(Shl4LoopStart):
movaps 12(%ecx), %xmm2
movaps 28(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 44(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 60(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $4, %xmm4, %xmm5
test %eax, %eax
palignr $4, %xmm3, %xmm4
jnz L(Shl4Start)
# ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave4)
# endif
palignr $4, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $4, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl4LoopStart)

/* Copy the 12 leading bytes (8 + 4) and finish in the common tail.  */
L(Shl4LoopExit):
movlpd (%ecx), %xmm0
movl 8(%ecx), %esi
movlpd %xmm0, (%edx)
movl %esi, 8(%edx)
mov $12, %esi
jmp L(CopyFrom1To16Bytes)
|
|
|
|
/* Shift-5 copy: source is 16-byte aligned minus 5.  Structure identical
   to L(Shl3); see the comments there.  */
.p2align 4
L(Shl5):
movaps -5(%ecx), %xmm1
movaps 11(%ecx), %xmm2
L(Shl5Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit5Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl5LoopExit)

palignr $5, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 27(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit5Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl5LoopExit)

palignr $5, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 27(%ecx), %xmm2
movaps %xmm3, %xmm1

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit5Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl5LoopExit)

palignr $5, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 27(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit5Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl5LoopExit)

palignr $5, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 27(%ecx), %ecx
lea 16(%edx), %edx

/* Round source down to a 64-byte boundary for the unrolled loop.  */
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -11(%ecx), %ecx
sub %eax, %edx
# ifdef USE_AS_STRNCPY
add %eax, %ebx
# endif
movaps -5(%ecx), %xmm1

L(Shl5LoopStart):
movaps 11(%ecx), %xmm2
movaps 27(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 43(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 59(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $5, %xmm4, %xmm5
test %eax, %eax
palignr $5, %xmm3, %xmm4
jnz L(Shl5Start)
# ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave5)
# endif
palignr $5, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $5, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl5LoopStart)

/* Copy the 11 leading bytes (8 + 4, overlapping by 1) and finish.  */
L(Shl5LoopExit):
movlpd (%ecx), %xmm0
movl 7(%ecx), %esi
movlpd %xmm0, (%edx)
movl %esi, 7(%edx)
mov $11, %esi
jmp L(CopyFrom1To16Bytes)
|
|
|
|
/* Shift-6 copy: source is 16-byte aligned minus 6.  Structure identical
   to L(Shl3); see the comments there.  */
.p2align 4
L(Shl6):
movaps -6(%ecx), %xmm1
movaps 10(%ecx), %xmm2
L(Shl6Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit6Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl6LoopExit)

palignr $6, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 26(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit6Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl6LoopExit)

palignr $6, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 26(%ecx), %xmm2
movaps %xmm3, %xmm1

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit6Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl6LoopExit)

palignr $6, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 26(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit6Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl6LoopExit)

palignr $6, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 26(%ecx), %ecx
lea 16(%edx), %edx

/* Round source down to a 64-byte boundary for the unrolled loop.  */
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -10(%ecx), %ecx
sub %eax, %edx
# ifdef USE_AS_STRNCPY
add %eax, %ebx
# endif
movaps -6(%ecx), %xmm1

L(Shl6LoopStart):
movaps 10(%ecx), %xmm2
movaps 26(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 42(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 58(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $6, %xmm4, %xmm5
test %eax, %eax
palignr $6, %xmm3, %xmm4
jnz L(Shl6Start)
# ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave6)
# endif
palignr $6, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $6, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl6LoopStart)

/* Copy the 10 leading bytes (8 + 4, overlapping by 2) and finish.  */
L(Shl6LoopExit):
movlpd (%ecx), %xmm0
movl 6(%ecx), %esi
movlpd %xmm0, (%edx)
movl %esi, 6(%edx)
mov $10, %esi
jmp L(CopyFrom1To16Bytes)
|
|
|
|
/* Shift-7 copy: source is 16-byte aligned minus 7.  Structure identical
   to L(Shl3); see the comments there.  */
.p2align 4
L(Shl7):
movaps -7(%ecx), %xmm1
movaps 9(%ecx), %xmm2
L(Shl7Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit7Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl7LoopExit)

palignr $7, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 25(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit7Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl7LoopExit)

palignr $7, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 25(%ecx), %xmm2
movaps %xmm3, %xmm1

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit7Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl7LoopExit)

palignr $7, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 25(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit7Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl7LoopExit)

palignr $7, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 25(%ecx), %ecx
lea 16(%edx), %edx

/* Round source down to a 64-byte boundary for the unrolled loop.  */
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -9(%ecx), %ecx
sub %eax, %edx
# ifdef USE_AS_STRNCPY
add %eax, %ebx
# endif
movaps -7(%ecx), %xmm1

L(Shl7LoopStart):
movaps 9(%ecx), %xmm2
movaps 25(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 41(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 57(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $7, %xmm4, %xmm5
test %eax, %eax
palignr $7, %xmm3, %xmm4
jnz L(Shl7Start)
# ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave7)
# endif
palignr $7, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $7, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl7LoopStart)

/* Copy the 9 leading bytes (8 + 4, overlapping by 3) and finish.  */
L(Shl7LoopExit):
movlpd (%ecx), %xmm0
movl 5(%ecx), %esi
movlpd %xmm0, (%edx)
movl %esi, 5(%edx)
mov $9, %esi
jmp L(CopyFrom1To16Bytes)
|
|
|
|
/* Shift-8 copy: source is 16-byte aligned minus 8.  Structure identical
   to L(Shl3); see the comments there.  */
.p2align 4
L(Shl8):
movaps -8(%ecx), %xmm1
movaps 8(%ecx), %xmm2
L(Shl8Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit8Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl8LoopExit)

palignr $8, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit8Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl8LoopExit)

palignr $8, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
movaps %xmm3, %xmm1

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit8Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl8LoopExit)

palignr $8, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit8Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl8LoopExit)

palignr $8, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 24(%ecx), %ecx
lea 16(%edx), %edx

/* Round source down to a 64-byte boundary for the unrolled loop.  */
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -8(%ecx), %ecx
sub %eax, %edx
# ifdef USE_AS_STRNCPY
add %eax, %ebx
# endif
movaps -8(%ecx), %xmm1

L(Shl8LoopStart):
movaps 8(%ecx), %xmm2
movaps 24(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 40(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 56(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $8, %xmm4, %xmm5
test %eax, %eax
palignr $8, %xmm3, %xmm4
jnz L(Shl8Start)
# ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave8)
# endif
palignr $8, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $8, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl8LoopStart)

/* Copy the 8 leading bytes and finish in the common tail.  */
L(Shl8LoopExit):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
mov $8, %esi
jmp L(CopyFrom1To16Bytes)
|
|
|
|
/* Shift-9 copy: source is 16-byte aligned minus 9.  Structure identical
   to L(Shl3); see the comments there.  */
.p2align 4
L(Shl9):
movaps -9(%ecx), %xmm1
movaps 7(%ecx), %xmm2
L(Shl9Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit9Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl9LoopExit)

palignr $9, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 23(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit9Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl9LoopExit)

palignr $9, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 23(%ecx), %xmm2
movaps %xmm3, %xmm1

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit9Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl9LoopExit)

palignr $9, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 23(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit9Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl9LoopExit)

palignr $9, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 23(%ecx), %ecx
lea 16(%edx), %edx

/* Round source down to a 64-byte boundary for the unrolled loop.  */
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -7(%ecx), %ecx
sub %eax, %edx
# ifdef USE_AS_STRNCPY
add %eax, %ebx
# endif
movaps -9(%ecx), %xmm1

L(Shl9LoopStart):
movaps 7(%ecx), %xmm2
movaps 23(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 39(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 55(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $9, %xmm4, %xmm5
test %eax, %eax
palignr $9, %xmm3, %xmm4
jnz L(Shl9Start)
# ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave9)
# endif
palignr $9, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $9, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl9LoopStart)

/* Copy 8 bytes at offset -1 (byte -1 was already written earlier, so
   the overlap is harmless) to cover the 7 leading bytes, then finish.  */
L(Shl9LoopExit):
movlpd -1(%ecx), %xmm0
movlpd %xmm0, -1(%edx)
mov $7, %esi
jmp L(CopyFrom1To16Bytes)
|
|
|
|
/* Shift-10 copy: source is 16-byte aligned minus 10.  Structure
   identical to L(Shl3); see the comments there.  */
.p2align 4
L(Shl10):
movaps -10(%ecx), %xmm1
movaps 6(%ecx), %xmm2
L(Shl10Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit10Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl10LoopExit)

palignr $10, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 22(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit10Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl10LoopExit)

palignr $10, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 22(%ecx), %xmm2
movaps %xmm3, %xmm1

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit10Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl10LoopExit)

palignr $10, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 22(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit10Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl10LoopExit)

palignr $10, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 22(%ecx), %ecx
lea 16(%edx), %edx

/* Round source down to a 64-byte boundary for the unrolled loop.  */
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -6(%ecx), %ecx
sub %eax, %edx
# ifdef USE_AS_STRNCPY
add %eax, %ebx
# endif
movaps -10(%ecx), %xmm1

L(Shl10LoopStart):
movaps 6(%ecx), %xmm2
movaps 22(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 38(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 54(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $10, %xmm4, %xmm5
test %eax, %eax
palignr $10, %xmm3, %xmm4
jnz L(Shl10Start)
# ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave10)
# endif
palignr $10, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $10, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl10LoopStart)

/* Copy 8 bytes at offset -2 (overlap with already-written bytes is
   harmless) to cover the 6 leading bytes, then finish.  */
L(Shl10LoopExit):
movlpd -2(%ecx), %xmm0
movlpd %xmm0, -2(%edx)
mov $6, %esi
jmp L(CopyFrom1To16Bytes)
|
|
|
|
/* Shift-11 copy: source is 16-byte aligned minus 11.  Structure
   identical to L(Shl3); see the comments there.  */
.p2align 4
L(Shl11):
movaps -11(%ecx), %xmm1
movaps 5(%ecx), %xmm2
L(Shl11Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit11Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl11LoopExit)

palignr $11, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 21(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit11Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl11LoopExit)

palignr $11, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 21(%ecx), %xmm2
movaps %xmm3, %xmm1

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit11Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl11LoopExit)

palignr $11, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 21(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit11Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl11LoopExit)

palignr $11, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 21(%ecx), %ecx
lea 16(%edx), %edx

/* Round source down to a 64-byte boundary for the unrolled loop.  */
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -5(%ecx), %ecx
sub %eax, %edx
# ifdef USE_AS_STRNCPY
add %eax, %ebx
# endif
movaps -11(%ecx), %xmm1

L(Shl11LoopStart):
movaps 5(%ecx), %xmm2
movaps 21(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 37(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 53(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $11, %xmm4, %xmm5
test %eax, %eax
palignr $11, %xmm3, %xmm4
jnz L(Shl11Start)
# ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave11)
# endif
palignr $11, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $11, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl11LoopStart)

/* Copy 8 bytes at offset -3 (overlap with already-written bytes is
   harmless) to cover the 5 leading bytes, then finish.  */
L(Shl11LoopExit):
movlpd -3(%ecx), %xmm0
movlpd %xmm0, -3(%edx)
mov $5, %esi
jmp L(CopyFrom1To16Bytes)
|
|
|
|
/* Shift-12 copy: source is 16-byte aligned minus 12.  Structure
   identical to L(Shl3); see the comments there.  */
.p2align 4
L(Shl12):
movaps -12(%ecx), %xmm1
movaps 4(%ecx), %xmm2
L(Shl12Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit12Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl12LoopExit)

palignr $12, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit12Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl12LoopExit)

palignr $12, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
movaps %xmm3, %xmm1

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit12Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl12LoopExit)

palignr $12, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit12Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl12LoopExit)

palignr $12, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 20(%ecx), %ecx
lea 16(%edx), %edx

/* Round source down to a 64-byte boundary for the unrolled loop.  */
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -4(%ecx), %ecx
sub %eax, %edx
# ifdef USE_AS_STRNCPY
add %eax, %ebx
# endif
movaps -12(%ecx), %xmm1

L(Shl12LoopStart):
movaps 4(%ecx), %xmm2
movaps 20(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 36(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 52(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $12, %xmm4, %xmm5
test %eax, %eax
palignr $12, %xmm3, %xmm4
jnz L(Shl12Start)
# ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave12)
# endif
palignr $12, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $12, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl12LoopStart)

/* Copy the 4 leading bytes and finish in the common tail.  */
L(Shl12LoopExit):
movl (%ecx), %esi
movl %esi, (%edx)
mov $4, %esi
jmp L(CopyFrom1To16Bytes)
|
|
|
|
/* Shift-13 copy: source is 16-byte aligned minus 13.  Structure
   identical to L(Shl3); see the comments there.  */
.p2align 4
L(Shl13):
movaps -13(%ecx), %xmm1
movaps 3(%ecx), %xmm2
L(Shl13Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit13Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl13LoopExit)

palignr $13, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 19(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit13Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl13LoopExit)

palignr $13, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 19(%ecx), %xmm2
movaps %xmm3, %xmm1

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit13Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl13LoopExit)

palignr $13, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 19(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit13Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl13LoopExit)

palignr $13, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 19(%ecx), %ecx
lea 16(%edx), %edx

/* Round source down to a 64-byte boundary for the unrolled loop.  */
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -3(%ecx), %ecx
sub %eax, %edx
# ifdef USE_AS_STRNCPY
add %eax, %ebx
# endif
movaps -13(%ecx), %xmm1

L(Shl13LoopStart):
movaps 3(%ecx), %xmm2
movaps 19(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 35(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 51(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $13, %xmm4, %xmm5
test %eax, %eax
palignr $13, %xmm3, %xmm4
jnz L(Shl13Start)
# ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave13)
# endif
palignr $13, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $13, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl13LoopStart)

/* Copy 4 bytes at offset -1 (overlap with already-written bytes is
   harmless) to cover the 3 leading bytes, then finish.  */
L(Shl13LoopExit):
movl -1(%ecx), %esi
movl %esi, -1(%edx)
mov $3, %esi
jmp L(CopyFrom1To16Bytes)
|
|
|
|
/* Shift-14 copy: source is 16-byte aligned minus 14.  Structure
   identical to L(Shl3); see the comments there.  */
.p2align 4
L(Shl14):
movaps -14(%ecx), %xmm1
movaps 2(%ecx), %xmm2
L(Shl14Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit14Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl14LoopExit)

palignr $14, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 18(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit14Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl14LoopExit)

palignr $14, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 18(%ecx), %xmm2
movaps %xmm3, %xmm1

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit14Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl14LoopExit)

palignr $14, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 18(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit14Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl14LoopExit)

palignr $14, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 18(%ecx), %ecx
lea 16(%edx), %edx

/* Round source down to a 64-byte boundary for the unrolled loop.  */
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -2(%ecx), %ecx
sub %eax, %edx
# ifdef USE_AS_STRNCPY
add %eax, %ebx
# endif
movaps -14(%ecx), %xmm1

L(Shl14LoopStart):
movaps 2(%ecx), %xmm2
movaps 18(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 34(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 50(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $14, %xmm4, %xmm5
test %eax, %eax
palignr $14, %xmm3, %xmm4
jnz L(Shl14Start)
# ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave14)
# endif
palignr $14, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $14, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl14LoopStart)

/* Copy 4 bytes at offset -2 (overlap with already-written bytes is
   harmless) to cover the 2 leading bytes, then finish.  */
L(Shl14LoopExit):
movl -2(%ecx), %esi
movl %esi, -2(%edx)
mov $2, %esi
jmp L(CopyFrom1To16Bytes)
|
|
|
|
/* Shift-15 copy: source is 16-byte aligned minus 15.  Structure
   identical to L(Shl3); see the comments there.  This is the last
   shift case; for strcat builds its exit falls through into the
   shared tail via an explicit jump.  */
.p2align 4
L(Shl15):
movaps -15(%ecx), %xmm1
movaps 1(%ecx), %xmm2
L(Shl15Start):
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit15Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl15LoopExit)

palignr $15, %xmm1, %xmm2
movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 17(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit15Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl15LoopExit)

palignr $15, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 17(%ecx), %xmm2
movaps %xmm3, %xmm1

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit15Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl15LoopExit)

palignr $15, %xmm1, %xmm2
movaps %xmm2, (%edx)
movaps 17(%ecx), %xmm2

pcmpeqb %xmm2, %xmm0
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit15Case2OrCase3)
# endif
test %eax, %eax
jnz L(Shl15LoopExit)

palignr $15, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 17(%ecx), %ecx
lea 16(%edx), %edx

/* Round source down to a 64-byte boundary for the unrolled loop.  */
mov %ecx, %eax
and $-0x40, %ecx
sub %ecx, %eax
lea -1(%ecx), %ecx
sub %eax, %edx
# ifdef USE_AS_STRNCPY
add %eax, %ebx
# endif
movaps -15(%ecx), %xmm1

L(Shl15LoopStart):
movaps 1(%ecx), %xmm2
movaps 17(%ecx), %xmm3
movaps %xmm3, %xmm6
movaps 33(%ecx), %xmm4
movaps %xmm4, %xmm7
movaps 49(%ecx), %xmm5
pminub %xmm2, %xmm6
pminub %xmm5, %xmm7
pminub %xmm6, %xmm7
pcmpeqb %xmm0, %xmm7
pmovmskb %xmm7, %eax
movaps %xmm5, %xmm7
palignr $15, %xmm4, %xmm5
test %eax, %eax
palignr $15, %xmm3, %xmm4
jnz L(Shl15Start)
# ifdef USE_AS_STRNCPY
sub $64, %ebx
jbe L(StrncpyLeave15)
# endif
palignr $15, %xmm2, %xmm3
lea 64(%ecx), %ecx
palignr $15, %xmm1, %xmm2
movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
movaps %xmm2, (%edx)
lea 64(%edx), %edx
jmp L(Shl15LoopStart)

/* Copy 4 bytes at offset -3 (overlap with already-written bytes is
   harmless) to cover the 1 leading byte, then finish.  For non-strcat
   builds, L(CopyFrom1To16Bytes) follows immediately, so the jump is
   only needed (and only emitted) for strcat.  */
L(Shl15LoopExit):
movl -3(%ecx), %esi
movl %esi, -3(%edx)
mov $1, %esi
# ifdef USE_AS_STRCAT
jmp L(CopyFrom1To16Bytes)
# endif
|
|
|
|
|
|
# ifndef USE_AS_STRCAT
|
|
|
|
.p2align 4
|
|
L(CopyFrom1To16Bytes):
|
|
# ifdef USE_AS_STRNCPY
|
|
add $16, %ebx
|
|
# endif
|
|
add %esi, %edx
|
|
add %esi, %ecx
|
|
|
|
POP (%esi)
|
|
test %al, %al
|
|
jz L(ExitHigh8)
|
|
|
|
L(CopyFrom1To16BytesLess8):
|
|
mov %al, %ah
|
|
and $15, %ah
|
|
jz L(ExitHigh4)
|
|
|
|
test $0x01, %al
|
|
jnz L(Exit1)
|
|
test $0x02, %al
|
|
jnz L(Exit2)
|
|
test $0x04, %al
|
|
jnz L(Exit3)
|
|
|
|
.p2align 4
|
|
L(Exit4):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
SAVE_RESULT (3)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $4, %ebx
|
|
lea 4(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(ExitHigh4):
|
|
test $0x10, %al
|
|
jnz L(Exit5)
|
|
test $0x20, %al
|
|
jnz L(Exit6)
|
|
test $0x40, %al
|
|
jnz L(Exit7)
|
|
|
|
.p2align 4
|
|
L(Exit8):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
SAVE_RESULT (7)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $8, %ebx
|
|
lea 8(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(ExitHigh8):
|
|
mov %ah, %al
|
|
and $15, %al
|
|
jz L(ExitHigh12)
|
|
|
|
test $0x01, %ah
|
|
jnz L(Exit9)
|
|
test $0x02, %ah
|
|
jnz L(Exit10)
|
|
test $0x04, %ah
|
|
jnz L(Exit11)
|
|
|
|
.p2align 4
|
|
L(Exit12):
|
|
movlpd (%ecx), %xmm0
|
|
movl 8(%ecx), %eax
|
|
movlpd %xmm0, (%edx)
|
|
movl %eax, 8(%edx)
|
|
SAVE_RESULT (11)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $12, %ebx
|
|
lea 12(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(ExitHigh12):
|
|
test $0x10, %ah
|
|
jnz L(Exit13)
|
|
test $0x20, %ah
|
|
jnz L(Exit14)
|
|
test $0x40, %ah
|
|
jnz L(Exit15)
|
|
|
|
.p2align 4
|
|
L(Exit16):
|
|
movdqu (%ecx), %xmm0
|
|
movdqu %xmm0, (%edx)
|
|
SAVE_RESULT (15)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx
|
|
lea 16(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
# ifdef USE_AS_STRNCPY
|
|
|
|
CFI_PUSH(%esi)
|
|
|
|
.p2align 4
|
|
L(CopyFrom1To16BytesCase2):
|
|
add $16, %ebx
|
|
add %esi, %ecx
|
|
add %esi, %edx
|
|
|
|
POP (%esi)
|
|
|
|
test %al, %al
|
|
jz L(ExitHighCase2)
|
|
|
|
cmp $8, %ebx
|
|
ja L(CopyFrom1To16BytesLess8)
|
|
|
|
test $0x01, %al
|
|
jnz L(Exit1)
|
|
cmp $1, %ebx
|
|
je L(Exit1)
|
|
test $0x02, %al
|
|
jnz L(Exit2)
|
|
cmp $2, %ebx
|
|
je L(Exit2)
|
|
test $0x04, %al
|
|
jnz L(Exit3)
|
|
cmp $3, %ebx
|
|
je L(Exit3)
|
|
test $0x08, %al
|
|
jnz L(Exit4)
|
|
cmp $4, %ebx
|
|
je L(Exit4)
|
|
test $0x10, %al
|
|
jnz L(Exit5)
|
|
cmp $5, %ebx
|
|
je L(Exit5)
|
|
test $0x20, %al
|
|
jnz L(Exit6)
|
|
cmp $6, %ebx
|
|
je L(Exit6)
|
|
test $0x40, %al
|
|
jnz L(Exit7)
|
|
cmp $7, %ebx
|
|
je L(Exit7)
|
|
jmp L(Exit8)
|
|
|
|
.p2align 4
|
|
L(ExitHighCase2):
|
|
cmp $8, %ebx
|
|
jbe L(CopyFrom1To16BytesLess8Case3)
|
|
|
|
test $0x01, %ah
|
|
jnz L(Exit9)
|
|
cmp $9, %ebx
|
|
je L(Exit9)
|
|
test $0x02, %ah
|
|
jnz L(Exit10)
|
|
cmp $10, %ebx
|
|
je L(Exit10)
|
|
test $0x04, %ah
|
|
jnz L(Exit11)
|
|
cmp $11, %ebx
|
|
je L(Exit11)
|
|
test $0x8, %ah
|
|
jnz L(Exit12)
|
|
cmp $12, %ebx
|
|
je L(Exit12)
|
|
test $0x10, %ah
|
|
jnz L(Exit13)
|
|
cmp $13, %ebx
|
|
je L(Exit13)
|
|
test $0x20, %ah
|
|
jnz L(Exit14)
|
|
cmp $14, %ebx
|
|
je L(Exit14)
|
|
test $0x40, %ah
|
|
jnz L(Exit15)
|
|
cmp $15, %ebx
|
|
je L(Exit15)
|
|
jmp L(Exit16)
|
|
|
|
CFI_PUSH(%esi)
|
|
|
|
.p2align 4
|
|
L(CopyFrom1To16BytesCase2OrCase3):
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
|
|
.p2align 4
|
|
L(CopyFrom1To16BytesCase3):
|
|
add $16, %ebx
|
|
add %esi, %edx
|
|
add %esi, %ecx
|
|
|
|
POP (%esi)
|
|
|
|
cmp $8, %ebx
|
|
ja L(ExitHigh8Case3)
|
|
|
|
L(CopyFrom1To16BytesLess8Case3):
|
|
cmp $4, %ebx
|
|
ja L(ExitHigh4Case3)
|
|
|
|
cmp $1, %ebx
|
|
je L(Exit1)
|
|
cmp $2, %ebx
|
|
je L(Exit2)
|
|
cmp $3, %ebx
|
|
je L(Exit3)
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
SAVE_RESULT (4)
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(ExitHigh4Case3):
|
|
cmp $5, %ebx
|
|
je L(Exit5)
|
|
cmp $6, %ebx
|
|
je L(Exit6)
|
|
cmp $7, %ebx
|
|
je L(Exit7)
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
SAVE_RESULT (8)
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(ExitHigh8Case3):
|
|
cmp $12, %ebx
|
|
ja L(ExitHigh12Case3)
|
|
|
|
cmp $9, %ebx
|
|
je L(Exit9)
|
|
cmp $10, %ebx
|
|
je L(Exit10)
|
|
cmp $11, %ebx
|
|
je L(Exit11)
|
|
movlpd (%ecx), %xmm0
|
|
movl 8(%ecx), %eax
|
|
movlpd %xmm0, (%edx)
|
|
movl %eax, 8(%edx)
|
|
SAVE_RESULT (12)
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(ExitHigh12Case3):
|
|
cmp $13, %ebx
|
|
je L(Exit13)
|
|
cmp $14, %ebx
|
|
je L(Exit14)
|
|
cmp $15, %ebx
|
|
je L(Exit15)
|
|
movlpd (%ecx), %xmm0
|
|
movlpd 8(%ecx), %xmm1
|
|
movlpd %xmm0, (%edx)
|
|
movlpd %xmm1, 8(%edx)
|
|
SAVE_RESULT (16)
|
|
RETURN1
|
|
|
|
# endif
|
|
|
|
.p2align 4
|
|
/* Terminator found at offset 0 of the current 16-byte window:
   copy the single NUL byte and finish.  On entry %ecx = source,
   %edx = destination; for strncpy %ebx = bytes still allowed.  */
L(Exit1):
|
|
movb (%ecx), %al	/* Load the NUL byte itself.  */
|
|
movb %al, (%edx)	/* Store it; the copy is complete.  */
|
|
SAVE_RESULT (0)	/* Presumably records the return value
			   (NUL at offset 0) — see macro definition.  */
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $1, %ebx	/* One byte of the length limit consumed.  */
|
|
lea 1(%edx), %ecx	/* %ecx = first byte still to be zero-padded.  */
|
|
jnz L(StrncpyFillTailWithZero1)	/* Pad the rest of the buffer with NULs.  */
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)	/* CF = 1 iff *result == 0.  */
|
|
sbb $-1, %eax	/* %eax += 1 - CF: stpcpy return-adjust idiom;
			   advance only past a non-NUL byte.  */
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit2):
|
|
movw (%ecx), %ax
|
|
movw %ax, (%edx)
|
|
SAVE_RESULT (1)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $2, %ebx
|
|
lea 2(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit3):
|
|
movw (%ecx), %ax
|
|
movw %ax, (%edx)
|
|
movb 2(%ecx), %al
|
|
movb %al, 2(%edx)
|
|
SAVE_RESULT (2)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $3, %ebx
|
|
lea 3(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit5):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movb 4(%ecx), %al
|
|
movb %al, 4(%edx)
|
|
SAVE_RESULT (4)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $5, %ebx
|
|
lea 5(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit6):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movw 4(%ecx), %ax
|
|
movw %ax, 4(%edx)
|
|
SAVE_RESULT (5)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $6, %ebx
|
|
lea 6(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit7):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movl 3(%ecx), %eax
|
|
movl %eax, 3(%edx)
|
|
SAVE_RESULT (6)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $7, %ebx
|
|
lea 7(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit9):
|
|
movlpd (%ecx), %xmm0
|
|
movb 8(%ecx), %al
|
|
movlpd %xmm0, (%edx)
|
|
movb %al, 8(%edx)
|
|
SAVE_RESULT (8)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $9, %ebx
|
|
lea 9(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit10):
|
|
movlpd (%ecx), %xmm0
|
|
movw 8(%ecx), %ax
|
|
movlpd %xmm0, (%edx)
|
|
movw %ax, 8(%edx)
|
|
SAVE_RESULT (9)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $10, %ebx
|
|
lea 10(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit11):
|
|
movlpd (%ecx), %xmm0
|
|
movl 7(%ecx), %eax
|
|
movlpd %xmm0, (%edx)
|
|
movl %eax, 7(%edx)
|
|
SAVE_RESULT (10)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $11, %ebx
|
|
lea 11(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit13):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd 5(%ecx), %xmm1
|
|
movlpd %xmm0, (%edx)
|
|
movlpd %xmm1, 5(%edx)
|
|
SAVE_RESULT (12)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $13, %ebx
|
|
lea 13(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit14):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd 6(%ecx), %xmm1
|
|
movlpd %xmm0, (%edx)
|
|
movlpd %xmm1, 6(%edx)
|
|
SAVE_RESULT (13)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $14, %ebx
|
|
lea 14(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
.p2align 4
|
|
L(Exit15):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd 7(%ecx), %xmm1
|
|
movlpd %xmm0, (%edx)
|
|
movlpd %xmm1, 7(%edx)
|
|
SAVE_RESULT (14)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $15, %ebx
|
|
lea 15(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero1)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN1
|
|
|
|
CFI_POP (%edi)
|
|
|
|
# ifdef USE_AS_STRNCPY
|
|
.p2align 4
|
|
L(Fill0):
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill1):
|
|
movb %dl, (%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill2):
|
|
movw %dx, (%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill3):
|
|
movw %dx, (%ecx)
|
|
movb %dl, 2(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Zero-fill exactly 4 trailing bytes at %ecx (strncpy padding).
   %edx was zeroed by the fill path before dispatching here.  */
L(Fill4):
|
|
movl %edx, (%ecx)	/* Store 4 zero bytes at once.  */
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill5):
|
|
movl %edx, (%ecx)
|
|
movb %dl, 4(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill6):
|
|
movl %edx, (%ecx)
|
|
movw %dx, 4(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill7):
|
|
movl %edx, (%ecx)
|
|
movl %edx, 3(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill8):
|
|
movlpd %xmm0, (%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill9):
|
|
movlpd %xmm0, (%ecx)
|
|
movb %dl, 8(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill10):
|
|
movlpd %xmm0, (%ecx)
|
|
movw %dx, 8(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill11):
|
|
movlpd %xmm0, (%ecx)
|
|
movl %edx, 7(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill12):
|
|
movlpd %xmm0, (%ecx)
|
|
movl %edx, 8(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill13):
|
|
movlpd %xmm0, (%ecx)
|
|
movlpd %xmm0, 5(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill14):
|
|
movlpd %xmm0, (%ecx)
|
|
movlpd %xmm0, 6(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill15):
|
|
movlpd %xmm0, (%ecx)
|
|
movlpd %xmm0, 7(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(Fill16):
|
|
movlpd %xmm0, (%ecx)
|
|
movlpd %xmm0, 8(%ecx)
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Re-enter the fill path when fewer than 16 pad bytes remain:
   undo the last 16-byte decrement so %ebx holds the exact count.  */
L(StrncpyFillExit1):
|
|
lea 16(%ebx), %ebx	/* %ebx += 16: remaining pad-byte count.  */
|
|
/* Dispatch on %ebx (0..16) to the matching fixed-size zero-fill
   stub L(Fill0)..L(Fill16).  Comparison ladder: test against
   16/8/4/2 first, then refine within each sub-range.  */
L(FillFrom1To16Bytes):
|
|
test %ebx, %ebx
|
|
jz L(Fill0)	/* Nothing left to pad.  */
|
|
cmp $16, %ebx
|
|
je L(Fill16)
|
|
cmp $8, %ebx
|
|
je L(Fill8)
|
|
jg L(FillMore8)	/* count in 9..15 */
|
|
cmp $4, %ebx
|
|
je L(Fill4)
|
|
jg L(FillMore4)	/* count in 5..7 */
|
|
cmp $2, %ebx
|
|
jl L(Fill1)	/* count == 1 */
|
|
je L(Fill2)
|
|
jg L(Fill3)
|
|
L(FillMore8): /* but less than 16 */
|
|
cmp $12, %ebx
|
|
je L(Fill12)
|
|
jl L(FillLess12)	/* count in 9..11 */
|
|
cmp $14, %ebx
|
|
jl L(Fill13)
|
|
je L(Fill14)
|
|
jg L(Fill15)
|
|
L(FillMore4): /* but less than 8 */
|
|
cmp $6, %ebx
|
|
jl L(Fill5)
|
|
je L(Fill6)
|
|
jg L(Fill7)
|
|
L(FillLess12): /* but more than 8 */
|
|
cmp $10, %ebx
|
|
jl L(Fill9)
|
|
je L(Fill10)
|
|
jmp L(Fill11)
|
|
|
|
CFI_PUSH(%edi)
|
|
|
|
.p2align 4
|
|
L(StrncpyFillTailWithZero1):
|
|
POP (%edi)
|
|
L(StrncpyFillTailWithZero):
|
|
pxor %xmm0, %xmm0
|
|
xor %edx, %edx
|
|
sub $16, %ebx
|
|
jbe L(StrncpyFillExit1)
|
|
|
|
movlpd %xmm0, (%ecx)
|
|
movlpd %xmm0, 8(%ecx)
|
|
|
|
lea 16(%ecx), %ecx
|
|
|
|
mov %ecx, %edx
|
|
and $0xf, %edx
|
|
sub %edx, %ecx
|
|
add %edx, %ebx
|
|
xor %edx, %edx
|
|
sub $64, %ebx
|
|
jb L(StrncpyFillLess64)
|
|
|
|
L(StrncpyFillLoopMovdqa):
|
|
movdqa %xmm0, (%ecx)
|
|
movdqa %xmm0, 16(%ecx)
|
|
movdqa %xmm0, 32(%ecx)
|
|
movdqa %xmm0, 48(%ecx)
|
|
lea 64(%ecx), %ecx
|
|
sub $64, %ebx
|
|
jae L(StrncpyFillLoopMovdqa)
|
|
|
|
L(StrncpyFillLess64):
|
|
add $32, %ebx
|
|
jl L(StrncpyFillLess32)
|
|
movdqa %xmm0, (%ecx)
|
|
movdqa %xmm0, 16(%ecx)
|
|
lea 32(%ecx), %ecx
|
|
sub $16, %ebx
|
|
jl L(StrncpyFillExit1)
|
|
movdqa %xmm0, (%ecx)
|
|
lea 16(%ecx), %ecx
|
|
jmp L(FillFrom1To16Bytes)
|
|
|
|
L(StrncpyFillLess32):
|
|
add $16, %ebx
|
|
jl L(StrncpyFillExit1)
|
|
movdqa %xmm0, (%ecx)
|
|
lea 16(%ecx), %ecx
|
|
jmp L(FillFrom1To16Bytes)
|
|
# endif
|
|
|
|
.p2align 4
|
|
L(ExitTail1):
|
|
movb (%ecx), %al
|
|
movb %al, (%edx)
|
|
SAVE_RESULT_TAIL (0)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $1, %ebx
|
|
lea 1(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(ExitTail2):
|
|
movw (%ecx), %ax
|
|
movw %ax, (%edx)
|
|
SAVE_RESULT_TAIL (1)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $2, %ebx
|
|
lea 2(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(ExitTail3):
|
|
movw (%ecx), %ax
|
|
movw %ax, (%edx)
|
|
movb 2(%ecx), %al
|
|
movb %al, 2(%edx)
|
|
SAVE_RESULT_TAIL (2)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $3, %ebx
|
|
lea 3(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(ExitTail4):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
SAVE_RESULT_TAIL (3)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $4, %ebx
|
|
lea 4(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(ExitTail5):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movb 4(%ecx), %al
|
|
movb %al, 4(%edx)
|
|
SAVE_RESULT_TAIL (4)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $5, %ebx
|
|
lea 5(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(ExitTail6):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movw 4(%ecx), %ax
|
|
movw %ax, 4(%edx)
|
|
SAVE_RESULT_TAIL (5)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $6, %ebx
|
|
lea 6(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(ExitTail7):
|
|
movl (%ecx), %eax
|
|
movl %eax, (%edx)
|
|
movl 3(%ecx), %eax
|
|
movl %eax, 3(%edx)
|
|
SAVE_RESULT_TAIL (6)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $7, %ebx
|
|
lea 7(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(ExitTail8):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
SAVE_RESULT_TAIL (7)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $8, %ebx
|
|
lea 8(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(ExitTail9):
|
|
movlpd (%ecx), %xmm0
|
|
movb 8(%ecx), %al
|
|
movlpd %xmm0, (%edx)
|
|
movb %al, 8(%edx)
|
|
SAVE_RESULT_TAIL (8)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $9, %ebx
|
|
lea 9(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(ExitTail10):
|
|
movlpd (%ecx), %xmm0
|
|
movw 8(%ecx), %ax
|
|
movlpd %xmm0, (%edx)
|
|
movw %ax, 8(%edx)
|
|
SAVE_RESULT_TAIL (9)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $10, %ebx
|
|
lea 10(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(ExitTail11):
|
|
movlpd (%ecx), %xmm0
|
|
movl 7(%ecx), %eax
|
|
movlpd %xmm0, (%edx)
|
|
movl %eax, 7(%edx)
|
|
SAVE_RESULT_TAIL (10)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $11, %ebx
|
|
lea 11(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(ExitTail12):
|
|
movlpd (%ecx), %xmm0
|
|
movl 8(%ecx), %eax
|
|
movlpd %xmm0, (%edx)
|
|
movl %eax, 8(%edx)
|
|
SAVE_RESULT_TAIL (11)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $12, %ebx
|
|
lea 12(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(ExitTail13):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd 5(%ecx), %xmm1
|
|
movlpd %xmm0, (%edx)
|
|
movlpd %xmm1, 5(%edx)
|
|
SAVE_RESULT_TAIL (12)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $13, %ebx
|
|
lea 13(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(ExitTail14):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd 6(%ecx), %xmm1
|
|
movlpd %xmm0, (%edx)
|
|
movlpd %xmm1, 6(%edx)
|
|
SAVE_RESULT_TAIL (13)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $14, %ebx
|
|
lea 14(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(ExitTail15):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd 7(%ecx), %xmm1
|
|
movlpd %xmm0, (%edx)
|
|
movlpd %xmm1, 7(%edx)
|
|
SAVE_RESULT_TAIL (14)
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $15, %ebx
|
|
lea 15(%edx), %ecx
|
|
jnz L(StrncpyFillTailWithZero)
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Short-string tail exit: the NUL is the 16th byte of the first
   16-byte chunk.  Copy all 16 bytes with unaligned SSE moves.  */
L(ExitTail16):
|
|
movdqu (%ecx), %xmm0	/* Load 16 source bytes including the NUL.  */
|
|
movdqu %xmm0, (%edx)
|
|
SAVE_RESULT_TAIL (15)	/* Presumably records the return value
				   (NUL at offset 15) — see macro defn.  */
|
|
# ifdef USE_AS_STRNCPY
|
|
sub $16, %ebx	/* 16 bytes of the length limit consumed.  */
|
|
lea 16(%edx), %ecx	/* %ecx = first byte still to be zero-padded.  */
|
|
jnz L(StrncpyFillTailWithZero)	/* Pad the remainder of the buffer.  */
|
|
# ifdef USE_AS_STPCPY
|
|
cmpb $1, (%eax)	/* CF = 1 iff *result == 0.  */
|
|
sbb $-1, %eax	/* stpcpy return-adjust: advance only past
			   a non-NUL byte.  */
|
|
# endif
|
|
# endif
|
|
RETURN
|
|
# endif
|
|
|
|
# ifdef USE_AS_STRNCPY
|
|
# ifndef USE_AS_STRCAT
|
|
CFI_PUSH (%esi)
|
|
CFI_PUSH (%edi)
|
|
# endif
|
|
.p2align 4
|
|
L(StrncpyLeaveCase2OrCase3):
|
|
test %eax, %eax
|
|
jnz L(Aligned64LeaveCase2)
|
|
|
|
L(Aligned64LeaveCase3):
|
|
add $48, %ebx
|
|
jle L(CopyFrom1To16BytesCase3)
|
|
movaps %xmm4, -64(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase3)
|
|
movaps %xmm5, -48(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase3)
|
|
movaps %xmm6, -32(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
L(Aligned64LeaveCase2):
|
|
pcmpeqb %xmm4, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
add $48, %ebx
|
|
jle L(CopyFrom1To16BytesCase2OrCase3)
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
pcmpeqb %xmm5, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm4, -64(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
pcmpeqb %xmm6, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm5, -48(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(CopyFrom1To16BytesCase2OrCase3)
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16Bytes)
|
|
|
|
pcmpeqb %xmm7, %xmm0
|
|
pmovmskb %xmm0, %eax
|
|
movaps %xmm6, -32(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
jmp L(CopyFrom1To16BytesCase2)
|
|
|
|
/*--------------------------------------------------*/
|
|
.p2align 4
|
|
L(StrncpyExit1Case2OrCase3):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd 7(%ecx), %xmm1
|
|
movlpd %xmm0, (%edx)
|
|
movlpd %xmm1, 7(%edx)
|
|
mov $15, %esi
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
.p2align 4
|
|
L(StrncpyExit2Case2OrCase3):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd 6(%ecx), %xmm1
|
|
movlpd %xmm0, (%edx)
|
|
movlpd %xmm1, 6(%edx)
|
|
mov $14, %esi
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
.p2align 4
|
|
L(StrncpyExit3Case2OrCase3):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd 5(%ecx), %xmm1
|
|
movlpd %xmm0, (%edx)
|
|
movlpd %xmm1, 5(%edx)
|
|
mov $13, %esi
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
.p2align 4
|
|
L(StrncpyExit4Case2OrCase3):
|
|
movlpd (%ecx), %xmm0
|
|
movl 8(%ecx), %esi
|
|
movlpd %xmm0, (%edx)
|
|
movl %esi, 8(%edx)
|
|
mov $12, %esi
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
.p2align 4
|
|
L(StrncpyExit5Case2OrCase3):
|
|
movlpd (%ecx), %xmm0
|
|
movl 7(%ecx), %esi
|
|
movlpd %xmm0, (%edx)
|
|
movl %esi, 7(%edx)
|
|
mov $11, %esi
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
.p2align 4
|
|
L(StrncpyExit6Case2OrCase3):
|
|
movlpd (%ecx), %xmm0
|
|
movl 6(%ecx), %esi
|
|
movlpd %xmm0, (%edx)
|
|
movl %esi, 6(%edx)
|
|
mov $10, %esi
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
.p2align 4
|
|
L(StrncpyExit7Case2OrCase3):
|
|
movlpd (%ecx), %xmm0
|
|
movl 5(%ecx), %esi
|
|
movlpd %xmm0, (%edx)
|
|
movl %esi, 5(%edx)
|
|
mov $9, %esi
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
.p2align 4
|
|
L(StrncpyExit8Case2OrCase3):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
mov $8, %esi
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
.p2align 4
|
|
L(StrncpyExit9Case2OrCase3):
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
mov $7, %esi
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
.p2align 4
|
|
L(StrncpyExit10Case2OrCase3):
|
|
movlpd -1(%ecx), %xmm0
|
|
movlpd %xmm0, -1(%edx)
|
|
mov $6, %esi
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
.p2align 4
|
|
L(StrncpyExit11Case2OrCase3):
|
|
movlpd -2(%ecx), %xmm0
|
|
movlpd %xmm0, -2(%edx)
|
|
mov $5, %esi
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
.p2align 4
|
|
L(StrncpyExit12Case2OrCase3):
|
|
movl (%ecx), %esi
|
|
movl %esi, (%edx)
|
|
mov $4, %esi
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
.p2align 4
|
|
L(StrncpyExit13Case2OrCase3):
|
|
movl -1(%ecx), %esi
|
|
movl %esi, -1(%edx)
|
|
mov $3, %esi
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
.p2align 4
|
|
L(StrncpyExit14Case2OrCase3):
|
|
movl -2(%ecx), %esi
|
|
movl %esi, -2(%edx)
|
|
mov $2, %esi
|
|
test %eax, %eax
|
|
jnz L(CopyFrom1To16BytesCase2)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
.p2align 4
|
|
/* strncpy length limit reached on the shift-by-15 path: copy the
   last byte of the window via an overlapping 4-byte move, then
   join the shared Case2/Case3 tail with %esi = 1 (offset already
   advanced past the window).  %eax holds the pmovmskb NUL mask.  */
L(StrncpyExit15Case2OrCase3):
|
|
movl -3(%ecx), %esi	/* Overlapping 4-byte copy covers offset 0.  */
|
|
movl %esi, -3(%edx)
|
|
mov $1, %esi	/* Offset adjustment for the shared tail code.  */
|
|
test %eax, %eax	/* Was a NUL seen in this 16-byte window?  */
|
|
jnz L(CopyFrom1To16BytesCase2)	/* Yes: NUL within the limit.  */
|
|
jmp L(CopyFrom1To16BytesCase3)	/* No: limit hit first, truncate.  */
|
|
|
|
L(StrncpyLeave1):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
jle L(StrncpyExit1)
|
|
palignr $1, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 31(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit1)
|
|
palignr $1, %xmm3, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit1)
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit1)
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
L(StrncpyExit1):
|
|
lea 15(%edx, %esi), %edx
|
|
lea 15(%ecx, %esi), %ecx
|
|
movdqu -16(%ecx), %xmm0
|
|
xor %esi, %esi
|
|
movdqu %xmm0, -16(%edx)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
L(StrncpyLeave2):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
jle L(StrncpyExit2)
|
|
palignr $2, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 30(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit2)
|
|
palignr $2, %xmm3, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit2)
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit2)
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
L(StrncpyExit2):
|
|
lea 14(%edx, %esi), %edx
|
|
lea 14(%ecx, %esi), %ecx
|
|
movdqu -16(%ecx), %xmm0
|
|
xor %esi, %esi
|
|
movdqu %xmm0, -16(%edx)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
L(StrncpyLeave3):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
jle L(StrncpyExit3)
|
|
palignr $3, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 29(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit3)
|
|
palignr $3, %xmm3, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit3)
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit3)
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
L(StrncpyExit3):
|
|
lea 13(%edx, %esi), %edx
|
|
lea 13(%ecx, %esi), %ecx
|
|
movdqu -16(%ecx), %xmm0
|
|
xor %esi, %esi
|
|
movdqu %xmm0, -16(%edx)
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
L(StrncpyLeave4):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
jle L(StrncpyExit4)
|
|
palignr $4, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 28(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit4)
|
|
palignr $4, %xmm3, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit4)
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit4)
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
L(StrncpyExit4):
|
|
lea 12(%edx, %esi), %edx
|
|
lea 12(%ecx, %esi), %ecx
|
|
movlpd -12(%ecx), %xmm0
|
|
movl -4(%ecx), %eax
|
|
movlpd %xmm0, -12(%edx)
|
|
movl %eax, -4(%edx)
|
|
xor %esi, %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
L(StrncpyLeave5):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
jle L(StrncpyExit5)
|
|
palignr $5, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 27(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit5)
|
|
palignr $5, %xmm3, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit5)
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit5)
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
L(StrncpyExit5):
|
|
lea 11(%edx, %esi), %edx
|
|
lea 11(%ecx, %esi), %ecx
|
|
movlpd -11(%ecx), %xmm0
|
|
movl -4(%ecx), %eax
|
|
movlpd %xmm0, -11(%edx)
|
|
movl %eax, -4(%edx)
|
|
xor %esi, %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
L(StrncpyLeave6):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
jle L(StrncpyExit6)
|
|
palignr $6, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 26(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit6)
|
|
palignr $6, %xmm3, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit6)
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit6)
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
L(StrncpyExit6):
|
|
lea 10(%edx, %esi), %edx
|
|
lea 10(%ecx, %esi), %ecx
|
|
|
|
movlpd -10(%ecx), %xmm0
|
|
movw -2(%ecx), %ax
|
|
movlpd %xmm0, -10(%edx)
|
|
movw %ax, -2(%edx)
|
|
xor %esi, %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
L(StrncpyLeave7):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
jle L(StrncpyExit7)
|
|
palignr $7, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 25(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit7)
|
|
palignr $7, %xmm3, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit7)
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit7)
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
L(StrncpyExit7):
|
|
lea 9(%edx, %esi), %edx
|
|
lea 9(%ecx, %esi), %ecx
|
|
|
|
movlpd -9(%ecx), %xmm0
|
|
movb -1(%ecx), %ah
|
|
movlpd %xmm0, -9(%edx)
|
|
movb %ah, -1(%edx)
|
|
xor %esi, %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
L(StrncpyLeave8):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
jle L(StrncpyExit8)
|
|
palignr $8, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 24(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit8)
|
|
palignr $8, %xmm3, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit8)
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit8)
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
L(StrncpyExit8):
|
|
lea 8(%edx, %esi), %edx
|
|
lea 8(%ecx, %esi), %ecx
|
|
movlpd -8(%ecx), %xmm0
|
|
movlpd %xmm0, -8(%edx)
|
|
xor %esi, %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
L(StrncpyLeave9):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
jle L(StrncpyExit9)
|
|
palignr $9, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 23(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit9)
|
|
palignr $9, %xmm3, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit9)
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit9)
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
L(StrncpyExit9):
|
|
lea 7(%edx, %esi), %edx
|
|
lea 7(%ecx, %esi), %ecx
|
|
|
|
movlpd -8(%ecx), %xmm0
|
|
movlpd %xmm0, -8(%edx)
|
|
xor %esi, %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
L(StrncpyLeave10):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
jle L(StrncpyExit10)
|
|
palignr $10, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 22(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit10)
|
|
palignr $10, %xmm3, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit10)
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit10)
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
L(StrncpyExit10):
|
|
lea 6(%edx, %esi), %edx
|
|
lea 6(%ecx, %esi), %ecx
|
|
|
|
movlpd -8(%ecx), %xmm0
|
|
movlpd %xmm0, -8(%edx)
|
|
xor %esi, %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
L(StrncpyLeave11):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
jle L(StrncpyExit11)
|
|
palignr $11, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 21(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit11)
|
|
palignr $11, %xmm3, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit11)
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit11)
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
L(StrncpyExit11):
|
|
lea 5(%edx, %esi), %edx
|
|
lea 5(%ecx, %esi), %ecx
|
|
movl -5(%ecx), %esi
|
|
movb -1(%ecx), %ah
|
|
movl %esi, -5(%edx)
|
|
movb %ah, -1(%edx)
|
|
xor %esi, %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* Same structure as L(StrncpyLeave10) but for a source 12 bytes past
   16-byte alignment (palignr $12, load bias 20 = 16 + 4).  */
L(StrncpyLeave12):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
jle L(StrncpyExit12)
|
|
palignr $12, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 20(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit12)
|
|
palignr $12, %xmm3, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit12)
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit12)
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
/* Tail: realign by 4 = 16 - 12 and copy the last 4 bytes.  */
L(StrncpyExit12):
|
|
lea 4(%edx, %esi), %edx
|
|
lea 4(%ecx, %esi), %ecx
|
|
movl -4(%ecx), %eax
|
|
movl %eax, -4(%edx)
|
|
xor %esi, %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* Same structure as L(StrncpyLeave10) but for a source 13 bytes past
   16-byte alignment (palignr $13, load bias 19 = 16 + 3).  */
L(StrncpyLeave13):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
jle L(StrncpyExit13)
|
|
palignr $13, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 19(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit13)
|
|
palignr $13, %xmm3, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit13)
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit13)
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
/* Tail: realign by 3 = 16 - 13; a 4-byte load/store covers the last
   3 bytes by overlapping one already-written byte.  */
L(StrncpyExit13):
|
|
lea 3(%edx, %esi), %edx
|
|
lea 3(%ecx, %esi), %ecx
|
|
|
|
movl -4(%ecx), %eax
|
|
movl %eax, -4(%edx)
|
|
xor %esi, %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* Same structure as L(StrncpyLeave10) but for a source 14 bytes past
   16-byte alignment (palignr $14, load bias 18 = 16 + 2).  */
L(StrncpyLeave14):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
jle L(StrncpyExit14)
|
|
palignr $14, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 18(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit14)
|
|
palignr $14, %xmm3, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit14)
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit14)
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
/* Tail: realign by 2 = 16 - 14 and copy the last 2 bytes.  */
L(StrncpyExit14):
|
|
lea 2(%edx, %esi), %edx
|
|
lea 2(%ecx, %esi), %ecx
|
|
movw -2(%ecx), %ax
|
|
movw %ax, -2(%edx)
|
|
xor %esi, %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
|
|
/* Same structure as L(StrncpyLeave10) but for a source 15 bytes past
   16-byte alignment (palignr $15, load bias 17 = 16 + 1).  Last of the
   per-alignment Leave/Exit ladder; the # endif below closes the
   USE_AS_STRNCPY region these labels live in (opened before this view).  */
L(StrncpyLeave15):
|
|
movaps %xmm2, %xmm3
|
|
add $48, %ebx
|
|
jle L(StrncpyExit15)
|
|
palignr $15, %xmm1, %xmm2
|
|
movaps %xmm2, (%edx)
|
|
movaps 17(%ecx), %xmm2
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit15)
|
|
palignr $15, %xmm3, %xmm2
|
|
movaps %xmm2, 16(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit15)
|
|
movaps %xmm4, 32(%edx)
|
|
lea 16(%esi), %esi
|
|
sub $16, %ebx
|
|
jbe L(StrncpyExit15)
|
|
movaps %xmm5, 48(%edx)
|
|
lea 16(%esi), %esi
|
|
lea -16(%ebx), %ebx
|
|
/* Tail: realign by 1 = 16 - 15 and copy the final byte.  */
L(StrncpyExit15):
|
|
lea 1(%edx, %esi), %edx
|
|
lea 1(%ecx, %esi), %ecx
|
|
movb -1(%ecx), %ah
|
|
movb %ah, -1(%edx)
|
|
xor %esi, %esi
|
|
jmp L(CopyFrom1To16BytesCase3)
|
|
# endif
|
|
|
|
/* Tail-exit section (strcpy/strncpy/stpcpy only, not strcat).  For
   strncpy the saved %esi/%edi are logically popped here so the CFI
   state matches the tail paths that return without restoring them.  */
# ifndef USE_AS_STRCAT
|
|
# ifdef USE_AS_STRNCPY
|
|
CFI_POP (%esi)
|
|
CFI_POP (%edi)
|
|
|
|
.p2align 4
|
|
/* n == 0: nothing copied; return the destination (RETURN is the
   file's epilogue macro, defined before this view).  */
L(ExitTail0):
|
|
movl %edx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Short-string strncpy tail for 13..15 remaining bytes: scan bytes
   8..13 of the source for NUL (bytes 0..7 were presumably checked on
   an earlier path — confirm in the full file), dispatching to the
   fixed-length ExitTailN exits; otherwise copy %ebx bytes with two
   overlapping 8-byte moves.  */
L(StrncpyExit15Bytes):
|
|
cmp $12, %ebx
|
|
jbe L(StrncpyExit12Bytes)
|
|
cmpb $0, 8(%ecx)
|
|
jz L(ExitTail9)
|
|
cmpb $0, 9(%ecx)
|
|
jz L(ExitTail10)
|
|
cmpb $0, 10(%ecx)
|
|
jz L(ExitTail11)
|
|
cmpb $0, 11(%ecx)
|
|
jz L(ExitTail12)
|
|
cmp $13, %ebx
|
|
je L(ExitTail13)
|
|
cmpb $0, 12(%ecx)
|
|
jz L(ExitTail13)
|
|
cmp $14, %ebx
|
|
je L(ExitTail14)
|
|
cmpb $0, 13(%ecx)
|
|
jz L(ExitTail14)
|
|
movlpd (%ecx), %xmm0
|
|
movlpd 7(%ecx), %xmm1  /* overlapping loads cover bytes 0..14 */
|
|
movlpd %xmm0, (%edx)
|
|
movlpd %xmm1, 7(%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
/* stpcpy: point %eax at byte 14; if it is not NUL (cmpb sets CF),
   sbb $-1 advances %eax one past the copied region.  */
lea 14(%edx), %eax
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Short-string strncpy tail for 9..12 remaining bytes: interleave
   length checks (%ebx) with NUL checks on source bytes 8..10; if
   neither cuts the copy short, move 12 bytes (8 + 4).  */
L(StrncpyExit12Bytes):
|
|
cmp $9, %ebx
|
|
je L(ExitTail9)
|
|
cmpb $0, 8(%ecx)
|
|
jz L(ExitTail9)
|
|
cmp $10, %ebx
|
|
je L(ExitTail10)
|
|
cmpb $0, 9(%ecx)
|
|
jz L(ExitTail10)
|
|
cmp $11, %ebx
|
|
je L(ExitTail11)
|
|
cmpb $0, 10(%ecx)
|
|
jz L(ExitTail11)
|
|
movlpd (%ecx), %xmm0
|
|
movl 8(%ecx), %eax
|
|
movlpd %xmm0, (%edx)
|
|
movl %eax, 8(%edx)
|
|
SAVE_RESULT_TAIL (11)  /* macro from the file header: set %eax return value */
|
|
# ifdef USE_AS_STPCPY
|
|
/* stpcpy adjustment: advance %eax past the end if byte 11 is non-NUL.  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Short-string strncpy tail for 5..8 remaining bytes: NUL-scan source
   bytes 0..6 interleaved with length checks, else copy 8 bytes.  */
L(StrncpyExit8Bytes):
|
|
cmp $4, %ebx
|
|
jbe L(StrncpyExit4Bytes)
|
|
cmpb $0, (%ecx)
|
|
jz L(ExitTail1)
|
|
cmpb $0, 1(%ecx)
|
|
jz L(ExitTail2)
|
|
cmpb $0, 2(%ecx)
|
|
jz L(ExitTail3)
|
|
cmpb $0, 3(%ecx)
|
|
jz L(ExitTail4)
|
|
|
|
cmp $5, %ebx
|
|
je L(ExitTail5)
|
|
cmpb $0, 4(%ecx)
|
|
jz L(ExitTail5)
|
|
cmp $6, %ebx
|
|
je L(ExitTail6)
|
|
cmpb $0, 5(%ecx)
|
|
jz L(ExitTail6)
|
|
cmp $7, %ebx
|
|
je L(ExitTail7)
|
|
cmpb $0, 6(%ecx)
|
|
jz L(ExitTail7)
|
|
movlpd (%ecx), %xmm0
|
|
movlpd %xmm0, (%edx)
|
|
# ifdef USE_AS_STPCPY
|
|
/* stpcpy: advance %eax past byte 7 when it is non-NUL.  */
lea 7(%edx), %eax
|
|
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# else
|
|
movl %edx, %eax
|
|
# endif
|
|
RETURN
|
|
|
|
.p2align 4
|
|
/* Short-string strncpy tail for 0..4 remaining bytes.  Length checks
   precede each NUL check so at most %ebx source bytes are read.  */
L(StrncpyExit4Bytes):
|
|
test %ebx, %ebx
|
|
jz L(ExitTail0)
|
|
cmp $1, %ebx
|
|
je L(ExitTail1)
|
|
cmpb $0, (%ecx)
|
|
jz L(ExitTail1)
|
|
cmp $2, %ebx
|
|
je L(ExitTail2)
|
|
cmpb $0, 1(%ecx)
|
|
jz L(ExitTail2)
|
|
cmp $3, %ebx
|
|
je L(ExitTail3)
|
|
cmpb $0, 2(%ecx)
|
|
jz L(ExitTail3)
|
|
movl (%ecx), %eax  /* copy all 4 bytes in one move */
|
|
movl %eax, (%edx)
|
|
SAVE_RESULT_TAIL (3)
|
|
# ifdef USE_AS_STPCPY
|
|
/* stpcpy: advance %eax past the end when byte 3 is non-NUL.  */
cmpb $1, (%eax)
|
|
sbb $-1, %eax
|
|
# endif
|
|
RETURN
|
|
# endif
|
|
|
|
END (STRCPY)
|
|
# endif
|
|
#endif
|