powerpc: Use default strcpy optimization for POWER7
This patch uses the default strcpy/stpcpy implementation for POWER7/PPC64. It is faster for most inputs in the benchtests, and for multiarch the implementation uses the POWER7 strlen and memcpy. * string/stpcpy.c (__stpcpy): Use STPCPY to redefine symbol name and cleanup macro usage. * string/strcpy.c (strcpy): Use STRCPY to redefine symbol name. * sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.S: Remove file. * sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.S: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.S: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.S: Likewise. * sysdeps/powerpc/powerpc64/power7/stpcpy.S: Likewise. * sysdeps/powerpc/powerpc64/power7/strcpy.S: Likewise. * sysdeps/powerpc/powerpc64/power7/strcpy.c: Likewise. * sysdeps/powerpc/powerpc64/stpcpy.S: Likewise. * sysdeps/powerpc/powerpc64/strcpy.S: Likewise. * sysdeps/powerpc/powerpc64/multiarch/stpcpy.c [SHARED && IS_IN (libc)]: Include <string/strcpy.c>. * sysdeps/powerpc/powerpc64/multiarch/stpcpy.c [SHARED && IS_IN (libc)]: Include <string/stpcpy.c>. * sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.c: New file. * sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.c: Likewise. * sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.c: Likewise. * sysdeps/powerpc/powerpc64/power7/strcpy.c: Likewise.
This commit is contained in:
parent
14362ef154
commit
142e0a9953
22
ChangeLog
22
ChangeLog
@ -1,5 +1,27 @@
|
||||
2015-08-10 Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||
|
||||
* string/stpcpy.c (__stpcpy): Use STPCPY to redefine symbol name and
|
||||
cleanup macro usage.
|
||||
* string/strcpy.c (strcpy): Use STRCPY to redefine symbol name.
|
||||
* sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.S: Remove file.
|
||||
* sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.S: Likewise.
|
||||
* sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.S: Likewise.
|
||||
* sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.S: Likewise.
|
||||
* sysdeps/powerpc/powerpc64/power7/stpcpy.S: Likewise.
|
||||
* sysdeps/powerpc/powerpc64/power7/strcpy.S: Likewise.
|
||||
* sysdeps/powerpc/powerpc64/power7/strcpy.c: Likewise.
|
||||
* sysdeps/powerpc/powerpc64/stpcpy.S: Likewise.
|
||||
* sysdeps/powerpc/powerpc64/strcpy.S: Likewise.
|
||||
* sysdeps/powerpc/powerpc64/multiarch/stpcpy.c
|
||||
[SHARED && IS_IN (libc)]: Include <string/strcpy.c>.
|
||||
* sysdeps/powerpc/powerpc64/multiarch/stpcpy.c
|
||||
[SHARED && IS_IN (libc)]: Include <string/stpcpy.c>.
|
||||
* sysdeps/powerpc/powerpc64/multiarch/stpcpy-power7.c: New file.
|
||||
* sysdeps/powerpc/powerpc64/multiarch/stpcpy-ppc64.c: Likewise.
|
||||
* sysdeps/powerpc/powerpc64/multiarch/strcpy-power7.c: Likewise.
|
||||
* sysdeps/powerpc/powerpc64/multiarch/strcpy-ppc64.c: Likewise.
|
||||
* sysdeps/powerpc/powerpc64/power7/strcpy.c: Likewise.
|
||||
|
||||
* sysdeps/powerpc/powerpc64/power7/strnlen.S (__strnlen): Add
|
||||
libc_hidden_def.
|
||||
(strnlen): Remove libc_hidden_builtin_def and add libc_hidden_def.
|
||||
|
@ -25,25 +25,17 @@
|
||||
#undef __stpcpy
|
||||
#undef stpcpy
|
||||
|
||||
#ifndef weak_alias
|
||||
# define __stpcpy stpcpy
|
||||
#ifndef STPCPY
|
||||
# define STPCPY __stpcpy
|
||||
#endif
|
||||
|
||||
/* Copy SRC to DEST, returning the address of the terminating '\0' in DEST. */
|
||||
char *
|
||||
__stpcpy (dest, src)
|
||||
char *dest;
|
||||
const char *src;
|
||||
STPCPY (char *dest, const char *src)
|
||||
{
|
||||
size_t len = strlen (src);
|
||||
return memcpy (dest, src, len + 1) + len;
|
||||
}
|
||||
#ifdef libc_hidden_def
|
||||
libc_hidden_def (__stpcpy)
|
||||
#endif
|
||||
#ifdef weak_alias
|
||||
weak_alias (__stpcpy, stpcpy)
|
||||
#endif
|
||||
#ifdef libc_hidden_builtin_def
|
||||
libc_hidden_def (__stpcpy)
|
||||
libc_hidden_builtin_def (stpcpy)
|
||||
#endif
|
||||
|
@ -20,9 +20,13 @@
|
||||
|
||||
#undef strcpy
|
||||
|
||||
#ifndef STRCPY
|
||||
# define STRCPY strcpy
|
||||
#endif
|
||||
|
||||
/* Copy SRC to DEST. */
|
||||
char *
|
||||
strcpy (char *dest, const char *src)
|
||||
STRCPY (char *dest, const char *src)
|
||||
{
|
||||
return memcpy (dest, src, strlen (src) + 1);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* Optimized stpcpy implementation for POWER7.
|
||||
Copyright (C) 2013-2015 Free Software Foundation, Inc.
|
||||
/* Multiarch stpcpy for POWER7/PPC64.
|
||||
Copyright (C) 2015 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
@ -16,25 +16,21 @@
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <string.h>
|
||||
|
||||
#undef EALIGN
|
||||
#define EALIGN(name, alignt, words) \
|
||||
.section ".text"; \
|
||||
ENTRY_2(__stpcpy_power7) \
|
||||
.align ALIGNARG(alignt); \
|
||||
EALIGN_W_##words; \
|
||||
BODY_LABEL(__stpcpy_power7): \
|
||||
cfi_startproc; \
|
||||
LOCALENTRY(__stpcpy_power7)
|
||||
extern __typeof (memcpy) __memcpy_power7 attribute_hidden;
|
||||
extern __typeof (strlen) __strlen_power7 attribute_hidden;
|
||||
extern __typeof (stpcpy) __stpcpy_power7 attribute_hidden;
|
||||
|
||||
#undef END
|
||||
#define END(name) \
|
||||
cfi_endproc; \
|
||||
TRACEBACK(__stpcpy_power7) \
|
||||
END_2(__stpcpy_power7)
|
||||
#define STPCPY __stpcpy_power7
|
||||
#define memcpy __memcpy_power7
|
||||
#define strlen __strlen_power7
|
||||
|
||||
#undef libc_hidden_def
|
||||
#define libc_hidden_def(name)
|
||||
#undef weak_alias
|
||||
#define weak_alias(name, alias)
|
||||
#undef libc_hidden_builtin_def
|
||||
#define libc_hidden_builtin_def(name)
|
||||
|
||||
#include <sysdeps/powerpc/powerpc64/power7/stpcpy.S>
|
||||
#include <string/stpcpy.c>
|
@ -1,48 +0,0 @@
|
||||
/* Default stpcpy implementation for PowerPC64.
|
||||
Copyright (C) 2013-2015 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
#if defined SHARED && IS_IN (libc)
|
||||
# undef EALIGN
|
||||
# define EALIGN(name, alignt, words) \
|
||||
.section ".text"; \
|
||||
ENTRY_2(__stpcpy_ppc) \
|
||||
.align ALIGNARG(alignt); \
|
||||
EALIGN_W_##words; \
|
||||
BODY_LABEL(__stpcpy_ppc): \
|
||||
cfi_startproc; \
|
||||
LOCALENTRY(__stpcpy_ppc)
|
||||
|
||||
# undef END
|
||||
# define END(name) \
|
||||
cfi_endproc; \
|
||||
TRACEBACK(__stpcpy_ppc) \
|
||||
END_2(__stpcpy_ppc)
|
||||
|
||||
# undef weak_alias
|
||||
# define weak_alias(name, alias)
|
||||
# undef libc_hidden_def
|
||||
# define libc_hidden_def(name)
|
||||
|
||||
# undef libc_hidden_builtin_def
|
||||
# define libc_hidden_builtin_def(name) \
|
||||
.globl __GI___stpcpy; __GI___stpcpy = __stpcpy_ppc
|
||||
#endif
|
||||
|
||||
#include <sysdeps/powerpc/powerpc64/stpcpy.S>
|
@ -1,5 +1,5 @@
|
||||
/* Optimized strcpy implementation for POWER7.
|
||||
Copyright (C) 2013-2015 Free Software Foundation, Inc.
|
||||
/* Multiarch stpcpy for PPC64.
|
||||
Copyright (C) 2015 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
@ -16,25 +16,24 @@
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <string.h>
|
||||
|
||||
#undef EALIGN
|
||||
#define EALIGN(name, alignt, words) \
|
||||
.section ".text"; \
|
||||
ENTRY_2(__strcpy_power7) \
|
||||
.align ALIGNARG(alignt); \
|
||||
EALIGN_W_##words; \
|
||||
BODY_LABEL(__strcpy_power7): \
|
||||
cfi_startproc; \
|
||||
LOCALENTRY(__strcpy_power7)
|
||||
extern __typeof (memcpy) __memcpy_ppc attribute_hidden;
|
||||
extern __typeof (strlen) __strlen_ppc attribute_hidden;
|
||||
extern __typeof (stpcpy) __stpcpy_ppc attribute_hidden;
|
||||
|
||||
#undef END
|
||||
#define END(name) \
|
||||
cfi_endproc; \
|
||||
TRACEBACK(__strcpy_power7) \
|
||||
END_2(__strcpy_power7)
|
||||
#define STPCPY __stpcpy_ppc
|
||||
#define memcpy __memcpy_ppc
|
||||
#define strlen __strlen_ppc
|
||||
|
||||
#undef weak_alias
|
||||
#define weak_alias(name, aliasname) \
|
||||
extern __typeof (__stpcpy_ppc) aliasname \
|
||||
__attribute__ ((weak, alias ("__stpcpy_ppc")));
|
||||
|
||||
#undef libc_hidden_def
|
||||
#define libc_hidden_def(name)
|
||||
#undef libc_hidden_builtin_def
|
||||
#define libc_hidden_builtin_def(name)
|
||||
|
||||
#include <sysdeps/powerpc/powerpc64/power7/strcpy.S>
|
||||
#include <string/stpcpy.c>
|
@ -32,4 +32,6 @@ libc_ifunc (__stpcpy,
|
||||
|
||||
weak_alias (__stpcpy, stpcpy)
|
||||
libc_hidden_def (stpcpy)
|
||||
#else
|
||||
# include <string/stpcpy.c>
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* Optimized stpcpy implementation for PowerPC64/POWER7.
|
||||
Copyright (C) 2013-2015 Free Software Foundation, Inc.
|
||||
/* Multiarch strcpy for POWER7/PPC64.
|
||||
Copyright (C) 2015 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
@ -16,9 +16,17 @@
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#define USE_AS_STPCPY
|
||||
#include <sysdeps/powerpc/powerpc64/power7/strcpy.S>
|
||||
#include <string.h>
|
||||
|
||||
weak_alias (__stpcpy, stpcpy)
|
||||
libc_hidden_def (__stpcpy)
|
||||
libc_hidden_builtin_def (stpcpy)
|
||||
extern __typeof (memcpy) __memcpy_power7 attribute_hidden;
|
||||
extern __typeof (strlen) __strlen_power7 attribute_hidden;
|
||||
extern __typeof (strcpy) __strcpy_power7 attribute_hidden;
|
||||
|
||||
#define STRCPY __strcpy_power7
|
||||
#define memcpy __memcpy_power7
|
||||
#define strlen __strlen_power7
|
||||
|
||||
#undef libc_hidden_builtin_def
|
||||
#define libc_hidden_builtin_def(name)
|
||||
|
||||
#include <string/strcpy.c>
|
@ -1,43 +0,0 @@
|
||||
/* Default strcpy implementation for PowerPC64.
|
||||
Copyright (C) 2013-2015 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
#if defined SHARED && IS_IN (libc)
|
||||
# undef EALIGN
|
||||
# define EALIGN(name, alignt, words) \
|
||||
.section ".text"; \
|
||||
ENTRY_2(__strcpy_ppc) \
|
||||
.align ALIGNARG(alignt); \
|
||||
EALIGN_W_##words; \
|
||||
BODY_LABEL(__strcpy_ppc): \
|
||||
cfi_startproc; \
|
||||
LOCALENTRY(__strcpy_ppc)
|
||||
|
||||
# undef END
|
||||
# define END(name) \
|
||||
cfi_endproc; \
|
||||
TRACEBACK(__strcpy_ppc) \
|
||||
END_2(__strcpy_ppc)
|
||||
|
||||
# undef libc_hidden_builtin_def
|
||||
# define libc_hidden_builtin_def(name) \
|
||||
.globl __GI_strcpy; __GI_strcpy = __strcpy_ppc
|
||||
#endif
|
||||
|
||||
#include <sysdeps/powerpc/powerpc64/strcpy.S>
|
@ -1,5 +1,5 @@
|
||||
/* Optimized stpcpy implementation for PowerPC64.
|
||||
Copyright (C) 1997-2015 Free Software Foundation, Inc.
|
||||
/* Multiarch strcpy for PPC64.
|
||||
Copyright (C) 2015 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
@ -16,9 +16,20 @@
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#define USE_AS_STPCPY
|
||||
#include <sysdeps/powerpc/powerpc64/strcpy.S>
|
||||
#include <string.h>
|
||||
|
||||
weak_alias (__stpcpy, stpcpy)
|
||||
libc_hidden_def (__stpcpy)
|
||||
libc_hidden_builtin_def (stpcpy)
|
||||
#if defined SHARED && IS_IN (libc)
|
||||
extern __typeof (memcpy) __memcpy_ppc attribute_hidden;
|
||||
extern __typeof (strlen) __strlen_ppc attribute_hidden;
|
||||
extern __typeof (strcpy) __strcpy_ppc attribute_hidden;
|
||||
|
||||
# define STRCPY __strcpy_ppc
|
||||
# define memcpy __memcpy_ppc
|
||||
# define strlen __strlen_ppc
|
||||
|
||||
# undef libc_hidden_builtin_def
|
||||
# define libc_hidden_builtin_def(name) \
|
||||
__hidden_ver1 (__strcpy_ppc, __GI_strcpy, __strcpy_ppc);
|
||||
#endif
|
||||
|
||||
#include <string/strcpy.c>
|
@ -1,437 +0,0 @@
|
||||
/* Optimized strcpy/stpcpy implementation for PowerPC64/POWER7.
|
||||
Copyright (C) 2013-2015 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
/* Implements the function
|
||||
|
||||
char * [r3] strcpy (char *dest [r3], const char *src [r4])
|
||||
|
||||
or
|
||||
|
||||
char * [r3] stpcpy (char *dest [r3], const char *src [r4])
|
||||
|
||||
if USE_AS_STPCPY is defined. It tries to use aligned memory accesses
|
||||
when possible using the following algorithm:
|
||||
|
||||
if (((((uintptr_t)dst & 0x7UL) == 0) && ((uintptr_t)src & 0x7UL) == 0))
|
||||
goto aligned_doubleword_copy;
|
||||
if (((uintptr_t)dst & 0x7UL) == ((uintptr_t)src & 0x7UL))
|
||||
goto same_alignment;
|
||||
goto unaligned;
|
||||
|
||||
The aligned comparisons are made using cmpb instructions. */
|
||||
|
||||
#ifdef USE_AS_STPCPY
|
||||
# define FUNC_NAME __stpcpy
|
||||
#else
|
||||
# define FUNC_NAME strcpy
|
||||
#endif
|
||||
|
||||
.machine power7
|
||||
EALIGN (FUNC_NAME, 4, 0)
|
||||
CALL_MCOUNT 2
|
||||
|
||||
#define rTMP r0
|
||||
#ifdef USE_AS_STPCPY
|
||||
#define rRTN r3 /* pointer to previous word/doubleword in dest */
|
||||
#else
|
||||
#define rRTN r12 /* pointer to previous word/doubleword in dest */
|
||||
#endif
|
||||
#define rSRC r4 /* pointer to previous word/doubleword in src */
|
||||
#define rMASK r5 /* mask 0xffffffff | 0xffffffffffffffff */
|
||||
#define rWORD r6 /* current word from src */
|
||||
#define rALT r7 /* alternate word from src */
|
||||
#define rRTNAL r8 /* alignment of return pointer */
|
||||
#define rSRCAL r9 /* alignment of source pointer */
|
||||
#define rALCNT r10 /* bytes to read to reach 8 bytes alignment */
|
||||
#define rSUBAL r11 /* doubleword minus unaligned displacement */
|
||||
|
||||
#ifndef USE_AS_STPCPY
|
||||
/* Save the dst pointer to use as return value. */
|
||||
mr rRTN, r3
|
||||
#endif
|
||||
or rTMP, rSRC, rRTN
|
||||
clrldi. rTMP, rTMP, 61
|
||||
bne L(check_alignment)
|
||||
b L(aligned_doubleword_copy)
|
||||
|
||||
.align 4
|
||||
L(check_alignment):
|
||||
rldicl rRTNAL, rRTN, 0, 61
|
||||
rldicl rSRCAL, rSRC, 0, 61
|
||||
cmpld cr7, rSRCAL, rRTNAL
|
||||
beq cr7, L(same_alignment)
|
||||
b L(unaligned)
|
||||
|
||||
.align 4
|
||||
L(same_alignment):
|
||||
/* Src and dst with same alignment: align both to doubleword. */
|
||||
mr rALCNT, rRTN
|
||||
lbz rWORD, 0(rSRC)
|
||||
subfic rSUBAL, rRTNAL, 8
|
||||
addi rRTN, rRTN, 1
|
||||
addi rSRC, rSRC, 1
|
||||
cmpdi cr7, rWORD, 0
|
||||
stb rWORD, 0(rALCNT)
|
||||
beq cr7, L(s2)
|
||||
|
||||
add rALCNT, rALCNT, rSUBAL
|
||||
subf rALCNT, rRTN, rALCNT
|
||||
addi rALCNT, rALCNT, 1
|
||||
mtctr rALCNT
|
||||
b L(s1)
|
||||
|
||||
.align 4
|
||||
L(s0):
|
||||
addi rSRC, rSRC, 1
|
||||
lbz rWORD, -1(rSRC)
|
||||
cmpdi cr7, rWORD, 0
|
||||
stb rWORD, -1(rALCNT)
|
||||
beqlr cr7
|
||||
mr rRTN, rALCNT
|
||||
L(s1):
|
||||
addi rALCNT, rRTN,1
|
||||
bdnz L(s0)
|
||||
b L(aligned_doubleword_copy)
|
||||
.align 4
|
||||
L(s2):
|
||||
mr rRTN, rALCNT
|
||||
blr
|
||||
|
||||
/* For doubleword aligned memory, operate using doubleword load and stores. */
|
||||
.align 4
|
||||
L(aligned_doubleword_copy):
|
||||
li rMASK, 0
|
||||
addi rRTN, rRTN, -8
|
||||
ld rWORD, 0(rSRC)
|
||||
b L(g2)
|
||||
|
||||
.align 4
|
||||
L(g0): ldu rALT, 8(rSRC)
|
||||
stdu rWORD, 8(rRTN)
|
||||
cmpb rTMP, rALT, rMASK
|
||||
cmpdi rTMP, 0
|
||||
bne L(g1)
|
||||
ldu rWORD, 8(rSRC)
|
||||
stdu rALT, 8(rRTN)
|
||||
L(g2): cmpb rTMP, rWORD, rMASK
|
||||
cmpdi rTMP, 0 /* If rTMP is 0, no null's have been found. */
|
||||
beq L(g0)
|
||||
|
||||
mr rALT, rWORD
|
||||
/* We've hit the end of the string. Do the rest byte-by-byte. */
|
||||
L(g1):
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
extrdi. rTMP, rALT, 8, 56
|
||||
stbu rALT, 8(rRTN)
|
||||
beqlr-
|
||||
extrdi. rTMP, rALT, 8, 48
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
extrdi. rTMP, rALT, 8, 40
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
extrdi. rTMP, rALT, 8, 32
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
extrdi. rTMP, rALT, 8, 24
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
extrdi. rTMP, rALT, 8, 16
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
extrdi. rTMP, rALT, 8, 8
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
extrdi rTMP, rALT, 8, 0
|
||||
stbu rTMP, 1(rRTN)
|
||||
#else
|
||||
extrdi. rTMP, rALT, 8, 0
|
||||
stbu rTMP, 8(rRTN)
|
||||
beqlr
|
||||
extrdi. rTMP, rALT, 8, 8
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr
|
||||
extrdi. rTMP, rALT, 8, 16
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr
|
||||
extrdi. rTMP, rALT, 8, 24
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr
|
||||
extrdi. rTMP, rALT, 8, 32
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr
|
||||
extrdi. rTMP, rALT, 8, 40
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr
|
||||
extrdi. rTMP, rALT, 8, 48
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr
|
||||
stbu rALT, 1(rRTN)
|
||||
#endif
|
||||
blr
|
||||
|
||||
.align 4
|
||||
L(unaligned):
|
||||
cmpdi rSRCAL, 0 /* Check src alignment */
|
||||
beq L(srcaligndstunalign)
|
||||
/* src is unaligned */
|
||||
rlwinm r10, rSRC, 3,26,28 /* Calculate padding. */
|
||||
clrrdi rSRC, rSRC, 3 /* Align the addr to dw boundary */
|
||||
ld rWORD, 0(rSRC) /* Load doubleword from memory. */
|
||||
li rTMP, 0
|
||||
/* Discard bits not part of the string */
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
srd rALT, rWORD, r10
|
||||
#else
|
||||
sld rALT, rWORD, r10
|
||||
#endif
|
||||
cmpb rTMP, rALT, rTMP /* Compare each byte against null */
|
||||
/* Discard bits not part of the string */
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
sld rTMP, rTMP, r10
|
||||
#else
|
||||
srd rTMP, rTMP, r10
|
||||
#endif
|
||||
cmpdi rTMP, 0
|
||||
bne L(bytebybyte) /* if it has null, copy byte by byte */
|
||||
subfic r8, r9, 8
|
||||
rlwinm r5, rRTN, 3,26,28 /* Calculate padding in bits. */
|
||||
rldicl r9, rRTN, 0, 61 /* Calculate padding in bytes. */
|
||||
addi rRTN, rRTN, -1
|
||||
|
||||
cmpdi r5, 0 /* check dest alignment */
|
||||
beq L(srcunaligndstalign)
|
||||
|
||||
/* both src and dst unaligned */
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
sld rWORD, rALT, r10
|
||||
mr r11, r10
|
||||
addi r11, r11, -8 /* Adjust byte pointer on loaded dw */
|
||||
#else
|
||||
srd rWORD, rALT, r10
|
||||
subfic r11, r10, 64
|
||||
#endif
|
||||
/* dst alignment is greater than src alignment? */
|
||||
cmpd cr7, r5, r10
|
||||
blt cr7, L(dst_align_small)
|
||||
/* src alignment is less than dst */
|
||||
|
||||
/* Calculate the dst alignment difference */
|
||||
subfic rALT, r9, 8
|
||||
mtctr rALT
|
||||
|
||||
/* Write till dst is aligned */
|
||||
cmpdi rTMP, rALT, 4
|
||||
blt L(storebyte1) /* less than 4, store byte by byte */
|
||||
beq L(equal1) /* if its 4, store word */
|
||||
addi rTMP, rALT, -4 /* greater than 4, so stb and stw */
|
||||
mtctr rTMP
|
||||
L(storebyte1):
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
addi r11, r11, 8 /* Adjust byte pointer on loaded dw */
|
||||
#else
|
||||
addi r11, r11, -8
|
||||
#endif
|
||||
srd rALT, rWORD, r11
|
||||
stbu rALT, 1(rRTN)
|
||||
bdnz L(storebyte1)
|
||||
|
||||
subfic rALT, r9, 8 /* Check the remaining bytes */
|
||||
cmpdi rTMP, rALT, 4
|
||||
blt L(proceed)
|
||||
|
||||
.align 4
|
||||
L(equal1):
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
addi r11, r11, 8 /* Adjust byte pointer on loaded dw */
|
||||
srd rALT, rWORD, r11
|
||||
#else
|
||||
subfic r11, r11, 64
|
||||
sld rALT, rWORD, r11
|
||||
srdi rALT, rALT, 32
|
||||
#endif
|
||||
stw rALT, 1(rRTN)
|
||||
addi rRTN, rRTN, 4
|
||||
|
||||
L(proceed):
|
||||
mr rALT, rWORD
|
||||
/* calculate the Left over bytes to be written */
|
||||
subfic r11, r10, 64
|
||||
subfic r5, r5, 64
|
||||
subf r5, r5, r11 /* remaining bytes on second dw */
|
||||
subfic r10, r5, 64 /* remaining bytes on first dw */
|
||||
subfic r9, r9, 8
|
||||
subf r8, r9, r8 /* recalculate padding */
|
||||
L(srcunaligndstalign):
|
||||
addi rRTN, rRTN, 1
|
||||
subfic r5, r10, 64 /* remaining bytes on second dw */
|
||||
addi rSRC, rSRC, 8
|
||||
li rTMP,0
|
||||
b L(storedouble)
|
||||
|
||||
.align 4
|
||||
L(dst_align_small):
|
||||
mtctr r8
|
||||
/* Write till src is aligned */
|
||||
L(storebyte2):
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
addi r11, r11, 8 /* Adjust byte pointer on dw */
|
||||
#else
|
||||
addi r11, r11, -8
|
||||
#endif
|
||||
srd rALT, rWORD, r11
|
||||
stbu rALT, 1(rRTN)
|
||||
bdnz L(storebyte2)
|
||||
|
||||
addi rSRC, rSRC, 8 /* Increment src pointer */
|
||||
addi rRTN, rRTN, 1 /* Increment dst pointer */
|
||||
rldicl r8, rRTN, 0, 61 /* Recalculate padding */
|
||||
|
||||
/* src is aligned */
|
||||
L(srcaligndstunalign):
|
||||
ld rWORD, 0(rSRC)
|
||||
mr rALT, rWORD
|
||||
li rTMP, 0 /* Check null */
|
||||
cmpb rTMP, rWORD, rTMP
|
||||
cmpdi rTMP, 0
|
||||
bne L(bytebybyte) /* Do byte by byte if there is NULL */
|
||||
rlwinm r5, rRTN, 3,26,28 /* Calculate padding */
|
||||
addi rRTN, rRTN, -1
|
||||
subfic r10, r8, 8
|
||||
/* write byte by byte till aligned */
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
li r11, -8
|
||||
#else
|
||||
li r11, 64
|
||||
#endif
|
||||
mtctr r10
|
||||
cmpdi rTMP, r10, 4
|
||||
blt L(storebyte)
|
||||
beq L(equal)
|
||||
addi rTMP, r10, -4
|
||||
mtctr rTMP
|
||||
L(storebyte):
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
addi r11, r11, 8 /* Adjust byte pointer on dw */
|
||||
#else
|
||||
addi r11, r11, -8
|
||||
#endif
|
||||
srd rALT, rWORD, r11
|
||||
stbu rALT, 1(rRTN)
|
||||
bdnz L(storebyte)
|
||||
|
||||
cmpdi rTMP, r10, 4
|
||||
blt L(align)
|
||||
|
||||
.align 4
|
||||
L(equal):
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
addi r11, r11, 8
|
||||
srd rALT, rWORD, r11
|
||||
#else
|
||||
subfic r11, r11, 64
|
||||
sld rALT, rWORD, r11
|
||||
srdi rALT, rALT, 32
|
||||
#endif
|
||||
stw rALT, 1(rRTN)
|
||||
addi rRTN, rRTN, 4
|
||||
L(align):
|
||||
addi rRTN, rRTN, 1
|
||||
addi rSRC, rSRC, 8 /* Increment src pointer */
|
||||
subfic r10, r5, 64
|
||||
li rTMP, 0
|
||||
/* dst addr aligned to 8 */
|
||||
L(storedouble):
|
||||
ld rALT, 0(rSRC) /* load next dw */
|
||||
cmpb rTMP, rALT, rTMP
|
||||
cmpdi rTMP, 0 /* check for null on each new dw */
|
||||
bne L(null)
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
srd r9, rWORD, r10 /* bytes from first dw */
|
||||
sld r11, rALT, r5 /* bytes from second dw */
|
||||
#else
|
||||
sld r9, rWORD, r10
|
||||
srd r11, rALT, r5
|
||||
#endif
|
||||
or r11, r9, r11 /* make as a single dw */
|
||||
std r11, 0(rRTN) /* store as std on aligned addr */
|
||||
mr rWORD, rALT /* still few bytes left to be written */
|
||||
addi rRTN, rRTN, 8 /* increment dst addr */
|
||||
addi rSRC, rSRC, 8 /* increment src addr */
|
||||
b L(storedouble) /* Loop till NULL */
|
||||
|
||||
.align 4
|
||||
|
||||
/* We've hit the end of the string. Do the rest byte-by-byte. */
|
||||
L(null):
|
||||
addi rRTN, rRTN, -1
|
||||
mr r10, r5
|
||||
mtctr r8
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
subfic r10, r10, 64
|
||||
addi r10, r10, -8
|
||||
#endif
|
||||
cmpdi rTMP, r8, 4
|
||||
blt L(loop)
|
||||
|
||||
/* we can still use stw if leftover >= 4 */
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
addi r10, r10, 8
|
||||
srd r11, rWORD, r10
|
||||
#else
|
||||
subfic r10, r10, 64
|
||||
sld r11, rWORD, r10
|
||||
srdi r11, r11, 32
|
||||
#endif
|
||||
stw r11, 1(rRTN)
|
||||
addi rRTN, rRTN, 4
|
||||
|
||||
beq L(bytebybyte1)
|
||||
addi r10, r10, 32
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
addi r10, r10, -8
|
||||
#else
|
||||
subfic r10, r10, 64
|
||||
#endif
|
||||
addi rTMP, r8, -4
|
||||
mtctr rTMP
|
||||
/* remaining byte by byte part of first dw */
|
||||
L(loop):
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
addi r10, r10, 8
|
||||
#else
|
||||
addi r10, r10, -8
|
||||
#endif
|
||||
srd rTMP, rWORD, r10
|
||||
stbu rTMP, 1(rRTN)
|
||||
bdnz L(loop)
|
||||
|
||||
L(bytebybyte1):
|
||||
addi rRTN, rRTN, 1
|
||||
/* remaining byte by byte part of second dw */
|
||||
L(bytebybyte):
|
||||
addi rRTN, rRTN, -8
|
||||
b L(g1)
|
||||
|
||||
END (FUNC_NAME)
|
||||
|
||||
#ifndef USE_AS_STPCPY
|
||||
libc_hidden_builtin_def (strcpy)
|
||||
#endif
|
@ -1,216 +0,0 @@
|
||||
/* Optimized strcpy implementation for PowerPC64.
|
||||
Copyright (C) 1997-2015 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
/* See strlen.s for comments on how the end-of-string testing works. */
|
||||
|
||||
/* char * [r3] strcpy (char *dest [r3], const char *src [r4]) */
|
||||
|
||||
#ifdef USE_AS_STPCPY
|
||||
# define FUNC_NAME __stpcpy
|
||||
#else
|
||||
# define FUNC_NAME strcpy
|
||||
#endif
|
||||
|
||||
EALIGN (FUNC_NAME, 4, 0)
|
||||
CALL_MCOUNT 2
|
||||
|
||||
#define rTMP r0
|
||||
#ifdef USE_AS_STPCPY
|
||||
#define rRTN r3 /* pointer to previous word/doubleword in dest */
|
||||
#else
|
||||
#define rRTN r12 /* pointer to previous word/doubleword in dest */
|
||||
#endif
|
||||
#define rSRC r4 /* pointer to previous word/doubleword in src */
|
||||
#define rWORD r6 /* current word from src */
|
||||
#define rFEFE r7 /* constant 0xfefefeff | 0xfefefefefefefeff */
|
||||
#define r7F7F r8 /* constant 0x7f7f7f7f | 0x7f7f7f7f7f7f7f7f */
|
||||
#define rNEG r9 /* ~(word in s1 | r7F7F) */
|
||||
#define rALT r10 /* alternate word from src */
|
||||
|
||||
#ifndef USE_AS_STPCPY
|
||||
/* Save the dst pointer to use as return value. */
|
||||
mr rRTN, r3
|
||||
#endif
|
||||
or rTMP, rSRC, rRTN
|
||||
clrldi. rTMP, rTMP, 61
|
||||
bne L(check_word_alignment)
|
||||
|
||||
/* For doubleword aligned memory, operate using doubleword load and stores. */
|
||||
addi rRTN, rRTN, -8
|
||||
|
||||
lis rFEFE, -0x101
|
||||
lis r7F7F, 0x7f7f
|
||||
ld rWORD, 0(rSRC)
|
||||
addi rFEFE, rFEFE, -0x101
|
||||
addi r7F7F, r7F7F, 0x7f7f
|
||||
sldi rTMP, rFEFE, 32
|
||||
insrdi r7F7F, r7F7F, 32, 0
|
||||
add rFEFE, rFEFE, rTMP
|
||||
b L(g2)
|
||||
|
||||
L(g0): ldu rALT, 8(rSRC)
|
||||
stdu rWORD, 8(rRTN)
|
||||
add rTMP, rFEFE, rALT
|
||||
nor rNEG, r7F7F, rALT
|
||||
and. rTMP, rTMP, rNEG
|
||||
bne- L(g1)
|
||||
ldu rWORD, 8(rSRC)
|
||||
stdu rALT, 8(rRTN)
|
||||
L(g2): add rTMP, rFEFE, rWORD
|
||||
nor rNEG, r7F7F, rWORD
|
||||
and. rTMP, rTMP, rNEG
|
||||
beq+ L(g0)
|
||||
|
||||
mr rALT, rWORD
|
||||
/* We've hit the end of the string. Do the rest byte-by-byte. */
|
||||
L(g1):
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
extrdi. rTMP, rALT, 8, 56
|
||||
stbu rALT, 8(rRTN)
|
||||
beqlr-
|
||||
extrdi. rTMP, rALT, 8, 48
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
extrdi. rTMP, rALT, 8, 40
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
extrdi. rTMP, rALT, 8, 32
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
extrdi. rTMP, rALT, 8, 24
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
extrdi. rTMP, rALT, 8, 16
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
extrdi. rTMP, rALT, 8, 8
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
extrdi rTMP, rALT, 8, 0
|
||||
stbu rTMP, 1(rRTN)
|
||||
#else
|
||||
extrdi. rTMP, rALT, 8, 0
|
||||
stbu rTMP, 8(rRTN)
|
||||
beqlr-
|
||||
extrdi. rTMP, rALT, 8, 8
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
extrdi. rTMP, rALT, 8, 16
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
extrdi. rTMP, rALT, 8, 24
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
extrdi. rTMP, rALT, 8, 32
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr
|
||||
extrdi. rTMP, rALT, 8, 40
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
extrdi. rTMP, rALT, 8, 48
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
stbu rALT, 1(rRTN)
|
||||
#endif
|
||||
blr
|
||||
|
||||
L(check_word_alignment):
|
||||
clrldi. rTMP, rTMP, 62
|
||||
bne L(unaligned)
|
||||
|
||||
/* For word aligned memory, operate using word load and stores. */
|
||||
addi rRTN, rRTN, -4
|
||||
|
||||
lis rFEFE, -0x101
|
||||
lis r7F7F, 0x7f7f
|
||||
lwz rWORD, 0(rSRC)
|
||||
addi rFEFE, rFEFE, -0x101
|
||||
addi r7F7F, r7F7F, 0x7f7f
|
||||
b L(g5)
|
||||
|
||||
L(g3): lwzu rALT, 4(rSRC)
|
||||
stwu rWORD, 4(rRTN)
|
||||
add rTMP, rFEFE, rALT
|
||||
nor rNEG, r7F7F, rALT
|
||||
and. rTMP, rTMP, rNEG
|
||||
bne- L(g4)
|
||||
lwzu rWORD, 4(rSRC)
|
||||
stwu rALT, 4(rRTN)
|
||||
L(g5): add rTMP, rFEFE, rWORD
|
||||
nor rNEG, r7F7F, rWORD
|
||||
and. rTMP, rTMP, rNEG
|
||||
beq+ L(g3)
|
||||
|
||||
mr rALT, rWORD
|
||||
/* We've hit the end of the string. Do the rest byte-by-byte. */
|
||||
L(g4):
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
rlwinm. rTMP, rALT, 0, 24, 31
|
||||
stbu rALT, 4(rRTN)
|
||||
beqlr-
|
||||
rlwinm. rTMP, rALT, 24, 24, 31
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
rlwinm. rTMP, rALT, 16, 24, 31
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
rlwinm rTMP, rALT, 8, 24, 31
|
||||
stbu rTMP, 1(rRTN)
|
||||
#else
|
||||
rlwinm. rTMP, rALT, 8, 24, 31
|
||||
stbu rTMP, 4(rRTN)
|
||||
beqlr-
|
||||
rlwinm. rTMP, rALT, 16, 24, 31
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
rlwinm. rTMP, rALT, 24, 24, 31
|
||||
stbu rTMP, 1(rRTN)
|
||||
beqlr-
|
||||
stbu rALT, 1(rRTN)
|
||||
#endif
|
||||
blr
|
||||
|
||||
/* Oh well. In this case, we just do a byte-by-byte copy. */
|
||||
.align 4
|
||||
nop
|
||||
L(unaligned):
|
||||
lbz rWORD, 0(rSRC)
|
||||
addi rRTN, rRTN, -1
|
||||
cmpwi rWORD, 0
|
||||
beq- L(u2)
|
||||
|
||||
L(u0): lbzu rALT, 1(rSRC)
|
||||
stbu rWORD, 1(rRTN)
|
||||
cmpwi rALT, 0
|
||||
beq- L(u1)
|
||||
nop /* Let 601 load start of loop. */
|
||||
lbzu rWORD, 1(rSRC)
|
||||
stbu rALT, 1(rRTN)
|
||||
cmpwi rWORD, 0
|
||||
bne+ L(u0)
|
||||
L(u2): stbu rWORD, 1(rRTN)
|
||||
blr
|
||||
L(u1): stbu rALT, 1(rRTN)
|
||||
blr
|
||||
END (FUNC_NAME)
|
||||
|
||||
#ifndef USE_AS_STPCPY
|
||||
libc_hidden_builtin_def (strcpy)
|
||||
#endif
|
Loading…
x
Reference in New Issue
Block a user