Adhemerval Zanella 71ae86478e PowerPC: memset optimization for POWER8/PPC64
This patch adds an optimized memset implementation for POWER8.  For
sizes from 0 to 255 bytes, a word/doubleword algorithm similar to
POWER7 optimized one is used.

For size higher than 255 two strategies are used:

1. If the constant is different than 0, the memory is written with
   altivec vector instruction;

2. If constant is 0, dbcz instructions are used.  The loop is unrolled
   to clear 512 byte at time.

Using vector instructions increases throughput considerable, with a
double performance for sizes larger than 1024.  The dcbz loops unrolls
also shows performance improvement, by doubling throughput for sizes
larger than 8192 bytes.
2014-09-10 07:39:46 -04:00

54 lines
2.0 KiB
C

/* Multiple versions of memset.
Copyright (C) 2013-2014 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* Define multiple versions only for definition in libc. */
#if defined SHARED && !defined NOT_IN_libc
/* Redefine memset so that the compiler won't complain about the type
mismatch with the IFUNC selector in strong_alias, below. */
# undef memset
# define memset __redirect_memset
# include <string.h>
# include <shlib-compat.h>
# include "init-arch.h"
extern __typeof (__redirect_memset) __libc_memset;
extern __typeof (__redirect_memset) __memset_ppc attribute_hidden;
extern __typeof (__redirect_memset) __memset_power4 attribute_hidden;
extern __typeof (__redirect_memset) __memset_power6 attribute_hidden;
extern __typeof (__redirect_memset) __memset_power7 attribute_hidden;
extern __typeof (__redirect_memset) __memset_power8 attribute_hidden;
/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
ifunc symbol properly. */
libc_ifunc (__libc_memset,
(hwcap2 & PPC_FEATURE2_ARCH_2_07)
? __memset_power8 :
(hwcap & PPC_FEATURE_HAS_VSX)
? __memset_power7 :
(hwcap & PPC_FEATURE_ARCH_2_05)
? __memset_power6 :
(hwcap & PPC_FEATURE_POWER4)
? __memset_power4
: __memset_ppc);
#undef memset
strong_alias (__libc_memset, memset);
libc_hidden_ver (__libc_memset, memset);
#endif