71ae86478e
This patch adds an optimized memset implementation for POWER8. For sizes from 0 to 255 bytes, a word/doubleword algorithm similar to the POWER7-optimized one is used. For sizes larger than 255 bytes, two strategies are used: 1. If the constant is different from 0, the memory is written with Altivec vector instructions; 2. If the constant is 0, dcbz instructions are used. The loop is unrolled to clear 512 bytes at a time. Using vector instructions increases throughput considerably, doubling performance for sizes larger than 1024 bytes. Unrolling the dcbz loop also improves performance, doubling throughput for sizes larger than 8192 bytes.
44 lines
1.6 KiB
C
44 lines
1.6 KiB
C
/* Multiple versions of bzero. PowerPC64 version.
|
|
Copyright (C) 2013-2014 Free Software Foundation, Inc.
|
|
This file is part of the GNU C Library.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
/* Define multiple versions only for definition in libc. */
|
|
#ifndef NOT_IN_libc
# include <string.h>
# include <strings.h>
# include "init-arch.h"

/* Candidate implementations, listed in the same order in which the
   selector below tests for them.  Each one is declared with the type
   of bzero and marked attribute_hidden.  */
extern __typeof (bzero) __bzero_power8 attribute_hidden;
extern __typeof (bzero) __bzero_power7 attribute_hidden;
extern __typeof (bzero) __bzero_power6 attribute_hidden;
extern __typeof (bzero) __bzero_power4 attribute_hidden;
extern __typeof (bzero) __bzero_ppc attribute_hidden;

/* Choose the routine that backs __bzero.  The feature bits are tested
   from top to bottom and the first one that is set decides the
   result; when none is set, __bzero_ppc is used.

   NOTE(review): hwcap and hwcap2 are not declared in this file;
   presumably they are supplied by libc_ifunc / init-arch.h -- confirm
   before changing the expression.  */
libc_ifunc (__bzero,
            (hwcap2 & PPC_FEATURE2_ARCH_2_07) ? __bzero_power8
            : (hwcap & PPC_FEATURE_HAS_VSX) ? __bzero_power7
            : (hwcap & PPC_FEATURE_ARCH_2_05) ? __bzero_power6
            : (hwcap & PPC_FEATURE_POWER4) ? __bzero_power4
            : __bzero_ppc);

/* Make the public name bzero refer to __bzero through weak_alias.  */
weak_alias (__bzero, bzero)
#endif
|