2e9e166761
This patch provides optimized version of wmemset with the z13 vector instructions. ChangeLog: * sysdeps/s390/multiarch/wmemset-c.c: New File. * sysdeps/s390/multiarch/wmemset-vx.S: Likewise. * sysdeps/s390/multiarch/wmemset.c: Likewise. * sysdeps/s390/multiarch/Makefile (sysdep_routines): Add wmemset functions. * sysdeps/s390/multiarch/ifunc-impl-list-common.c (__libc_ifunc_impl_list_common): Add ifunc test for wmemset. * wcsmbs/wmemset.c: Use WMEMSET if defined. * string/test-memset.c: Add wmemset support. * wcsmbs/test-wmemset.c: New File. * wcsmbs/Makefile (strop-tests): Add wmemset. * benchtests/bench-memset.c: Add wmemset support. * benchtests/bench-wmemset.c: New File. * benchtests/Makefile (wcsmbs-bench): Add wmemset.
176 lines
5.6 KiB
Makefile
176 lines
5.6 KiB
Makefile
# Copyright (C) 2013-2015 Free Software Foundation, Inc.
# This file is part of the GNU C Library.

# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.

# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.

# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <http://www.gnu.org/licenses/>.


# Makefile for benchmark tests. The only useful target here is `bench`.
# Add benchmark functions in alphabetical order.

subdir := benchtests

include ../Makeconfig

# Functions whose benchmark binaries are run by the bench-func target.
# bench-math binaries get $(libm) as an extra prerequisite and bench-pthread
# binaries get $(shared-thread-library); keep each list alphabetical.
bench-math := \
  acos acosh asin asinh atan atanh \
  cos cosh exp exp2 ffs ffsll \
  log log2 modf pow rint \
  sin sincos sinh sqrt tan tanh

bench-pthread := pthread_once

# Everything bench-func executes.
bench := $(bench-math) $(bench-pthread)

# String function benchmarks: byte-string routines in string-bench,
# wide-character routines in wcsmbs-bench.
string-bench := \
  bcopy bzero memccpy memchr memcmp memcpy memmem memmove mempcpy memset \
  rawmemchr stpcpy stpncpy strcasecmp strcasestr strcat strchr strchrnul \
  strcmp strcpy strcspn strlen strncasecmp strncat strncmp strncpy \
  strnlen strpbrk strrchr strspn strstr strcpy_chk stpcpy_chk memrchr \
  strsep strtok strcoll

wcsmbs-bench := \
  wcslen wcsnlen wcscpy wcpcpy wcsncpy wcpncpy wcscat wcsncat wcscmp \
  wcsncmp wcschr wcschrnul wcsrchr wcsspn wcspbrk wcscspn wmemchr wmemset

# Both groups together, byte-string functions first.
string-bench-all := $(string-bench) $(wcsmbs-bench)

# Locales that have to be generated before the benchmarks run; the bench
# target lists $(gen-locales) as a prerequisite.
# NOTE(review): presumably needed by locale-dependent benchmarks such as
# strcoll -- confirm.
LOCALES := \
  en_US.UTF-8 tr_TR.UTF-8 cs_CZ.UTF-8 fa_IR.UTF-8 \
  fr_FR.UTF-8 ja_JP.UTF-8 si_LK.UTF-8 en_GB.UTF-8 \
  vi_VN.UTF-8 ar_SA.UTF-8 da_DK.UTF-8 pl_PL.UTF-8 \
  pt_PT.UTF-8 el_GR.UTF-8 ru_RU.UTF-8 iw_IL.UTF-8 \
  is_IS.UTF-8 es_ES.UTF-8 hi_IN.UTF-8 sv_SE.UTF-8 \
  hu_HU.UTF-8 it_IT.UTF-8 sr_RS.UTF-8 zh_CN.UTF-8
include ../gen-locales.mk

# Benchmarks run by the bench-set target; each program's output is stored
# in its own <binary>.out file.
stdlib-bench := strtod
stdio-common-bench := sprintf
benchset := $(string-bench-all) $(stdlib-bench) $(stdio-common-bench)

# Benchmark run by the bench-malloc target.
bench-malloc := malloc-thread

# Compile the ffs/ffsll benchmarks without compiler builtins.
# NOTE(review): presumably so that the library functions are what gets
# timed -- confirm.
CFLAGS-bench-ffs.c += -fno-builtin
CFLAGS-bench-ffsll.c += -fno-builtin

# Extra prerequisites for individual benchmark binaries: the math benchmarks
# depend on $(libm); the pthread benchmarks and the malloc thread benchmark
# depend on $(shared-thread-library).
$(foreach f,$(bench-math),$(objpfx)bench-$(f)): $(libm)
$(foreach f,$(bench-pthread),$(objpfx)bench-$(f)) \
  $(objpfx)bench-malloc-thread: $(shared-thread-library)



# Rules to build and execute the benchmarks. Do not put any benchmark
# parameters beyond this point.

# Benchmark programs must not run concurrently, since that could distort
# the measured performance.
.NOTPARALLEL:

include ../Rules

# Paths of the benchmark executables: $(objpfx)bench-<name> for every name
# in the corresponding list.
binaries-bench := $(foreach f,$(bench),$(objpfx)bench-$(f))
binaries-benchset := $(foreach f,$(benchset),$(objpfx)bench-$(f))
binaries-bench-malloc := $(foreach f,$(bench-malloc),$(objpfx)bench-$(f))

# Benchmark duration in seconds, passed to the sources as -DDURATION.
# Falls back to 10 when BENCH_DURATION has no non-empty value.
ifndef BENCH_DURATION
BENCH_DURATION := 10
endif

CPPFLAGS-nonlib += -DDURATION=$(BENCH_DURATION)

# HP_TIMING is used for measurements by default when it is available;
# setting USE_CLOCK_GETTIME selects clock_gettime instead.
ifdef USE_CLOCK_GETTIME
CPPFLAGS-nonlib += -DUSE_CLOCK_GETTIME
endif

# Extra argument for the programs run by bench-func: -d when DETAILED is
# set, empty otherwise.
ifdef DETAILED
DETAILED_OPT := -d
else
DETAILED_OPT :=
endif

# This makes sure CPPFLAGS-nonlib and CFLAGS-nonlib are passed
# for all these modules.
# cpp-srcs-left holds one <binary>.c name per benchmark executable, and the
# patsubst turns every entry into the same file name, so cppflags-iterator.mk
# is named in the include once per entry.
# NOTE(review): the iterator presumably takes one entry off cpp-srcs-left per
# inclusion and uses $(lib) to select the flag set -- confirm against
# cppflags-iterator.mk.
cpp-srcs-left := $(binaries-benchset:=.c) $(binaries-bench:=.c) \
		 $(binaries-bench-malloc:=.c)
lib := nonlib
include $(patsubst %,$(..)cppflags-iterator.mk,$(cpp-srcs-left))

# json-lib.o is a prerequisite of every benchmark binary (see the link rule).
# NOTE(review): extra-objs is presumably how the shared glibc rules learn
# that the object has to be built -- confirm.
extra-objs += json-lib.o

# Files whose modification regenerates the bench-<name>.c sources.
bench-deps := bench-skeleton.c bench-timing.h Makefile

# Helper program whose output bench-func records as "timing_type".
timing-type := $(objpfx)bench-timing-type

# Command line used to start one benchmark program.  Recursively expanded
# on purpose: $* and the shell variable $${run} only have values inside the
# recipe that uses it, so every such recipe must set `run' first.
run-bench = $(test-wrapper-env) $(run-program-env) $($*-ENV) $(rtld-prefix) $${run}

# Delete every benchmark executable, the timing-type helper, and the
# matching .o file of each.
bench-clean:
	rm -f $(binaries-bench) $(binaries-bench:=.o)
	rm -f $(binaries-benchset) $(binaries-benchset:=.o)
	rm -f $(binaries-bench-malloc) $(binaries-bench-malloc:=.o)
	rm -f $(timing-type) $(timing-type:=.o)

# The benchmark driver targets are commands, not files: none of them creates
# a file with its own name.  Declaring them phony makes sure they still run
# when a file called e.g. `bench' or `bench-clean' exists in this directory.
.PHONY: bench bench-clean bench-set bench-func bench-malloc

# Main entry point: build the timing-type helper and the locales, then run
# the benchset programs, the bench functions and the malloc benchmark.
bench: $(timing-type) $(gen-locales) bench-set bench-func bench-malloc

# Run every benchset program in turn and store its standard output next to
# the binary as <binary>.out.  The loop variable has to be called `run'
# because that is the shell variable $(run-bench) expands to.
bench-set: $(binaries-benchset)
	for run in $^; do \
	  echo "Running $${run}"; \
	  $(run-bench) > $${run}.out; \
	done

# Run the malloc thread benchmark once for each of 1, 8, 16 and 32, passing
# the number as the program's only argument and writing the output to
# <binary>-<number>.out.  `run' is set by hand here because $(run-bench)
# reads that shell variable.
# NOTE(review): the argument is presumably a thread count -- confirm
# against bench-malloc-thread.c.
bench-malloc: $(binaries-bench-malloc)
	run=$(objpfx)bench-malloc-thread; \
	for thr in 1 8 16 32; do \
	  echo "Running $${run} $${thr}"; \
	  $(run-bench) $${thr} > $${run}-$${thr}.out; \
	done

# Build and execute the benchmark functions. This target generates JSON
# formatted bench.out. Each of the programs produce independent JSON output,
# so one could even execute them individually and process it using any JSON
# capable language or tool.
#
# The first recipe line is a single shell command list:
#  - the braces collect everything written to stdout into bench.out-tmp: a
#    header holding the output of the timing-type helper, then the output of
#    each program separated by "," (skipped before the first one, which is
#    recognised by comparing against $<), then the closing braces;
#  - progress messages go to stderr so they stay out of the JSON;
#  - an existing bench.out is kept as bench.out.old before the temporary
#    file is renamed into place.
# The second recipe line checks the result with scripts/validate_benchout.py
# and scripts/benchout.schema.json.
bench-func: $(binaries-bench)
	{ timing_type=$$($(timing-type)); \
	echo "{\"timing_type\": \"$${timing_type}\","; \
	echo " \"functions\": {"; \
	for run in $^; do \
	  if ! [ "x$${run}" = "x$<" ]; then \
	    echo ","; \
	  fi; \
	  echo "Running $${run}" >&2; \
	  $(run-bench) $(DETAILED_OPT); \
	done; \
	echo; \
	echo " }"; \
	echo "}"; } > $(objpfx)bench.out-tmp; \
	if [ -f $(objpfx)bench.out ]; then \
	  mv -f $(objpfx)bench.out $(objpfx)bench.out.old; \
	fi; \
	mv -f $(objpfx)bench.out-tmp $(objpfx)bench.out
	scripts/validate_benchout.py $(objpfx)bench.out \
		scripts/benchout.schema.json

# Static pattern rule that links the timing-type helper and every benchmark
# executable.  Each target X depends on X.o, on json-lib.o, on the words of
# $(link-libc) that match $(common-objpfx)lib% (sorted, which also removes
# duplicates), on start.o from $(csu-objpfx), and on $(+preinit) and
# $(+postinit).
# NOTE(review): $(+link) is defined outside this file; presumably glibc's
# standard program link command -- confirm.
$(timing-type) $(binaries-bench) $(binaries-benchset) \
  $(binaries-bench-malloc): %: %.o $(objpfx)json-lib.o \
  $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
  $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
	$(+link)

# Generate $(objpfx)bench-<name>.c from <name>-inputs; the files in
# $(bench-deps) are prerequisites too.  If the make variable <name>-INCLUDE
# is non-empty, the files it names are copied to the start of the output.
# The rest is whatever scripts/bench.py prints when given <name>, i.e. the
# first prerequisite with its -inputs suffix removed.  The output goes to
# $@-tmp and is renamed on a separate recipe line, which only runs when the
# first line succeeded.
$(objpfx)bench-%.c: %-inputs $(bench-deps)
	{ if [ -n "$($*-INCLUDE)" ]; then \
	  cat $($*-INCLUDE); \
	fi; \
	scripts/bench.py $(patsubst %-inputs,%,$<); } > $@-tmp
	mv -f $@-tmp $@