5ff81530dd
Continuing fixes for ceil and floor functions not to raise the "inexact" exception, this patch fixes the x86_64 SSE4.1 versions. The roundss / roundsd instructions take an immediate operand that determines the rounding mode and whether to raise "inexact"; this just needs bit 3 set to disable "inexact", which this patch does. Remark: we don't have an SSE4.1 version of trunc / truncf (using this instruction with operand 11); I'd expect one to make sense, but of course it should be benchmarked against the existing C code. I'll file a bug in Bugzilla for the lack of such a version. Tested for x86_64. [BZ #15479] * sysdeps/x86_64/fpu/multiarch/s_ceil.S (__ceil_sse41): Set bit 3 of immediate operand to rounding instruction. * sysdeps/x86_64/fpu/multiarch/s_ceilf.S (__ceilf_sse41): Likewise. * sysdeps/x86_64/fpu/multiarch/s_floor.S (__floor_sse41): Likewise. * sysdeps/x86_64/fpu/multiarch/s_floorf.S (__floorf_sse41): Likewise.
39 lines
1.2 KiB
ArmAsm
39 lines
1.2 KiB
ArmAsm
/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
|
|
This file is part of the GNU C Library.
|
|
Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
#include <machine/asm.h>
|
|
#include <init-arch.h>
|
|
|
|
|
|
/* Resolver for floorf.  The .type directive below declares __floorf as
   a GNU indirect function, so this code does not round anything itself:
   it returns in %rax the address of the implementation to use, either
   __floorf_sse41 or __floorf_c.  */
ENTRY(__floorf)
	.type	__floorf, @gnu_indirect_function
	/* Macro from <init-arch.h>; presumably prepares %rdx for the
	   feature test below -- confirm against its definition.  */
	LOAD_RTLD_GLOBAL_RO_RDX
	/* Start with the __floorf_c fallback as the answer.  */
	leaq	__floorf_c(%rip), %rax
	HAS_CPU_FEATURE (SSE4_1)
	/* A zero result (ZF set) keeps the fallback chosen above.  */
	jz	1f
	/* Non-zero result: switch to the SSE4.1 version.  */
	leaq	__floorf_sse41(%rip), %rax
1:	ret
END(__floorf)
weak_alias (__floorf, floorf)
|
|
|
|
|
|
/* Bit fields of the roundss immediate used below; together they give 9.  */
#define FLOORF_ROUND_DOWN	0x1	/* Rounding-mode field: round down.  */
#define FLOORF_NO_INEXACT	0x8	/* Bit 3: do not raise "inexact".  */

/* SSE4.1 variant of floorf.  A single roundss reads the value in %xmm0
   and writes the rounded result back to %xmm0.  Bit 3 of the immediate
   is set so that the instruction does not raise the "inexact"
   exception.  */
ENTRY(__floorf_sse41)
	roundss	$(FLOORF_NO_INEXACT | FLOORF_ROUND_DOWN), %xmm0, %xmm0
	ret
END(__floorf_sse41)
|