2005-01-06 22:40:27 +00:00
|
|
|
/* PLT trampolines. x86-64 version.
|
2016-01-04 16:05:18 +00:00
|
|
|
Copyright (C) 2004-2016 Free Software Foundation, Inc.
|
2005-01-06 22:40:27 +00:00
|
|
|
This file is part of the GNU C Library.
|
|
|
|
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
2012-02-09 23:18:22 +00:00
|
|
|
License along with the GNU C Library; if not, see
|
|
|
|
<http://www.gnu.org/licenses/>. */
|
2005-01-06 22:40:27 +00:00
|
|
|
|
2009-07-10 12:04:14 -07:00
|
|
|
#include <config.h>
|
2005-01-06 22:40:27 +00:00
|
|
|
#include <sysdep.h>
|
2009-07-10 12:04:14 -07:00
|
|
|
#include <link-defines.h>
|
2005-01-06 22:40:27 +00:00
|
|
|
|
Save and restore vector registers in x86-64 ld.so
This patch adds SSE, AVX and AVX512 versions of _dl_runtime_resolve
and _dl_runtime_profile, which save and restore the first 8 vector
registers used for parameter passing. elf_machine_runtime_setup
selects the proper _dl_runtime_resolve or _dl_runtime_profile based
on _dl_x86_cpu_features. It avoids race condition caused by
FOREIGN_CALL macros, which are only used for x86-64.
Performance impact of saving and restoring 8 vector registers are
negligible on Nehalem, Sandy Bridge, Ivy Bridge and Haswell when
ld.so is optimized with SSE2.
[BZ #15128]
* sysdeps/x86_64/Makefile [$(subdir) == elf] (tests): Add
ifuncmain8.
(modules-names): Add ifuncmod8.
($(objpfx)ifuncmain8): New rule.
* sysdeps/x86_64/dl-machine.h: Include <dl-procinfo.h> and
<cpuid.h>.
(elf_machine_runtime_setup): Use _dl_runtime_resolve_sse,
_dl_runtime_resolve_avx, or _dl_runtime_resolve_avx512,
_dl_runtime_profile_sse, _dl_runtime_profile_avx, or
_dl_runtime_profile_avx512, based on HAS_ARCH_FEATURE.
* sysdeps/x86_64/dl-trampoline.S: Rewrite.
* sysdeps/x86_64/dl-trampoline.h: Likewise.
* sysdeps/x86_64/ifuncmain8.c: New file.
* sysdeps/x86_64/ifuncmod8.c: Likewise.
* sysdeps/x86_64/nptl/tcb-offsets.sym (RTLD_SAVESPACE_SSE):
Removed.
* sysdeps/x86_64/nptl/tls.h (__128bits): Removed.
(tcbhead_t): Change rtld_must_xmm_save to __glibc_unused1.
Change rtld_savespace_sse to __glibc_unused2.
(RTLD_CHECK_FOREIGN_CALL): Removed.
(RTLD_ENABLE_FOREIGN_CALL): Likewise.
(RTLD_PREPARE_FOREIGN_CALL): Likewise.
(RTLD_FINALIZE_FOREIGN_CALL): Likewise.
2015-08-25 04:33:54 -07:00
|
|
|
#ifndef DL_STACK_ALIGNMENT
|
|
|
|
/* Due to GCC bug:
|
|
|
|
|
|
|
|
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
|
|
|
|
|
|
|
|
__tls_get_addr may be called with 8-byte stack alignment. Although
|
|
|
|
this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
|
|
|
|
that stack will be always aligned at 16 bytes. We use unaligned
|
|
|
|
16-byte move to load and store SSE registers, which has no penalty
|
|
|
|
on modern processors if stack is 16-byte aligned. */
|
|
|
|
# define DL_STACK_ALIGNMENT 8
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef DL_RUNIME_UNALIGNED_VEC_SIZE
|
|
|
|
/* The maximum size of unaligned vector load and store. */
|
|
|
|
# define DL_RUNIME_UNALIGNED_VEC_SIZE 16
|
2012-05-11 11:50:11 -07:00
|
|
|
#endif
|
|
|
|
|
Save and restore vector registers in x86-64 ld.so
This patch adds SSE, AVX and AVX512 versions of _dl_runtime_resolve
and _dl_runtime_profile, which save and restore the first 8 vector
registers used for parameter passing. elf_machine_runtime_setup
selects the proper _dl_runtime_resolve or _dl_runtime_profile based
on _dl_x86_cpu_features. It avoids race condition caused by
FOREIGN_CALL macros, which are only used for x86-64.
Performance impact of saving and restoring 8 vector registers are
negligible on Nehalem, Sandy Bridge, Ivy Bridge and Haswell when
ld.so is optimized with SSE2.
[BZ #15128]
* sysdeps/x86_64/Makefile [$(subdir) == elf] (tests): Add
ifuncmain8.
(modules-names): Add ifuncmod8.
($(objpfx)ifuncmain8): New rule.
* sysdeps/x86_64/dl-machine.h: Include <dl-procinfo.h> and
<cpuid.h>.
(elf_machine_runtime_setup): Use _dl_runtime_resolve_sse,
_dl_runtime_resolve_avx, or _dl_runtime_resolve_avx512,
_dl_runtime_profile_sse, _dl_runtime_profile_avx, or
_dl_runtime_profile_avx512, based on HAS_ARCH_FEATURE.
* sysdeps/x86_64/dl-trampoline.S: Rewrite.
* sysdeps/x86_64/dl-trampoline.h: Likewise.
* sysdeps/x86_64/ifuncmain8.c: New file.
* sysdeps/x86_64/ifuncmod8.c: Likewise.
* sysdeps/x86_64/nptl/tcb-offsets.sym (RTLD_SAVESPACE_SSE):
Removed.
* sysdeps/x86_64/nptl/tls.h (__128bits): Removed.
(tcbhead_t): Change rtld_must_xmm_save to __glibc_unused1.
Change rtld_savespace_sse to __glibc_unused2.
(RTLD_CHECK_FOREIGN_CALL): Removed.
(RTLD_ENABLE_FOREIGN_CALL): Likewise.
(RTLD_PREPARE_FOREIGN_CALL): Likewise.
(RTLD_FINALIZE_FOREIGN_CALL): Likewise.
2015-08-25 04:33:54 -07:00
|
|
|
/* True if _dl_runtime_resolve should align stack to VEC_SIZE bytes. */
|
|
|
|
#define DL_RUNIME_RESOLVE_REALIGN_STACK \
|
|
|
|
(VEC_SIZE > DL_STACK_ALIGNMENT \
|
|
|
|
&& VEC_SIZE > DL_RUNIME_UNALIGNED_VEC_SIZE)
|
|
|
|
|
|
|
|
/* Align vector register save area to 16 bytes. */
|
|
|
|
#define REGISTER_SAVE_VEC_OFF 0
|
|
|
|
|
2014-04-01 10:16:04 -07:00
|
|
|
/* Area on stack to save and restore registers used for parameter
|
|
|
|
passing when calling _dl_fixup. */
|
|
|
|
#ifdef __ILP32__
|
Save and restore vector registers in x86-64 ld.so
This patch adds SSE, AVX and AVX512 versions of _dl_runtime_resolve
and _dl_runtime_profile, which save and restore the first 8 vector
registers used for parameter passing. elf_machine_runtime_setup
selects the proper _dl_runtime_resolve or _dl_runtime_profile based
on _dl_x86_cpu_features. It avoids race condition caused by
FOREIGN_CALL macros, which are only used for x86-64.
Performance impact of saving and restoring 8 vector registers are
negligible on Nehalem, Sandy Bridge, Ivy Bridge and Haswell when
ld.so is optimized with SSE2.
[BZ #15128]
* sysdeps/x86_64/Makefile [$(subdir) == elf] (tests): Add
ifuncmain8.
(modules-names): Add ifuncmod8.
($(objpfx)ifuncmain8): New rule.
* sysdeps/x86_64/dl-machine.h: Include <dl-procinfo.h> and
<cpuid.h>.
(elf_machine_runtime_setup): Use _dl_runtime_resolve_sse,
_dl_runtime_resolve_avx, or _dl_runtime_resolve_avx512,
_dl_runtime_profile_sse, _dl_runtime_profile_avx, or
_dl_runtime_profile_avx512, based on HAS_ARCH_FEATURE.
* sysdeps/x86_64/dl-trampoline.S: Rewrite.
* sysdeps/x86_64/dl-trampoline.h: Likewise.
* sysdeps/x86_64/ifuncmain8.c: New file.
* sysdeps/x86_64/ifuncmod8.c: Likewise.
* sysdeps/x86_64/nptl/tcb-offsets.sym (RTLD_SAVESPACE_SSE):
Removed.
* sysdeps/x86_64/nptl/tls.h (__128bits): Removed.
(tcbhead_t): Change rtld_must_xmm_save to __glibc_unused1.
Change rtld_savespace_sse to __glibc_unused2.
(RTLD_CHECK_FOREIGN_CALL): Removed.
(RTLD_ENABLE_FOREIGN_CALL): Likewise.
(RTLD_PREPARE_FOREIGN_CALL): Likewise.
(RTLD_FINALIZE_FOREIGN_CALL): Likewise.
2015-08-25 04:33:54 -07:00
|
|
|
# define REGISTER_SAVE_RAX (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8)
|
2015-03-16 14:58:43 -07:00
|
|
|
# define PRESERVE_BND_REGS_PREFIX
|
2014-04-01 10:16:04 -07:00
|
|
|
#else
|
|
|
|
/* Align bound register save area to 16 bytes. */
|
Save and restore vector registers in x86-64 ld.so
This patch adds SSE, AVX and AVX512 versions of _dl_runtime_resolve
and _dl_runtime_profile, which save and restore the first 8 vector
registers used for parameter passing. elf_machine_runtime_setup
selects the proper _dl_runtime_resolve or _dl_runtime_profile based
on _dl_x86_cpu_features. It avoids race condition caused by
FOREIGN_CALL macros, which are only used for x86-64.
Performance impact of saving and restoring 8 vector registers are
negligible on Nehalem, Sandy Bridge, Ivy Bridge and Haswell when
ld.so is optimized with SSE2.
[BZ #15128]
* sysdeps/x86_64/Makefile [$(subdir) == elf] (tests): Add
ifuncmain8.
(modules-names): Add ifuncmod8.
($(objpfx)ifuncmain8): New rule.
* sysdeps/x86_64/dl-machine.h: Include <dl-procinfo.h> and
<cpuid.h>.
(elf_machine_runtime_setup): Use _dl_runtime_resolve_sse,
_dl_runtime_resolve_avx, or _dl_runtime_resolve_avx512,
_dl_runtime_profile_sse, _dl_runtime_profile_avx, or
_dl_runtime_profile_avx512, based on HAS_ARCH_FEATURE.
* sysdeps/x86_64/dl-trampoline.S: Rewrite.
* sysdeps/x86_64/dl-trampoline.h: Likewise.
* sysdeps/x86_64/ifuncmain8.c: New file.
* sysdeps/x86_64/ifuncmod8.c: Likewise.
* sysdeps/x86_64/nptl/tcb-offsets.sym (RTLD_SAVESPACE_SSE):
Removed.
* sysdeps/x86_64/nptl/tls.h (__128bits): Removed.
(tcbhead_t): Change rtld_must_xmm_save to __glibc_unused1.
Change rtld_savespace_sse to __glibc_unused2.
(RTLD_CHECK_FOREIGN_CALL): Removed.
(RTLD_ENABLE_FOREIGN_CALL): Likewise.
(RTLD_PREPARE_FOREIGN_CALL): Likewise.
(RTLD_FINALIZE_FOREIGN_CALL): Likewise.
2015-08-25 04:33:54 -07:00
|
|
|
# define REGISTER_SAVE_BND0 (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8)
|
2014-04-01 10:16:04 -07:00
|
|
|
# define REGISTER_SAVE_BND1 (REGISTER_SAVE_BND0 + 16)
|
|
|
|
# define REGISTER_SAVE_BND2 (REGISTER_SAVE_BND1 + 16)
|
|
|
|
# define REGISTER_SAVE_BND3 (REGISTER_SAVE_BND2 + 16)
|
|
|
|
# define REGISTER_SAVE_RAX (REGISTER_SAVE_BND3 + 16)
|
2015-03-16 14:58:43 -07:00
|
|
|
# ifdef HAVE_MPX_SUPPORT
|
|
|
|
# define PRESERVE_BND_REGS_PREFIX bnd
|
|
|
|
# else
|
|
|
|
# define PRESERVE_BND_REGS_PREFIX .byte 0xf2
|
|
|
|
# endif
|
2014-04-01 10:16:04 -07:00
|
|
|
#endif
|
|
|
|
#define REGISTER_SAVE_RCX (REGISTER_SAVE_RAX + 8)
|
|
|
|
#define REGISTER_SAVE_RDX (REGISTER_SAVE_RCX + 8)
|
|
|
|
#define REGISTER_SAVE_RSI (REGISTER_SAVE_RDX + 8)
|
|
|
|
#define REGISTER_SAVE_RDI (REGISTER_SAVE_RSI + 8)
|
|
|
|
#define REGISTER_SAVE_R8 (REGISTER_SAVE_RDI + 8)
|
|
|
|
#define REGISTER_SAVE_R9 (REGISTER_SAVE_R8 + 8)
|
|
|
|
|
2015-10-13 10:29:45 -07:00
|
|
|
#define RESTORE_AVX
|
|
|
|
|
|
|
|
#ifdef HAVE_AVX512_ASM_SUPPORT
|
|
|
|
# define VEC_SIZE 64
|
|
|
|
# define VMOVA vmovdqa64
|
|
|
|
# if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
|
|
|
|
# define VMOV vmovdqa64
|
|
|
|
# else
|
|
|
|
# define VMOV vmovdqu64
|
|
|
|
# endif
|
|
|
|
# define VEC(i) zmm##i
|
|
|
|
# define _dl_runtime_resolve _dl_runtime_resolve_avx512
|
|
|
|
# define _dl_runtime_profile _dl_runtime_profile_avx512
|
|
|
|
# include "dl-trampoline.h"
|
|
|
|
# undef _dl_runtime_resolve
|
|
|
|
# undef _dl_runtime_profile
|
|
|
|
# undef VEC
|
|
|
|
# undef VMOV
|
|
|
|
# undef VMOVA
|
|
|
|
# undef VEC_SIZE
|
Save and restore vector registers in x86-64 ld.so
This patch adds SSE, AVX and AVX512 versions of _dl_runtime_resolve
and _dl_runtime_profile, which save and restore the first 8 vector
registers used for parameter passing. elf_machine_runtime_setup
selects the proper _dl_runtime_resolve or _dl_runtime_profile based
on _dl_x86_cpu_features. It avoids race condition caused by
FOREIGN_CALL macros, which are only used for x86-64.
Performance impact of saving and restoring 8 vector registers are
negligible on Nehalem, Sandy Bridge, Ivy Bridge and Haswell when
ld.so is optimized with SSE2.
[BZ #15128]
* sysdeps/x86_64/Makefile [$(subdir) == elf] (tests): Add
ifuncmain8.
(modules-names): Add ifuncmod8.
($(objpfx)ifuncmain8): New rule.
* sysdeps/x86_64/dl-machine.h: Include <dl-procinfo.h> and
<cpuid.h>.
(elf_machine_runtime_setup): Use _dl_runtime_resolve_sse,
_dl_runtime_resolve_avx, or _dl_runtime_resolve_avx512,
_dl_runtime_profile_sse, _dl_runtime_profile_avx, or
_dl_runtime_profile_avx512, based on HAS_ARCH_FEATURE.
* sysdeps/x86_64/dl-trampoline.S: Rewrite.
* sysdeps/x86_64/dl-trampoline.h: Likewise.
* sysdeps/x86_64/ifuncmain8.c: New file.
* sysdeps/x86_64/ifuncmod8.c: Likewise.
* sysdeps/x86_64/nptl/tcb-offsets.sym (RTLD_SAVESPACE_SSE):
Removed.
* sysdeps/x86_64/nptl/tls.h (__128bits): Removed.
(tcbhead_t): Change rtld_must_xmm_save to __glibc_unused1.
Change rtld_savespace_sse to __glibc_unused2.
(RTLD_CHECK_FOREIGN_CALL): Removed.
(RTLD_ENABLE_FOREIGN_CALL): Likewise.
(RTLD_PREPARE_FOREIGN_CALL): Likewise.
(RTLD_FINALIZE_FOREIGN_CALL): Likewise.
2015-08-25 04:33:54 -07:00
|
|
|
#else
|
2015-10-13 10:29:45 -07:00
|
|
|
strong_alias (_dl_runtime_resolve_avx, _dl_runtime_resolve_avx512)
|
|
|
|
.hidden _dl_runtime_resolve_avx512
|
|
|
|
strong_alias (_dl_runtime_profile_avx, _dl_runtime_profile_avx512)
|
|
|
|
.hidden _dl_runtime_profile_avx512
|
2014-04-01 10:16:04 -07:00
|
|
|
#endif
|
Save and restore vector registers in x86-64 ld.so
This patch adds SSE, AVX and AVX512 versions of _dl_runtime_resolve
and _dl_runtime_profile, which save and restore the first 8 vector
registers used for parameter passing. elf_machine_runtime_setup
selects the proper _dl_runtime_resolve or _dl_runtime_profile based
on _dl_x86_cpu_features. It avoids race condition caused by
FOREIGN_CALL macros, which are only used for x86-64.
Performance impact of saving and restoring 8 vector registers are
negligible on Nehalem, Sandy Bridge, Ivy Bridge and Haswell when
ld.so is optimized with SSE2.
[BZ #15128]
* sysdeps/x86_64/Makefile [$(subdir) == elf] (tests): Add
ifuncmain8.
(modules-names): Add ifuncmod8.
($(objpfx)ifuncmain8): New rule.
* sysdeps/x86_64/dl-machine.h: Include <dl-procinfo.h> and
<cpuid.h>.
(elf_machine_runtime_setup): Use _dl_runtime_resolve_sse,
_dl_runtime_resolve_avx, or _dl_runtime_resolve_avx512,
_dl_runtime_profile_sse, _dl_runtime_profile_avx, or
_dl_runtime_profile_avx512, based on HAS_ARCH_FEATURE.
* sysdeps/x86_64/dl-trampoline.S: Rewrite.
* sysdeps/x86_64/dl-trampoline.h: Likewise.
* sysdeps/x86_64/ifuncmain8.c: New file.
* sysdeps/x86_64/ifuncmod8.c: Likewise.
* sysdeps/x86_64/nptl/tcb-offsets.sym (RTLD_SAVESPACE_SSE):
Removed.
* sysdeps/x86_64/nptl/tls.h (__128bits): Removed.
(tcbhead_t): Change rtld_must_xmm_save to __glibc_unused1.
Change rtld_savespace_sse to __glibc_unused2.
(RTLD_CHECK_FOREIGN_CALL): Removed.
(RTLD_ENABLE_FOREIGN_CALL): Likewise.
(RTLD_PREPARE_FOREIGN_CALL): Likewise.
(RTLD_FINALIZE_FOREIGN_CALL): Likewise.
2015-08-25 04:33:54 -07:00
|
|
|
|
|
|
|
#define VEC_SIZE 32
|
|
|
|
#define VMOVA vmovdqa
|
|
|
|
#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
|
|
|
|
# define VMOV vmovdqa
|
|
|
|
#else
|
|
|
|
# define VMOV vmovdqu
|
2005-07-07 02:39:45 +00:00
|
|
|
#endif
|
Save and restore vector registers in x86-64 ld.so
This patch adds SSE, AVX and AVX512 versions of _dl_runtime_resolve
and _dl_runtime_profile, which save and restore the first 8 vector
registers used for parameter passing. elf_machine_runtime_setup
selects the proper _dl_runtime_resolve or _dl_runtime_profile based
on _dl_x86_cpu_features. It avoids race condition caused by
FOREIGN_CALL macros, which are only used for x86-64.
Performance impact of saving and restoring 8 vector registers are
negligible on Nehalem, Sandy Bridge, Ivy Bridge and Haswell when
ld.so is optimized with SSE2.
[BZ #15128]
* sysdeps/x86_64/Makefile [$(subdir) == elf] (tests): Add
ifuncmain8.
(modules-names): Add ifuncmod8.
($(objpfx)ifuncmain8): New rule.
* sysdeps/x86_64/dl-machine.h: Include <dl-procinfo.h> and
<cpuid.h>.
(elf_machine_runtime_setup): Use _dl_runtime_resolve_sse,
_dl_runtime_resolve_avx, or _dl_runtime_resolve_avx512,
_dl_runtime_profile_sse, _dl_runtime_profile_avx, or
_dl_runtime_profile_avx512, based on HAS_ARCH_FEATURE.
* sysdeps/x86_64/dl-trampoline.S: Rewrite.
* sysdeps/x86_64/dl-trampoline.h: Likewise.
* sysdeps/x86_64/ifuncmain8.c: New file.
* sysdeps/x86_64/ifuncmod8.c: Likewise.
* sysdeps/x86_64/nptl/tcb-offsets.sym (RTLD_SAVESPACE_SSE):
Removed.
* sysdeps/x86_64/nptl/tls.h (__128bits): Removed.
(tcbhead_t): Change rtld_must_xmm_save to __glibc_unused1.
Change rtld_savespace_sse to __glibc_unused2.
(RTLD_CHECK_FOREIGN_CALL): Removed.
(RTLD_ENABLE_FOREIGN_CALL): Likewise.
(RTLD_PREPARE_FOREIGN_CALL): Likewise.
(RTLD_FINALIZE_FOREIGN_CALL): Likewise.
2015-08-25 04:33:54 -07:00
|
|
|
#define VEC(i) ymm##i
|
|
|
|
#define _dl_runtime_resolve _dl_runtime_resolve_avx
|
|
|
|
#define _dl_runtime_profile _dl_runtime_profile_avx
|
|
|
|
#include "dl-trampoline.h"
|
|
|
|
#undef _dl_runtime_resolve
|
|
|
|
#undef _dl_runtime_profile
|
|
|
|
#undef VEC
|
|
|
|
#undef VMOV
|
|
|
|
#undef VMOVA
|
|
|
|
#undef VEC_SIZE
|
|
|
|
|
|
|
|
/* movaps/movups is 1-byte shorter. */
|
|
|
|
#define VEC_SIZE 16
|
|
|
|
#define VMOVA movaps
|
|
|
|
#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
|
|
|
|
# define VMOV movaps
|
|
|
|
#else
|
|
|
|
# define VMOV movups
|
2009-07-29 08:33:03 -07:00
|
|
|
#endif
|
Save and restore vector registers in x86-64 ld.so
This patch adds SSE, AVX and AVX512 versions of _dl_runtime_resolve
and _dl_runtime_profile, which save and restore the first 8 vector
registers used for parameter passing. elf_machine_runtime_setup
selects the proper _dl_runtime_resolve or _dl_runtime_profile based
on _dl_x86_cpu_features. It avoids race condition caused by
FOREIGN_CALL macros, which are only used for x86-64.
Performance impact of saving and restoring 8 vector registers are
negligible on Nehalem, Sandy Bridge, Ivy Bridge and Haswell when
ld.so is optimized with SSE2.
[BZ #15128]
* sysdeps/x86_64/Makefile [$(subdir) == elf] (tests): Add
ifuncmain8.
(modules-names): Add ifuncmod8.
($(objpfx)ifuncmain8): New rule.
* sysdeps/x86_64/dl-machine.h: Include <dl-procinfo.h> and
<cpuid.h>.
(elf_machine_runtime_setup): Use _dl_runtime_resolve_sse,
_dl_runtime_resolve_avx, or _dl_runtime_resolve_avx512,
_dl_runtime_profile_sse, _dl_runtime_profile_avx, or
_dl_runtime_profile_avx512, based on HAS_ARCH_FEATURE.
* sysdeps/x86_64/dl-trampoline.S: Rewrite.
* sysdeps/x86_64/dl-trampoline.h: Likewise.
* sysdeps/x86_64/ifuncmain8.c: New file.
* sysdeps/x86_64/ifuncmod8.c: Likewise.
* sysdeps/x86_64/nptl/tcb-offsets.sym (RTLD_SAVESPACE_SSE):
Removed.
* sysdeps/x86_64/nptl/tls.h (__128bits): Removed.
(tcbhead_t): Change rtld_must_xmm_save to __glibc_unused1.
Change rtld_savespace_sse to __glibc_unused2.
(RTLD_CHECK_FOREIGN_CALL): Removed.
(RTLD_ENABLE_FOREIGN_CALL): Likewise.
(RTLD_PREPARE_FOREIGN_CALL): Likewise.
(RTLD_FINALIZE_FOREIGN_CALL): Likewise.
2015-08-25 04:33:54 -07:00
|
|
|
#define VEC(i) xmm##i
|
|
|
|
#define _dl_runtime_resolve _dl_runtime_resolve_sse
|
|
|
|
#define _dl_runtime_profile _dl_runtime_profile_sse
|
|
|
|
#undef RESTORE_AVX
|
|
|
|
#include "dl-trampoline.h"
|