5a706f649d
* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_dynamic): Use PTR_REG macro in cmp instruction.
444 lines
13 KiB
ArmAsm
444 lines
13 KiB
ArmAsm
/* Thread-local storage handling in the ELF dynamic linker.
|
|
AArch64 version.
|
|
Copyright (C) 2011-2017 Free Software Foundation, Inc.
|
|
|
|
This file is part of the GNU C Library.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
#include <sysdep.h>
|
|
#include <tls.h>
|
|
#include "tlsdesc.h"
|
|
|
|
/* Number of Q-register pairs covered by SAVE_Q_REGISTERS and
   RESTORE_Q_REGISTERS (q0..q31 => 16 pairs of 2 x 16 bytes).  */
#define NSAVEDQREGPAIRS	16

/* Spill q0-q31 to the stack.

   The first store pre-decrements sp by the size of the complete save
   area (32 bytes per pair) and the CFA offset is adjusted to match.
   Every following pair is written to its own 32-byte slot above the
   new sp, pair N living at [sp, #32*N].  No per-register CFI is
   emitted for the Q registers.  */
#define SAVE_Q_REGISTERS				\
	stp	q0,  q1,  [sp, #-32*NSAVEDQREGPAIRS]!;	\
	cfi_adjust_cfa_offset (32*NSAVEDQREGPAIRS);	\
	stp	q2,  q3,  [sp, #32*1];			\
	stp	q4,  q5,  [sp, #32*2];			\
	stp	q6,  q7,  [sp, #32*3];			\
	stp	q8,  q9,  [sp, #32*4];			\
	stp	q10, q11, [sp, #32*5];			\
	stp	q12, q13, [sp, #32*6];			\
	stp	q14, q15, [sp, #32*7];			\
	stp	q16, q17, [sp, #32*8];			\
	stp	q18, q19, [sp, #32*9];			\
	stp	q20, q21, [sp, #32*10];			\
	stp	q22, q23, [sp, #32*11];			\
	stp	q24, q25, [sp, #32*12];			\
	stp	q26, q27, [sp, #32*13];			\
	stp	q28, q29, [sp, #32*14];			\
	stp	q30, q31, [sp, #32*15];
|
|
|
|
/* Reload q0-q31 from the area written by SAVE_Q_REGISTERS.

   Pairs 1..15 are read back from their [sp, #32*N] slots first; the
   q0/q1 pair is loaded last with a post-increment that releases the
   whole 32*NSAVEDQREGPAIRS byte save area, after which the CFA offset
   is adjusted back by the same amount.  */
#define RESTORE_Q_REGISTERS				\
	ldp	q2,  q3,  [sp, #32*1];			\
	ldp	q4,  q5,  [sp, #32*2];			\
	ldp	q6,  q7,  [sp, #32*3];			\
	ldp	q8,  q9,  [sp, #32*4];			\
	ldp	q10, q11, [sp, #32*5];			\
	ldp	q12, q13, [sp, #32*6];			\
	ldp	q14, q15, [sp, #32*7];			\
	ldp	q16, q17, [sp, #32*8];			\
	ldp	q18, q19, [sp, #32*9];			\
	ldp	q20, q21, [sp, #32*10];			\
	ldp	q22, q23, [sp, #32*11];			\
	ldp	q24, q25, [sp, #32*12];			\
	ldp	q26, q27, [sp, #32*13];			\
	ldp	q28, q29, [sp, #32*14];			\
	ldp	q30, q31, [sp, #32*15];			\
	ldp	q0,  q1,  [sp], #32*NSAVEDQREGPAIRS;	\
	cfi_adjust_cfa_offset (-32*NSAVEDQREGPAIRS);
|
|
|
|
.text
|
|
|
|
/* Return the thread pointer offset of a symbol that lives in the
   static TLS block.  That offset is identical in every thread, so it
   is simply read back from the descriptor.

   Prototype:
   _dl_tlsdesc_return (tlsdesc *) ;

   In:	x0 = address of the TLS descriptor.
   Out:	x0 = second word of the descriptor (loaded from
	     [x0, #PTR_SIZE]).
   No other register is written and the stack is not used.  */

	.hidden	_dl_tlsdesc_return
	.global	_dl_tlsdesc_return
	.type	_dl_tlsdesc_return,%function
	cfi_startproc
	.align	2
_dl_tlsdesc_return:
	/* DELOUSE and PTR_REG come from the included headers; presumably
	   they adapt the pointer argument and register width to the
	   ABI's pointer size -- confirm against sysdep.h.  */
	DELOUSE (0)
	ldr	PTR_REG (0), [x0, #PTR_SIZE]
	RET
	cfi_endproc
	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
|
|
|
|
/* Variant of _dl_tlsdesc_return that additionally synchronizes with
   lazy relocation processing before reading the descriptor argument.

   Prototype:
   _dl_tlsdesc_return_lazy (tlsdesc *) ;

   In:	x0 = address of the TLS descriptor.
   Out:	x0 = second word of the descriptor (loaded from
	     [x0, #PTR_SIZE]).  */

	.hidden	_dl_tlsdesc_return_lazy
	.global	_dl_tlsdesc_return_lazy
	.type	_dl_tlsdesc_return_lazy,%function
	cfi_startproc
	.align	2
_dl_tlsdesc_return_lazy:
	/* Acquire-load td->entry and throw the value away.  The call site
	   (compiler generated, part of the TLS access ABI) has already
	   loaded [x0], and since this function is the final value of
	   td->entry the ldar observes that same value.  It therefore
	   synchronizes with the release store to td->entry done by
	   _dl_tlsdesc_resolve_rela_fixup, which guarantees that the load
	   from [x0,#PTR_SIZE] below sees the initialized td->arg.  */
	DELOUSE (0)
	ldar	PTR_REG (zr), [x0]
	ldr	PTR_REG (0), [x0, #PTR_SIZE]
	RET
	cfi_endproc
	.size	_dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy
|
|
|
|
/* Handler for undefined weak TLS symbols.

   Prototype:
   _dl_tlsdesc_undefweak (tlsdesc *);

   The second word of the descriptor contains the addend.
   Return the addend minus the thread pointer.  This ensures
   that when the caller adds on the thread pointer it gets back
   the addend.

   In:	x0 = address of the TLS descriptor.
   Out:	x0 = addend - tpidr_el0.
   x1 is needed as scratch for the thread pointer, so it is spilled to
   a 16-byte stack slot on entry and reloaded before returning.  */

	.hidden	_dl_tlsdesc_undefweak
	.global	_dl_tlsdesc_undefweak
	.type	_dl_tlsdesc_undefweak,%function
	cfi_startproc
	.align	2
_dl_tlsdesc_undefweak:
	str	x1, [sp, #-16]!
	cfi_adjust_cfa_offset (16)
	/* x1 is overwritten with the thread pointer below; record where
	   the caller's value lives so that unwinding from inside this
	   function recovers it.  */
	cfi_rel_offset (x1, 0)
	/* The ldar here happens after the load from [x0] at the call site
	   (that is generated by the compiler as part of the TLS access ABI),
	   so it reads the same value (this function is the final value of
	   td->entry) and thus it synchronizes with the release store to
	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
	   from [x0,#PTR_SIZE] here happens after the initialization of
	   td->arg.  */
	DELOUSE (0)
	ldar	PTR_REG (zr), [x0]
	ldr	PTR_REG (0), [x0, #PTR_SIZE]	/* x0 = addend.  */
	mrs	x1, tpidr_el0			/* x1 = thread pointer.  */
	sub	PTR_REG (0), PTR_REG (0), PTR_REG (1)
	ldr	x1, [sp], #16
	cfi_adjust_cfa_offset (-16)
	cfi_restore (x1)
	RET
	cfi_endproc
	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
|
|
|
|
#ifdef SHARED
/* Handler for dynamic TLS symbols.

   Prototype:
   _dl_tlsdesc_dynamic (tlsdesc *) ;

   The second word of the descriptor points to a
   tlsdesc_dynamic_arg structure.

   Returns the offset between the thread pointer and the
   object referenced by the argument.

   ptrdiff_t
   __attribute__ ((__regparm__ (1)))
   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
   {
     struct tlsdesc_dynamic_arg *td = tdp->arg;
     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
     if (__builtin_expect (td->gen_count <= dtv[0].counter
		&& (dtv[td->tlsinfo.ti_module].pointer.val
		    != TLS_DTV_UNALLOCATED),
		1))
       return dtv[td->tlsinfo.ti_module].pointer.val
		+ td->tlsinfo.ti_offset
		- __thread_pointer;

     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
   }

   Register use on the fast path:
     x0 = tdp on entry, then dtv, then the DTV entry / result
     x1 = td (tdp->arg), later td->tlsinfo.ti_offset
     x2 = dtv[0].counter, later td->tlsinfo.ti_module
     x3 = td->gen_count
     x4 = thread pointer (tpidr_el0)
   Every register other than x0 is restored before returning.

   Stack frame (fast path, 32+16*2 bytes):
     [sp, #0]	x29, x30
     [sp, #16]	unused
     [sp, #32]	x1, x2
     [sp, #48]	x3, x4
   The slow path pushes a further 16*7 bytes for x5-x18 and then the
   Q-register save area below that.  */

	.hidden	_dl_tlsdesc_dynamic
	.global	_dl_tlsdesc_dynamic
	.type	_dl_tlsdesc_dynamic,%function
	cfi_startproc
	.align	2
_dl_tlsdesc_dynamic:
# define NSAVEXREGPAIRS 2
	stp	x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]!
	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
	cfi_rel_offset (x29, 0)
	cfi_rel_offset (x30, 8)
	mov	x29, sp
	DELOUSE (0)

	/* Save just enough registers to support fast path, if we fall
	   into slow path we will save additional registers.  */

	stp	x1, x2, [sp, #32+16*0]
	stp	x3, x4, [sp, #32+16*1]
	cfi_rel_offset (x1, 32)
	cfi_rel_offset (x2, 32+8)
	cfi_rel_offset (x3, 32+16)
	cfi_rel_offset (x4, 32+24)

	mrs	x4, tpidr_el0
	/* The ldar here happens after the load from [x0] at the call site
	   (that is generated by the compiler as part of the TLS access ABI),
	   so it reads the same value (this function is the final value of
	   td->entry) and thus it synchronizes with the release store to
	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
	   from [x0,#PTR_SIZE] here happens after the initialization of
	   td->arg.  */
	ldar	PTR_REG (zr), [x0]
	ldr	PTR_REG (1), [x0,#TLSDESC_ARG]		/* td = tdp->arg.  */
	ldr	PTR_REG (0), [x4,#TCBHEAD_DTV]		/* dtv.  */
	ldr	PTR_REG (3), [x1,#TLSDESC_GEN_COUNT]	/* td->gen_count.  */
	ldr	PTR_REG (2), [x0,#DTV_COUNTER]		/* dtv[0].counter.  */
	cmp	PTR_REG (3), PTR_REG (2)
	b.hi	2f		/* gen_count > counter: slow path.  */
	ldr	PTR_REG (2), [x1,#TLSDESC_MODID]	/* ti_module.  */
	/* Scale the module id by 2^(PTR_LOG_SIZE + 1), i.e. two pointers
	   per DTV slot (presumably sizeof (dtv_t) -- confirm).  */
	add	PTR_REG (0), PTR_REG (0), PTR_REG (2), lsl #(PTR_LOG_SIZE + 1)
	ldr	PTR_REG (0), [x0] /* Load val member of DTV entry.  */
	cmp	PTR_REG (0), #TLS_DTV_UNALLOCATED
	b.eq	2f		/* Block not allocated yet: slow path.  */
	ldr	PTR_REG (1), [x1,#TLSDESC_MODOFF]	/* ti_offset.  */
	add	PTR_REG (0), PTR_REG (0), PTR_REG (1)
	sub	PTR_REG (0), PTR_REG (0), PTR_REG (4)
1:
	/* Common exit, also reached from the slow path.  Snapshot the
	   unwind state of the live 64-byte frame: CFI is interpreted in
	   address order, so without this the epilogue's adjustments
	   below would still be in effect for the code at 2:.  */
	cfi_remember_state
	ldp	x1, x2, [sp, #32+16*0]
	ldp	x3, x4, [sp, #32+16*1]

	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
	cfi_restore (x29)
	cfi_restore (x30)
# undef NSAVEXREGPAIRS
	RET
	/* The frame is still allocated on entry to the slow path, so go
	   back to the unwind state captured at 1:.  */
	cfi_restore_state
2:
	/* This is the slow path.  We need to call __tls_get_addr() which
	   means we need to save and restore all the register that the
	   callee will trash.  */

	/* Save the remaining registers that we must treat as caller save.  */
# define NSAVEXREGPAIRS 7
	stp	x5, x6, [sp, #-16*NSAVEXREGPAIRS]!
	cfi_adjust_cfa_offset (16*NSAVEXREGPAIRS)
	stp	x7, x8, [sp, #16*1]
	stp	x9, x10, [sp, #16*2]
	stp	x11, x12, [sp, #16*3]
	stp	x13, x14, [sp, #16*4]
	stp	x15, x16, [sp, #16*5]
	stp	x17, x18, [sp, #16*6]
	cfi_rel_offset (x5, 0)
	cfi_rel_offset (x6, 8)
	cfi_rel_offset (x7, 16)
	cfi_rel_offset (x8, 16+8)
	cfi_rel_offset (x9, 16*2)
	cfi_rel_offset (x10, 16*2+8)
	cfi_rel_offset (x11, 16*3)
	cfi_rel_offset (x12, 16*3+8)
	cfi_rel_offset (x13, 16*4)
	cfi_rel_offset (x14, 16*4+8)
	cfi_rel_offset (x15, 16*5)
	cfi_rel_offset (x16, 16*5+8)
	cfi_rel_offset (x17, 16*6)
	cfi_rel_offset (x18, 16*6+8)

	SAVE_Q_REGISTERS

	/* x1 still holds td.  It is passed unchanged as the argument, which
	   matches &td->tlsinfo in the C sketch above if tlsinfo is the first
	   member of tlsdesc_dynamic_arg (presumably so -- confirm against
	   tlsdesc.h).  */
	mov	x0, x1
	bl	__tls_get_addr

	/* Convert the returned address into an offset from the thread
	   pointer.  x1 is free here; it is reloaded at 1:.  */
	mrs	x1, tpidr_el0
	sub	PTR_REG (0), PTR_REG (0), PTR_REG (1)

	RESTORE_Q_REGISTERS

	ldp	x7, x8, [sp, #16*1]
	ldp	x9, x10, [sp, #16*2]
	ldp	x11, x12, [sp, #16*3]
	ldp	x13, x14, [sp, #16*4]
	ldp	x15, x16, [sp, #16*5]
	ldp	x17, x18, [sp, #16*6]
	ldp	x5, x6, [sp], #16*NSAVEXREGPAIRS
	cfi_adjust_cfa_offset (-16*NSAVEXREGPAIRS)
	b	1b
	cfi_endproc
	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
# undef NSAVEXREGPAIRS
#endif
|
|
|
|
/* Wrapper around the lazy resolver for TLS_DESC RELA relocations.

   It preserves every register the C fixup routine may clobber, calls
   _dl_tlsdesc_resolve_rela_fixup, and then invokes whatever entry
   point the fixup has installed in the descriptor so that it returns
   the TP offset of the symbol in x0.

   On entry:
     x0 = address of the TLS descriptor
     x3 = base from which the fixup's second argument is loaded
	  ([x3, #PTR_SIZE]; presumably the link map -- confirm against
	  the tlsdesc PLT entry)
     [sp] = x2 and x3 as pushed by the tlsdesc PLT entry
   On exit:
     x0 = value returned by the descriptor's resolved entry point;
     x1-x18, x29 and x30 are reloaded from the frame, and x2/x3 are
     popped from the slot the PLT entry pushed.

   Frame (32+16*9 bytes): x29/x30 at 0, x1/x4 .. x17/x18 in 16-byte
   slots from 32 upwards, x0 at 32+16*8.  */

	.hidden	_dl_tlsdesc_resolve_rela
	.global	_dl_tlsdesc_resolve_rela
	.type	_dl_tlsdesc_resolve_rela,%function
	cfi_startproc
	.align	2
_dl_tlsdesc_resolve_rela:
#define	NSAVEXREGPAIRS 9
	/* The tlsdesc PLT entry pushes x2 and x3 to the stack.  */
	cfi_adjust_cfa_offset (16)
	cfi_rel_offset (x2, 0)
	cfi_rel_offset (x3, 8)
	stp	x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
	cfi_rel_offset (x29, 0)
	cfi_rel_offset (x30, 8)
	mov	x29, sp
	stp	x1,  x4,  [sp, #32+16*0]
	stp	x5,  x6,  [sp, #32+16*1]
	stp	x7,  x8,  [sp, #32+16*2]
	stp	x9,  x10, [sp, #32+16*3]
	stp	x11, x12, [sp, #32+16*4]
	stp	x13, x14, [sp, #32+16*5]
	stp	x15, x16, [sp, #32+16*6]
	stp	x17, x18, [sp, #32+16*7]
	str	x0,       [sp, #32+16*8]
	cfi_rel_offset (x1, 32)
	cfi_rel_offset (x4, 32+8)
	cfi_rel_offset (x5, 32+16)
	cfi_rel_offset (x6, 32+16+8)
	cfi_rel_offset (x7, 32+16*2)
	cfi_rel_offset (x8, 32+16*2+8)
	cfi_rel_offset (x9, 32+16*3)
	cfi_rel_offset (x10, 32+16*3+8)
	cfi_rel_offset (x11, 32+16*4)
	cfi_rel_offset (x12, 32+16*4+8)
	cfi_rel_offset (x13, 32+16*5)
	cfi_rel_offset (x14, 32+16*5+8)
	cfi_rel_offset (x15, 32+16*6)
	cfi_rel_offset (x16, 32+16*6+8)
	cfi_rel_offset (x17, 32+16*7)
	cfi_rel_offset (x18, 32+16*7+8)
	cfi_rel_offset (x0, 32+16*8)

	SAVE_Q_REGISTERS

	/* x0 is still the descriptor; the second argument is read from
	   the word following [x3].  */
	DELOUSE (3)
	ldr	PTR_REG (1), [x3, #PTR_SIZE]
	bl	_dl_tlsdesc_resolve_rela_fixup

	RESTORE_Q_REGISTERS

	/* Reload the descriptor address and call the entry point that is
	   now stored in its first word.  */
	ldr	x0, [sp, #32+16*8]
	DELOUSE (0)
	ldr	PTR_REG (1), [x0]
	blr	x1

	/* x0 now holds the result and is deliberately not reloaded.  */
	ldp	x1,  x4,  [sp, #32+16*0]
	ldp	x5,  x6,  [sp, #32+16*1]
	ldp	x7,  x8,  [sp, #32+16*2]
	ldp	x9,  x10, [sp, #32+16*3]
	ldp	x11, x12, [sp, #32+16*4]
	ldp	x13, x14, [sp, #32+16*5]
	ldp	x15, x16, [sp, #32+16*6]
	ldp	x17, x18, [sp, #32+16*7]
	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
	cfi_restore (x29)
	cfi_restore (x30)
	/* Pop the pair pushed by the tlsdesc PLT entry.  */
	ldp	x2, x3, [sp], #16
	cfi_adjust_cfa_offset (-16)
	RET
#undef NSAVEXREGPAIRS
	cfi_endproc
	.size	_dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela
|
|
|
|
/* Placeholder entry point used while a TLS relocation is being
   lazily resolved.

   As soon as one thread begins resolving a TLS relocation it points
   the descriptor at this function.  Any other thread that hits the
   same descriptor concurrently then bypasses the original lazy
   resolver and goes straight to a condition wait inside
   _dl_tlsdesc_resolve_hold_fixup.

   Once the fixup returns, the descriptor has been updated; its entry
   point is reloaded and called so that it returns the TP offset of
   the symbol in x0.

   On entry:
     x0 = address of the TLS descriptor
   On exit:
     x0 = value returned by the descriptor's resolved entry point;
     x1-x18, x29 and x30 are reloaded from the frame.

   Frame (32+16*10 bytes): x29/x30 at 0, x1/x2 .. x17/x18 in 16-byte
   slots from 32 upwards, x0 at 32+16*9.  */

	.hidden	_dl_tlsdesc_resolve_hold
	.global	_dl_tlsdesc_resolve_hold
	.type	_dl_tlsdesc_resolve_hold,%function
	cfi_startproc
	.align	2
_dl_tlsdesc_resolve_hold:
#define	NSAVEXREGPAIRS 10
1:
	stp	x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
	cfi_rel_offset (x29, 0)
	cfi_rel_offset (x30, 8)
	mov	x29, sp
	stp	x1,  x2,  [sp, #32+16*0]
	stp	x3,  x4,  [sp, #32+16*1]
	stp	x5,  x6,  [sp, #32+16*2]
	stp	x7,  x8,  [sp, #32+16*3]
	stp	x9,  x10, [sp, #32+16*4]
	stp	x11, x12, [sp, #32+16*5]
	stp	x13, x14, [sp, #32+16*6]
	stp	x15, x16, [sp, #32+16*7]
	stp	x17, x18, [sp, #32+16*8]
	str	x0,       [sp, #32+16*9]
	cfi_rel_offset (x1, 32)
	cfi_rel_offset (x2, 32+8)
	cfi_rel_offset (x3, 32+16)
	cfi_rel_offset (x4, 32+16+8)
	cfi_rel_offset (x5, 32+16*2)
	cfi_rel_offset (x6, 32+16*2+8)
	cfi_rel_offset (x7, 32+16*3)
	cfi_rel_offset (x8, 32+16*3+8)
	cfi_rel_offset (x9, 32+16*4)
	cfi_rel_offset (x10, 32+16*4+8)
	cfi_rel_offset (x11, 32+16*5)
	cfi_rel_offset (x12, 32+16*5+8)
	cfi_rel_offset (x13, 32+16*6)
	cfi_rel_offset (x14, 32+16*6+8)
	cfi_rel_offset (x15, 32+16*7)
	cfi_rel_offset (x16, 32+16*7+8)
	cfi_rel_offset (x17, 32+16*8)
	cfi_rel_offset (x18, 32+16*8+8)
	cfi_rel_offset (x0, 32+16*9)

	SAVE_Q_REGISTERS

	/* Second argument: the address of label 1 above, i.e. the start
	   of this very function.  x0 is still the descriptor.  */
	adr	x1, 1b
	bl	_dl_tlsdesc_resolve_hold_fixup

	RESTORE_Q_REGISTERS

	/* Reload the descriptor address and call the entry point that is
	   now stored in its first word.  */
	ldr	x0, [sp, #32+16*9]
	DELOUSE (0)
	ldr	PTR_REG (1), [x0]
	blr	x1

	/* x0 now holds the result and is deliberately not reloaded.  */
	ldp	x1,  x2,  [sp, #32+16*0]
	ldp	x3,  x4,  [sp, #32+16*1]
	ldp	x5,  x6,  [sp, #32+16*2]
	ldp	x7,  x8,  [sp, #32+16*3]
	ldp	x9,  x10, [sp, #32+16*4]
	ldp	x11, x12, [sp, #32+16*5]
	ldp	x13, x14, [sp, #32+16*6]
	ldp	x15, x16, [sp, #32+16*7]
	ldp	x17, x18, [sp, #32+16*8]
	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
	cfi_restore (x29)
	cfi_restore (x30)
	RET
	cfi_endproc
	.size	_dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
#undef NSAVEXREGPAIRS
|