08325735c2
Lazy TLSDESC initialization needs to be synchronized with concurrent TLS accesses.

The TLS descriptor contains a function pointer (entry) and an argument that is accessed from the entry function. With lazy initialization, the first call to the entry function updates the entry and the argument to their final values. A final entry function must make sure that it accesses an initialized argument. This needs synchronization on systems with weak memory ordering; otherwise the writes of the first call can be observed out of order.

There are at least two issues with the current code:

- tlsdesc.c (i386, x86_64, arm, aarch64) uses volatile memory accesses on the write side (in the initial entry function) instead of C11 atomics.
- On systems with weak memory ordering (arm, aarch64) the read side synchronization is missing from the final entry functions (dl-tlsdesc.S).

This patch only deals with aarch64.

* Write side: Volatile accesses were replaced with C11 relaxed atomics, and a release store was used for the initialization of entry so the read side can synchronize with it.
* Read side: The TLS access generated by the compiler and the entry function code are roughly

      ldr x1, [x0]      // load the entry
      blr x1            // call it

    entryfunc:
      ldr x0, [x0,#8]   // load the arg
      ret

Various alternatives were considered to force the ordering in the entry function between the two loads:

(1) barrier

    entryfunc:
      dmb ishld
      ldr x0, [x0,#8]

(2) address dependency (if the address of the second load depends on the result of the first one, the ordering is guaranteed):

    entryfunc:
      ldr x1, [x0]
      and x1, x1, #8
      orr x1, x1, #8
      ldr x0, [x0,x1]

(3) load-acquire (ARMv8 instruction that is ordered before subsequent loads and stores)

    entryfunc:
      ldar xzr, [x0]
      ldr x0, [x0,#8]

Option (1) is the simplest but slowest (note: this runs at every TLS access). Options (2) and (3) do one extra load from [x0] (same-address loads are ordered, so it happens-after the load on the call site). Option (2) clobbers x1, which is problematic because existing gcc does not expect that, so approach (3) was chosen.

A new _dl_tlsdesc_return_lazy entry function was introduced for lazily relocated static TLS, so non-lazy static TLS can avoid the synchronization cost.

[BZ #18034]
* sysdeps/aarch64/dl-tlsdesc.h (_dl_tlsdesc_return_lazy): Declare.
* sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Define.
(_dl_tlsdesc_undefweak): Guarantee TLSDESC entry and argument load-load ordering using ldar.
(_dl_tlsdesc_dynamic): Likewise.
(_dl_tlsdesc_return_lazy): Likewise.
* sysdeps/aarch64/tlsdesc.c (_dl_tlsdesc_resolve_rela_fixup): Use relaxed atomics instead of volatile and synchronize with release store.
(_dl_tlsdesc_resolve_hold_fixup): Use relaxed atomics instead of volatile.
* elf/tlsdeschtab.h (_dl_tlsdesc_resolve_early_return_p): Likewise.
69 lines
1.9 KiB
C
69 lines
1.9 KiB
C
/* Thread-local storage descriptor handling in the ELF dynamic linker.
   AArch64 version.
   Copyright (C) 2011-2015 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
|
|
|
|
#ifndef _AARCH64_DL_TLSDESC_H
|
|
#define _AARCH64_DL_TLSDESC_H 1
|
|
|
|
/* Type used to represent a TLS descriptor in the GOT.

   The descriptor holds a function pointer (ENTRY) and an argument (ARG)
   that is read by the entry function.  The entry function is called
   with a pointer to the descriptor itself.

   NOTE(review): the assembly entry functions appear to load ARG from a
   fixed offset right after ENTRY, so the order and size of the members
   must not change -- confirm against dl-tlsdesc.S.  */
struct tlsdesc
{
  /* Function called to resolve a TLS access through this descriptor.  */
  ptrdiff_t (*entry) (struct tlsdesc *);

  /* Datum consumed by ENTRY; its interpretation depends on which entry
     function is installed.  */
  void *arg;
};
|
|
|
|
/* Pair of unsigned words identifying a TLS object.
   NOTE(review): presumably TI_MODULE is the module ID and TI_OFFSET the
   offset inside that module's TLS block -- confirm against the users of
   tls_index.  */
typedef struct dl_tls_index
{
  unsigned long int ti_module;
  unsigned long int ti_offset;
} tls_index;
|
|
|
|
/* Type used as the argument in a TLS descriptor for a symbol that
|
|
needs dynamic TLS offsets. */
|
|
struct tlsdesc_dynamic_arg
|
|
{
|
|
tls_index tlsinfo;
|
|
size_t gen_count;
|
|
};
|
|
|
|
extern ptrdiff_t attribute_hidden
|
|
_dl_tlsdesc_return (struct tlsdesc *);
|
|
|
|
extern ptrdiff_t attribute_hidden
|
|
_dl_tlsdesc_return_lazy (struct tlsdesc *);
|
|
|
|
extern ptrdiff_t attribute_hidden
|
|
_dl_tlsdesc_undefweak (struct tlsdesc *);
|
|
|
|
extern ptrdiff_t attribute_hidden
|
|
_dl_tlsdesc_resolve_rela (struct tlsdesc *);
|
|
|
|
extern ptrdiff_t attribute_hidden
|
|
_dl_tlsdesc_resolve_hold (struct tlsdesc *);
|
|
|
|
/* Declarations that are only available when building the shared
   library (SHARED defined).  */
# ifdef SHARED
/* NOTE(review): presumably creates (or looks up) the argument object for
   a dynamic TLS descriptor from a link map and a TLS offset -- confirm
   against the definition.  */
extern void *internal_function _dl_make_tlsdesc_dynamic (struct link_map *,
							  size_t);

/* Entry function with the same signature as the ENTRY member of
   struct tlsdesc.  NOTE(review): presumably used for descriptors whose
   argument is a struct tlsdesc_dynamic_arg -- confirm against
   dl-tlsdesc.S.  */
extern ptrdiff_t attribute_hidden
_dl_tlsdesc_dynamic (struct tlsdesc *);
# endif /* SHARED */
|
|
|
|
#endif
|