Browse Source

unify static and dynamic linked implementations of thread-local storage

this both allows removal of some of the main remaining uses of the
SHARED macro and clears one obstacle to static-linked dlopen support,
which may be added at some point in the future.

specialized single-TLS-module versions of __copy_tls and __reset_tls
are removed and replaced with code adapted from their dynamic-linked
versions, capable of operating on a whole chain of TLS modules, and
use of the dynamic linker's DSO chain (which contains large struct dso
objects) by these functions is replaced with a new chain of struct
tls_module objects containing only the information needed for
implementing TLS. this may also yield some performance benefit
initializing TLS for a new thread when a large number of modules
without TLS have been loaded, since there is no need to walk
structures for modules without TLS.
master
Rich Felker 11 years ago
parent
commit
d56460c939
  1. 71
      src/env/__init_tls.c
  2. 23
      src/env/__reset_tls.c
  3. 9
      src/internal/libc.h
  4. 140
      src/ldso/dynlink.c

71
src/env/__init_tls.c

@ -8,9 +8,6 @@
#include "atomic.h"
#include "syscall.h"
#ifndef SHARED
static
#endif
int __init_tp(void *p)
{
pthread_t td = p;
@ -24,8 +21,6 @@ int __init_tp(void *p)
return 0;
}
#ifndef SHARED
static struct builtin_tls {
char c;
struct pthread pt;
@ -33,33 +28,40 @@ static struct builtin_tls {
} builtin_tls[1];
#define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt)
struct tls_image {
void *image;
size_t len, size, align;
} __static_tls;
#define T __static_tls
static struct tls_module main_tls;
void *__copy_tls(unsigned char *mem)
{
pthread_t td;
if (!T.image) return mem;
void **dtv = (void *)mem;
dtv[0] = (void *)1;
struct tls_module *p;
size_t i;
void **dtv;
#ifdef TLS_ABOVE_TP
mem += sizeof(void *) * 2;
mem += -((uintptr_t)mem + sizeof(struct pthread)) & (T.align-1);
dtv = (void **)(mem + libc.tls_size) - (libc.tls_cnt + 1);
mem += -((uintptr_t)mem + sizeof(struct pthread)) & (libc.tls_align-1);
td = (pthread_t)mem;
mem += sizeof(struct pthread);
for (i=1, p=libc.tls_head; p; i++, p=p->next) {
dtv[i] = mem + p->offset;
memcpy(dtv[i], p->image, p->len);
}
#else
dtv = (void **)mem;
mem += libc.tls_size - sizeof(struct pthread);
mem -= (uintptr_t)mem & (T.align-1);
mem -= (uintptr_t)mem & (libc.tls_align-1);
td = (pthread_t)mem;
mem -= T.size;
for (i=1, p=libc.tls_head; p; i++, p=p->next) {
dtv[i] = mem - p->offset;
memcpy(dtv[i], p->image, p->len);
}
#endif
dtv[0] = (void *)libc.tls_cnt;
td->dtv = td->dtv_copy = dtv;
dtv[1] = mem;
memcpy(mem, T.image, T.len);
return td;
}
@ -69,7 +71,7 @@ typedef Elf32_Phdr Phdr;
typedef Elf64_Phdr Phdr;
#endif
void __init_tls(size_t *aux)
static void static_init_tls(size_t *aux)
{
unsigned char *p;
size_t n;
@ -86,16 +88,24 @@ void __init_tls(size_t *aux)
}
if (tls_phdr) {
T.image = (void *)(base + tls_phdr->p_vaddr);
T.len = tls_phdr->p_filesz;
T.size = tls_phdr->p_memsz;
T.align = tls_phdr->p_align;
main_tls.image = (void *)(base + tls_phdr->p_vaddr);
main_tls.len = tls_phdr->p_filesz;
main_tls.size = tls_phdr->p_memsz;
main_tls.align = tls_phdr->p_align;
libc.tls_cnt = 1;
libc.tls_head = &main_tls;
}
T.size += (-T.size - (uintptr_t)T.image) & (T.align-1);
if (T.align < MIN_TLS_ALIGN) T.align = MIN_TLS_ALIGN;
main_tls.size += (-main_tls.size - (uintptr_t)main_tls.image)
& (main_tls.align-1);
if (main_tls.align < MIN_TLS_ALIGN) main_tls.align = MIN_TLS_ALIGN;
#ifndef TLS_ABOVE_TP
main_tls.offset = main_tls.size;
#endif
libc.tls_size = 2*sizeof(void *)+T.size+T.align+sizeof(struct pthread)
libc.tls_align = main_tls.align;
libc.tls_size = 2*sizeof(void *) + sizeof(struct pthread)
+ main_tls.size + main_tls.align
+ MIN_TLS_ALIGN-1 & -MIN_TLS_ALIGN;
if (libc.tls_size > sizeof builtin_tls) {
@ -117,6 +127,5 @@ void __init_tls(size_t *aux)
if (__init_tp(__copy_tls(mem)) < 0)
a_crash();
}
#else
void __init_tls(size_t *auxv) { }
#endif
weak_alias(static_init_tls, __init_tls);

23
src/env/__reset_tls.c

@ -1,21 +1,16 @@
#ifndef SHARED
#include <string.h>
#include "pthread_impl.h"
extern struct tls_image {
void *image;
size_t len, size, align;
} __static_tls;
#define T __static_tls
#include "libc.h"
void __reset_tls()
{
if (!T.size) return;
pthread_t self = __pthread_self();
memcpy(self->dtv[1], T.image, T.len);
memset((char *)self->dtv[1]+T.len, 0, T.size-T.len);
struct tls_module *p;
size_t i, n = (size_t)self->dtv[0];
if (n) for (p=libc.tls_head, i=1; i<=n; i++, p=p->next) {
if (!self->dtv[i]) continue;
memcpy(self->dtv[i], p->image, p->len);
memset((char *)self->dtv[i]+p->len, 0,
p->size - p->len);
}
}
#endif

9
src/internal/libc.h

@ -11,13 +11,20 @@ struct __locale_struct {
const struct __locale_map *volatile cat[6];
};
/* Describes the thread-local storage of a single module. Modules are
 * linked into a singly linked chain (headed by libc.tls_head) that the
 * TLS setup and reset code walks. */
struct tls_module {
/* Next module in the chain; the walking loops stop when this is null. */
struct tls_module *next;
/* Start of the module's TLS initialization image (taken from the PT_TLS
 * program header's p_vaddr, relocated to the load address). */
void *image;
/* len:    bytes of initialized data copied from image (PT_TLS p_filesz).
 * size:   total bytes of the module's TLS block (PT_TLS p_memsz, possibly
 *         rounded up for alignment); the part beyond len is zero-filled
 *         when a thread's TLS is reset.
 * align:  required alignment of the block (PT_TLS p_align).
 * offset: displacement used to locate this module's block inside a
 *         thread's TLS area; added to the base position when
 *         TLS_ABOVE_TP is defined, subtracted from it otherwise. */
size_t len, size, align, offset;
};
struct __libc {
int can_do_threads;
int threaded;
int secure;
volatile int threads_minus_1;
size_t *auxv;
size_t tls_size;
struct tls_module *tls_head;
size_t tls_size, tls_align, tls_cnt;
size_t page_size;
struct __locale_struct global_locale;
};

140
src/ldso/dynlink.c

@ -70,8 +70,8 @@ struct dso {
char kernel_mapped;
struct dso **deps, *needed_by;
char *rpath_orig, *rpath;
void *tls_image;
size_t tls_len, tls_size, tls_align, tls_id, tls_offset;
struct tls_module tls;
size_t tls_id;
size_t relro_start, relro_end;
void **new_dtv;
unsigned char *new_tls;
@ -99,6 +99,7 @@ struct symdef {
int __init_tp(void *);
void __init_libc(char **, char *);
void *__copy_tls(unsigned char *);
const char *__libc_get_version(void);
@ -123,6 +124,7 @@ static int noload;
static jmp_buf *rtld_fail;
static pthread_rwlock_t lock;
static struct debug debug;
static struct tls_module *tls_tail;
static size_t tls_cnt, tls_offset, tls_align = MIN_TLS_ALIGN;
static size_t static_tls_cnt;
static pthread_mutex_t init_fini_lock = { ._m_type = PTHREAD_MUTEX_RECURSIVE };
@ -397,14 +399,14 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
break;
#ifdef TLS_ABOVE_TP
case REL_TPOFF:
*reloc_addr = tls_val + def.dso->tls_offset + TPOFF_K + addend;
*reloc_addr = tls_val + def.dso->tls.offset + TPOFF_K + addend;
break;
#else
case REL_TPOFF:
*reloc_addr = tls_val - def.dso->tls_offset + addend;
*reloc_addr = tls_val - def.dso->tls.offset + addend;
break;
case REL_TPOFF_NEG:
*reloc_addr = def.dso->tls_offset - tls_val + addend;
*reloc_addr = def.dso->tls.offset - tls_val + addend;
break;
#endif
case REL_TLSDESC:
@ -426,10 +428,10 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
} else {
reloc_addr[0] = (size_t)__tlsdesc_static;
#ifdef TLS_ABOVE_TP
reloc_addr[1] = tls_val + def.dso->tls_offset
reloc_addr[1] = tls_val + def.dso->tls.offset
+ TPOFF_K + addend;
#else
reloc_addr[1] = tls_val - def.dso->tls_offset
reloc_addr[1] = tls_val - def.dso->tls.offset
+ addend;
#endif
}
@ -567,9 +569,9 @@ static void *map_library(int fd, struct dso *dso)
dyn = ph->p_vaddr;
} else if (ph->p_type == PT_TLS) {
tls_image = ph->p_vaddr;
dso->tls_align = ph->p_align;
dso->tls_len = ph->p_filesz;
dso->tls_size = ph->p_memsz;
dso->tls.align = ph->p_align;
dso->tls.len = ph->p_filesz;
dso->tls.size = ph->p_memsz;
} else if (ph->p_type == PT_GNU_RELRO) {
dso->relro_start = ph->p_vaddr & -PAGE_SIZE;
dso->relro_end = (ph->p_vaddr + ph->p_memsz) & -PAGE_SIZE;
@ -694,7 +696,7 @@ static void *map_library(int fd, struct dso *dso)
done_mapping:
dso->base = base;
dso->dynv = laddr(dso, dyn);
if (dso->tls_size) dso->tls_image = laddr(dso, tls_image);
if (dso->tls.size) dso->tls.image = laddr(dso, tls_image);
if (!runtime) reclaim_gaps(dso);
free(allocated_buf);
return map;
@ -1011,8 +1013,8 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
* extended DTV capable of storing an additional slot for
* the newly-loaded DSO. */
alloc_size = sizeof *p + strlen(pathname) + 1;
if (runtime && temp_dso.tls_image) {
size_t per_th = temp_dso.tls_size + temp_dso.tls_align
if (runtime && temp_dso.tls.image) {
size_t per_th = temp_dso.tls.size + temp_dso.tls.align
+ sizeof(void *) * (tls_cnt+3);
n_th = libc.threads_minus_1 + 1;
if (n_th > SSIZE_MAX / per_th) alloc_size = SIZE_MAX;
@ -1033,22 +1035,25 @@ static struct dso *load_library(const char *name, struct dso *needed_by)
strcpy(p->name, pathname);
/* Add a shortname only if name arg was not an explicit pathname. */
if (pathname != name) p->shortname = strrchr(p->name, '/')+1;
if (p->tls_image) {
if (p->tls.image) {
p->tls_id = ++tls_cnt;
tls_align = MAXP2(tls_align, p->tls_align);
tls_align = MAXP2(tls_align, p->tls.align);
#ifdef TLS_ABOVE_TP
p->tls_offset = tls_offset + ( (tls_align-1) &
-(tls_offset + (uintptr_t)p->tls_image) );
tls_offset += p->tls_size;
p->tls.offset = tls_offset + ( (tls_align-1) &
-(tls_offset + (uintptr_t)p->tls.image) );
tls_offset += p->tls.size;
#else
tls_offset += p->tls_size + p->tls_align - 1;
tls_offset -= (tls_offset + (uintptr_t)p->tls_image)
& (p->tls_align-1);
p->tls_offset = tls_offset;
tls_offset += p->tls.size + p->tls.align - 1;
tls_offset -= (tls_offset + (uintptr_t)p->tls.image)
& (p->tls.align-1);
p->tls.offset = tls_offset;
#endif
p->new_dtv = (void *)(-sizeof(size_t) &
(uintptr_t)(p->name+strlen(p->name)+sizeof(size_t)));
p->new_tls = (void *)(p->new_dtv + n_th*(tls_cnt+1));
if (tls_tail) tls_tail->next = &p->tls;
else libc.tls_head = &p->tls;
tls_tail = &p->tls;
}
tail->next = p;
@ -1238,53 +1243,8 @@ static void dl_debug_state(void)
weak_alias(dl_debug_state, _dl_debug_state);
void __reset_tls()
void __init_tls(size_t *auxv)
{
pthread_t self = __pthread_self();
struct dso *p;
for (p=head; p; p=p->next) {
if (!p->tls_id || !self->dtv[p->tls_id]) continue;
memcpy(self->dtv[p->tls_id], p->tls_image, p->tls_len);
memset((char *)self->dtv[p->tls_id]+p->tls_len, 0,
p->tls_size - p->tls_len);
if (p->tls_id == (size_t)self->dtv[0]) break;
}
}
void *__copy_tls(unsigned char *mem)
{
pthread_t td;
struct dso *p;
void **dtv;
#ifdef TLS_ABOVE_TP
dtv = (void **)(mem + libc.tls_size) - (tls_cnt + 1);
mem += -((uintptr_t)mem + sizeof(struct pthread)) & (tls_align-1);
td = (pthread_t)mem;
mem += sizeof(struct pthread);
for (p=head; p; p=p->next) {
if (!p->tls_id) continue;
dtv[p->tls_id] = mem + p->tls_offset;
memcpy(dtv[p->tls_id], p->tls_image, p->tls_len);
}
#else
dtv = (void **)mem;
mem += libc.tls_size - sizeof(struct pthread);
mem -= (uintptr_t)mem & (tls_align-1);
td = (pthread_t)mem;
for (p=head; p; p=p->next) {
if (!p->tls_id) continue;
dtv[p->tls_id] = mem - p->tls_offset;
memcpy(dtv[p->tls_id], p->tls_image, p->tls_len);
}
#endif
dtv[0] = (void *)tls_cnt;
td->dtv = td->dtv_copy = dtv;
return td;
}
__attribute__((__visibility__("hidden")))
@ -1321,12 +1281,12 @@ void *__tls_get_new(size_t *v)
unsigned char *mem;
for (p=head; ; p=p->next) {
if (!p->tls_id || self->dtv[p->tls_id]) continue;
mem = p->new_tls + (p->tls_size + p->tls_align)
mem = p->new_tls + (p->tls.size + p->tls.align)
* a_fetch_add(&p->new_tls_idx,1);
mem += ((uintptr_t)p->tls_image - (uintptr_t)mem)
& (p->tls_align-1);
mem += ((uintptr_t)p->tls.image - (uintptr_t)mem)
& (p->tls.align-1);
self->dtv[p->tls_id] = mem;
memcpy(mem, p->tls_image, p->tls_len);
memcpy(mem, p->tls.image, p->tls.len);
if (p->tls_id == v[0]) break;
}
__restore_sigs(&set);
@ -1335,6 +1295,8 @@ void *__tls_get_new(size_t *v)
static void update_tls_size()
{
libc.tls_cnt = tls_cnt;
libc.tls_align = tls_align;
libc.tls_size = ALIGN(
(1+tls_cnt) * sizeof(void *) +
tls_offset +
@ -1445,6 +1407,7 @@ _Noreturn void __dls3(size_t *sp)
* use during dynamic linking. If possible it will also serve as the
* thread pointer at runtime. */
libc.tls_size = sizeof builtin_tls;
libc.tls_align = tls_align;
if (__init_tp(__copy_tls((void *)builtin_tls)) < 0) {
a_crash();
}
@ -1472,13 +1435,13 @@ _Noreturn void __dls3(size_t *sp)
interp_off = (size_t)phdr->p_vaddr;
else if (phdr->p_type == PT_TLS) {
tls_image = phdr->p_vaddr;
app.tls_len = phdr->p_filesz;
app.tls_size = phdr->p_memsz;
app.tls_align = phdr->p_align;
app.tls.len = phdr->p_filesz;
app.tls.size = phdr->p_memsz;
app.tls.align = phdr->p_align;
}
}
if (DL_FDPIC) app.loadmap = app_loadmap;
if (app.tls_size) app.tls_image = laddr(&app, tls_image);
if (app.tls.size) app.tls.image = laddr(&app, tls_image);
if (interp_off) ldso.name = laddr(&app, interp_off);
if ((aux[0] & (1UL<<AT_EXECFN))
&& strncmp((char *)aux[AT_EXECFN], "/proc/", 6))
@ -1547,19 +1510,20 @@ _Noreturn void __dls3(size_t *sp)
dprintf(1, "\t%s (%p)\n", ldso.name, ldso.base);
}
}
if (app.tls_size) {
if (app.tls.size) {
libc.tls_head = &app.tls;
app.tls_id = tls_cnt = 1;
#ifdef TLS_ABOVE_TP
app.tls_offset = 0;
tls_offset = app.tls_size
+ ( -((uintptr_t)app.tls_image + app.tls_size)
& (app.tls_align-1) );
app.tls.offset = 0;
tls_offset = app.tls.size
+ ( -((uintptr_t)app.tls.image + app.tls.size)
& (app.tls.align-1) );
#else
tls_offset = app.tls_offset = app.tls_size
+ ( -((uintptr_t)app.tls_image + app.tls_size)
& (app.tls_align-1) );
tls_offset = app.tls.offset = app.tls.size
+ ( -((uintptr_t)app.tls.image + app.tls.size)
& (app.tls.align-1) );
#endif
tls_align = MAXP2(tls_align, app.tls_align);
tls_align = MAXP2(tls_align, app.tls.align);
}
app.global = 1;
decode_dyn(&app);
@ -1668,6 +1632,7 @@ _Noreturn void __dls3(size_t *sp)
void *dlopen(const char *file, int mode)
{
struct dso *volatile p, *orig_tail, *next;
struct tls_module *orig_tls_tail;
size_t orig_tls_cnt, orig_tls_offset, orig_tls_align;
size_t i;
int cs;
@ -1680,6 +1645,7 @@ void *dlopen(const char *file, int mode)
__inhibit_ptc();
p = 0;
orig_tls_tail = tls_tail;
orig_tls_cnt = tls_cnt;
orig_tls_offset = tls_offset;
orig_tls_align = tls_align;
@ -1706,6 +1672,8 @@ void *dlopen(const char *file, int mode)
unmap_library(p);
free(p);
}
if (!orig_tls_tail) libc.tls_head = 0;
tls_tail = orig_tls_tail;
tls_cnt = orig_tls_cnt;
tls_offset = orig_tls_offset;
tls_align = orig_tls_align;
@ -1922,7 +1890,7 @@ int dl_iterate_phdr(int(*callback)(struct dl_phdr_info *info, size_t size, void
info.dlpi_adds = gencnt;
info.dlpi_subs = 0;
info.dlpi_tls_modid = current->tls_id;
info.dlpi_tls_data = current->tls_image;
info.dlpi_tls_data = current->tls.image;
ret = (callback)(&info, sizeof (info), data);

Loading…
Cancel
Save