Skip to content

Commit

Permalink
Refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
rui314 committed Oct 17, 2022
1 parent 8fb6fb2 commit 2fb054d
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 56 deletions.
6 changes: 3 additions & 3 deletions elf/arch-ppc64v1.cc
Original file line number Diff line number Diff line change
Expand Up @@ -250,13 +250,13 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
*(ub16 *)loc = ctx.got->get_tlsld_addr(ctx) - ctx.TOC->value;
break;
case R_PPC64_DTPREL16_HA:
*(ub16 *)loc = ha(S + A - ctx.tls_begin - E::tls_dtv_offset);
*(ub16 *)loc = ha(S + A - ctx.tls_begin - E::tls_dtp_offset);
break;
case R_PPC64_TPREL16_HA:
*(ub16 *)loc = ha(S + A - ctx.tp_addr);
break;
case R_PPC64_DTPREL16_LO:
*(ub16 *)loc = S + A - ctx.tls_begin - E::tls_dtv_offset;
*(ub16 *)loc = S + A - ctx.tls_begin - E::tls_dtp_offset;
break;
case R_PPC64_TPREL16_LO:
*(ub16 *)loc = S + A - ctx.tp_addr;
Expand Down Expand Up @@ -327,7 +327,7 @@ void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
break;
}
case R_PPC64_DTPREL64:
*(ub64 *)loc = S + A - ctx.tls_begin - E::tls_dtv_offset;
*(ub64 *)loc = S + A - ctx.tls_begin - E::tls_dtp_offset;
break;
default:
Fatal(ctx) << *this << ": apply_reloc_nonalloc: " << rel;
Expand Down
6 changes: 3 additions & 3 deletions elf/arch-ppc64v2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -261,13 +261,13 @@ void InputSection<E>::apply_reloc_alloc(Context<E> &ctx, u8 *base) {
*(ul16 *)loc = ctx.got->get_tlsld_addr(ctx) - ctx.TOC->value;
break;
case R_PPC64_DTPREL16_HA:
*(ul16 *)loc = ha(S + A - ctx.tls_begin - E::tls_dtv_offset);
*(ul16 *)loc = ha(S + A - ctx.tls_begin - E::tls_dtp_offset);
break;
case R_PPC64_TPREL16_HA:
*(ul16 *)loc = ha(S + A - ctx.tp_addr);
break;
case R_PPC64_DTPREL16_LO:
*(ul16 *)loc = S + A - ctx.tls_begin - E::tls_dtv_offset;
*(ul16 *)loc = S + A - ctx.tls_begin - E::tls_dtp_offset;
break;
case R_PPC64_TPREL16_LO:
*(ul16 *)loc = S + A - ctx.tp_addr;
Expand Down Expand Up @@ -338,7 +338,7 @@ void InputSection<E>::apply_reloc_nonalloc(Context<E> &ctx, u8 *base) {
break;
}
case R_PPC64_DTPREL64:
*(ul64 *)loc = S + A - ctx.tls_begin - E::tls_dtv_offset;
*(ul64 *)loc = S + A - ctx.tls_begin - E::tls_dtp_offset;
break;
default:
Fatal(ctx) << *this << ": apply_reloc_nonalloc: " << rel;
Expand Down
24 changes: 12 additions & 12 deletions elf/elf.h
Original file line number Diff line number Diff line change
Expand Up @@ -2361,7 +2361,7 @@ struct X86_64 {
static constexpr u32 plt_hdr_size = 32;
static constexpr u32 plt_size = 16;
static constexpr u32 pltgot_size = 16;
static constexpr u32 tls_dtv_offset = 0;
static constexpr u32 tls_dtp_offset = 0;
};

template <> struct ElfSym<X86_64> : EL64Sym {};
Expand Down Expand Up @@ -2397,7 +2397,7 @@ struct I386 {
static constexpr u32 plt_hdr_size = 16;
static constexpr u32 plt_size = 16;
static constexpr u32 pltgot_size = 16;
static constexpr u32 tls_dtv_offset = 0;
static constexpr u32 tls_dtp_offset = 0;
};

template <> struct ElfSym<I386> : EL32Sym {};
Expand Down Expand Up @@ -2433,7 +2433,7 @@ struct ARM64 {
static constexpr u32 plt_hdr_size = 32;
static constexpr u32 plt_size = 16;
static constexpr u32 pltgot_size = 16;
static constexpr u32 tls_dtv_offset = 0;
static constexpr u32 tls_dtp_offset = 0;
static constexpr u32 thunk_hdr_size = 0;
static constexpr u32 thunk_size = 12;
};
Expand Down Expand Up @@ -2471,7 +2471,7 @@ struct ARM32 {
static constexpr u32 plt_hdr_size = 32;
static constexpr u32 plt_size = 16;
static constexpr u32 pltgot_size = 16;
static constexpr u32 tls_dtv_offset = 0;
static constexpr u32 tls_dtp_offset = 0;
static constexpr u32 thunk_hdr_size = 12;
static constexpr u32 thunk_size = 20;
};
Expand Down Expand Up @@ -2530,7 +2530,7 @@ struct RV64LE {
// values are usually computed and used only by runtime. But when we do
// compute DTPOFF for statically-linked executable, we need to offset
// the bias by subtracting 0x800.
static constexpr u32 tls_dtv_offset = 0x800;
static constexpr u32 tls_dtp_offset = 0x800;
};

template <> struct ElfSym<RV64LE> : EL64Sym {};
Expand Down Expand Up @@ -2565,7 +2565,7 @@ struct RV64BE {
static constexpr u32 plt_hdr_size = 32;
static constexpr u32 plt_size = 16;
static constexpr u32 pltgot_size = 16;
static constexpr u32 tls_dtv_offset = 0x800;
static constexpr u32 tls_dtp_offset = 0x800;
};

template <> struct ElfSym<RV64BE> : EB64Sym {};
Expand Down Expand Up @@ -2600,7 +2600,7 @@ struct RV32LE {
static constexpr u32 plt_hdr_size = 32;
static constexpr u32 plt_size = 16;
static constexpr u32 pltgot_size = 16;
static constexpr u32 tls_dtv_offset = 0x800;
static constexpr u32 tls_dtp_offset = 0x800;
};

template <> struct ElfSym<RV32LE> : EL32Sym {};
Expand Down Expand Up @@ -2635,7 +2635,7 @@ struct RV32BE {
static constexpr u32 plt_hdr_size = 32;
static constexpr u32 plt_size = 16;
static constexpr u32 pltgot_size = 16;
static constexpr u32 tls_dtv_offset = 0x800;
static constexpr u32 tls_dtp_offset = 0x800;
};

template <> struct ElfSym<RV32BE> : EB32Sym {};
Expand Down Expand Up @@ -2670,7 +2670,7 @@ struct PPC64V1 {
static constexpr u32 plt_hdr_size = 52;
static constexpr u32 plt_size = 8;
static constexpr u32 pltgot_size = 0;
static constexpr u32 tls_dtv_offset = 0x8000;
static constexpr u32 tls_dtp_offset = 0x8000;
static constexpr u32 thunk_hdr_size = 0;
static constexpr u32 thunk_size = 28;
};
Expand Down Expand Up @@ -2707,7 +2707,7 @@ struct PPC64V2 {
static constexpr u32 plt_hdr_size = 60;
static constexpr u32 plt_size = 4;
static constexpr u32 pltgot_size = 0;
static constexpr u32 tls_dtv_offset = 0x8000;
static constexpr u32 tls_dtp_offset = 0x8000;
static constexpr u32 thunk_hdr_size = 0;
static constexpr u32 thunk_size = 20;
};
Expand Down Expand Up @@ -2744,7 +2744,7 @@ struct S390X {
static constexpr u32 plt_hdr_size = 32;
static constexpr u32 plt_size = 32;
static constexpr u32 pltgot_size = 16;
static constexpr u32 tls_dtv_offset = 0;
static constexpr u32 tls_dtp_offset = 0;
};

template <> struct ElfSym<S390X> : EB64Sym {};
Expand Down Expand Up @@ -2779,7 +2779,7 @@ struct SPARC64 {
static constexpr u32 plt_hdr_size = 128;
static constexpr u32 plt_size = 32;
static constexpr u32 pltgot_size = 32;
static constexpr u32 tls_dtv_offset = 0;
static constexpr u32 tls_dtp_offset = 0;
};

template <> struct ElfSym<SPARC64> : EB64Sym {};
Expand Down
83 changes: 45 additions & 38 deletions elf/output-chunks.cc
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,48 @@ bool is_relro(Context<E> &ctx, Chunk<E> *chunk) {
return false;
}

// Caches the two addresses that TLS relocations are computed against:
// the start of the PT_TLS segment (ctx.tls_begin) and the address the
// thread pointer register is taken to hold (ctx.tp_addr).
//
// `phdr` must be the PT_TLS program header.
template <typename E>
static void init_thread_pointers(Context<E> &ctx, ElfPhdr<E> phdr) {
  assert(phdr.p_type == PT_TLS);

  // DTP-relative values are computed from the start of the TLS segment.
  ctx.tls_begin = phdr.p_vaddr;

  // Every thread has its own value in the TP (thread pointer) register,
  // and thread-local variables (TLVs) defined in the main executable are
  // addressed relative to it. Where TP points with respect to the TLV
  // block depends on the target.
  if constexpr (is_riscv<E>) {
    // RISC-V simply uses the beginning of the TLV block as TP. Its
    // load/store instructions usually take 12-bit signed immediates, so
    // the beginning of the TLV block ± 2 KiB is reachable with a single
    // load/store instruction.
    ctx.tp_addr = phdr.p_vaddr;
  } else if constexpr (is_ppc<E>) {
    // On PPC64, TP is 0x7000 (28 KiB) past the beginning of the TLV block
    // to maximize the range addressable by load/store instructions with
    // 16-bit signed immediates. It is not exactly 0x8000 (32 KiB) because
    // a small implementation-defined piece of data sits before the TLV
    // block and the runtime wants to access it efficiently too.
    ctx.tp_addr = phdr.p_vaddr + 0x7000;
  } else if constexpr (is_arm<E>) {
    // On ARM, the runtime places two words in front of the TLV template
    // image when copying TLVs to the per-thread area, so TP has to be
    // moved back by that amount.
    ctx.tp_addr = align_down(phdr.p_vaddr - sizeof(Word<E>) * 2, phdr.p_align);
  } else {
    // On x86, SPARC and s390x, TP (%gs on i386, %fs on x86-64, %g7 on
    // SPARC and %a0/%a1 on s390x) points past the end of all TLVs for
    // historical reasons, so TLVs are accessed with negative offsets.
    static_assert(is_x86<E> || is_sparc<E> || is_s390x<E>);
    ctx.tp_addr = align_to(phdr.p_vaddr + phdr.p_memsz, phdr.p_align);
  }
}

template <typename E>
static std::vector<ElfPhdr<E>> create_phdr(Context<E> &ctx) {
std::vector<ElfPhdr<E>> vec;
Expand Down Expand Up @@ -268,43 +310,8 @@ static std::vector<ElfPhdr<E>> create_phdr(Context<E> &ctx) {
while (i < ctx.chunks.size() && (ctx.chunks[i]->shdr.sh_flags & SHF_TLS))
append(ctx.chunks[i++]);

// Some types of TLS relocations are defined relative to the TLS
// segment, so save its addresses for easy access.
ElfPhdr<E> &phdr = vec.back();
ctx.tls_begin = phdr.p_vaddr;

// Each thread has its own value in TP (thread pointer) register, and
// TLVs defined in the main executable are accessed relative to TP.
//
// On x86, SPARC and S390X, TP (%gs on i386, %fs on x86-64, %g7 on SPARC
// and %a0/%a1 on S390X) refers past the end of all TLVs for historical
// reasons. TLVs are accessed with negative offsets from TP.
//
// On ARM, the runtime appends two words at the beginning of TLV
// template image when copying TLVs to per-thread area, so we need
// to offset it.
//
// On PPC64, TP is 0x7000 (28 KiB) past the beginning of the TLV block
// to maximize the addressable range for load/store instructions with
// 16-bits signed immediates. It's not exactly 0x8000 (32 KiB) off
// because there's a small implementation-defined piece of data before
// the TLV block, and the runtime wants to access them efficiently
// too.
//
// RISC-V just uses the beginning of the TLV block as TP. RISC-V
// load/store instructions usually take 12-bits signed immediates,
// so the beginning of TLV ± 2 KiB is accessible with a single
// load/store instruction.
if constexpr (is_x86<E> || is_sparc<E> || is_s390x<E>) {
ctx.tp_addr = align_to(phdr.p_vaddr + phdr.p_memsz, phdr.p_align);
} else if constexpr (is_arm<E>) {
ctx.tp_addr = align_down(ctx.tls_begin - sizeof(Word<E>) * 2, phdr.p_align);
} else if constexpr (is_ppc<E>) {
ctx.tp_addr = ctx.tls_begin + 0x7000;
} else {
static_assert(is_riscv<E>);
ctx.tp_addr = ctx.tls_begin;
}
// Initialize ctx.tls_begin and ctx.tp_addr
init_thread_pointers(ctx, vec.back());
}

// Add PT_DYNAMIC
Expand Down Expand Up @@ -1233,7 +1240,7 @@ std::vector<GotEntry<E>> GotSection<E>::get_entries(Context<E> &ctx) const {
if (ctx.arg.is_static) {
entries.push_back({idx, 1}); // One indicates the main executable file
entries.push_back({idx + 1,
sym->get_addr(ctx) - ctx.tls_begin - E::tls_dtv_offset});
sym->get_addr(ctx) - ctx.tls_begin - E::tls_dtp_offset});
} else {
entries.push_back({idx, 0, E::R_DTPMOD, sym});
entries.push_back({idx + 1, 0, E::R_DTPOFF, sym});
Expand Down
24 changes: 24 additions & 0 deletions test/elf/tls-irregular-start-addr.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash
# Checks that a thread-local variable is still read correctly when the
# .tdata section is placed at an unusual start address, both with and
# without linker relaxation.
. $(dirname $0)/common.inc

# Build a position-independent object that prints a TLS variable.
cat <<EOF | $CC -fPIC -c -o $t/a.o -xc -
#include <stdio.h>
_Thread_local char x1 = 42;
int main() {
printf("%d\n", x1);
}
EOF

# .tdata at 0x100001, relaxation enabled
$CC -B. -o $t/exe1 $t/a.o -pie -Wl,-section-start=.tdata=0x100001 -Wl,-relax
$QEMU $t/exe1 | grep -q '^42$'

# .tdata at 0x100001, relaxation disabled. The output must be exe2 so that
# the binary executed below is the one that was just linked.
$CC -B. -o $t/exe2 $t/a.o -pie -Wl,-section-start=.tdata=0x100001 -Wl,-no-relax
$QEMU $t/exe2 | grep -q '^42$'

# .tdata at 0x10000f, relaxation enabled
$CC -B. -o $t/exe3 $t/a.o -pie -Wl,-section-start=.tdata=0x10000f -Wl,-relax
$QEMU $t/exe3 | grep -q '^42$'

# .tdata at 0x10000f, relaxation disabled
$CC -B. -o $t/exe4 $t/a.o -pie -Wl,-section-start=.tdata=0x10000f -Wl,-no-relax
$QEMU $t/exe4 | grep -q '^42$'

0 comments on commit 2fb054d

Please sign in to comment.