From df18f2951eab8a4ca5f53a8c5582cdb5cbe501ab Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Sun, 5 Jan 2020 18:47:02 +0300 Subject: [PATCH 001/189] math: move x86_64 fabs, fabsf to C with inline asm --- src/math/x86_64/fabs.c | 10 ++++++++++ src/math/x86_64/fabs.s | 9 --------- src/math/x86_64/fabsf.c | 10 ++++++++++ src/math/x86_64/fabsf.s | 7 ------- 4 files changed, 20 insertions(+), 16 deletions(-) create mode 100644 src/math/x86_64/fabs.c delete mode 100644 src/math/x86_64/fabs.s create mode 100644 src/math/x86_64/fabsf.c delete mode 100644 src/math/x86_64/fabsf.s diff --git a/src/math/x86_64/fabs.c b/src/math/x86_64/fabs.c new file mode 100644 index 000000000..165624777 --- /dev/null +++ b/src/math/x86_64/fabs.c @@ -0,0 +1,10 @@ +#include + +double fabs(double x) +{ + double t; + __asm__ ("pcmpeqd %0, %0" : "=x"(t)); // t = ~0 + __asm__ ("psrlq $1, %0" : "+x"(t)); // t >>= 1 + __asm__ ("andps %1, %0" : "+x"(x) : "x"(t)); // x &= t + return x; +} diff --git a/src/math/x86_64/fabs.s b/src/math/x86_64/fabs.s deleted file mode 100644 index 5715005e3..000000000 --- a/src/math/x86_64/fabs.s +++ /dev/null @@ -1,9 +0,0 @@ -.global fabs -.type fabs,@function -fabs: - xor %eax,%eax - dec %rax - shr %rax - movq %rax,%xmm1 - andpd %xmm1,%xmm0 - ret diff --git a/src/math/x86_64/fabsf.c b/src/math/x86_64/fabsf.c new file mode 100644 index 000000000..36ea7481f --- /dev/null +++ b/src/math/x86_64/fabsf.c @@ -0,0 +1,10 @@ +#include + +float fabsf(float x) +{ + float t; + __asm__ ("pcmpeqd %0, %0" : "=x"(t)); // t = ~0 + __asm__ ("psrld $1, %0" : "+x"(t)); // t >>= 1 + __asm__ ("andps %1, %0" : "+x"(x) : "x"(t)); // x &= t + return x; +} diff --git a/src/math/x86_64/fabsf.s b/src/math/x86_64/fabsf.s deleted file mode 100644 index 501a1f175..000000000 --- a/src/math/x86_64/fabsf.s +++ /dev/null @@ -1,7 +0,0 @@ -.global fabsf -.type fabsf,@function -fabsf: - mov $0x7fffffff,%eax - movq %rax,%xmm1 - andps %xmm1,%xmm0 - ret From 23a90460a0909259abdf6b5d534f98646348813b 
Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Mon, 6 Jan 2020 11:36:18 +0300 Subject: [PATCH 002/189] math: move x87-family fabs functions to C with inline asm --- src/math/i386/fabs.c | 7 +++++++ src/math/i386/fabs.s | 6 ------ src/math/i386/fabsf.c | 7 +++++++ src/math/i386/fabsf.s | 6 ------ src/math/i386/fabsl.c | 7 +++++++ src/math/i386/fabsl.s | 6 ------ src/math/x86_64/fabsl.c | 7 +++++++ src/math/x86_64/fabsl.s | 6 ------ 8 files changed, 28 insertions(+), 24 deletions(-) create mode 100644 src/math/i386/fabs.c delete mode 100644 src/math/i386/fabs.s create mode 100644 src/math/i386/fabsf.c delete mode 100644 src/math/i386/fabsf.s create mode 100644 src/math/i386/fabsl.c delete mode 100644 src/math/i386/fabsl.s create mode 100644 src/math/x86_64/fabsl.c delete mode 100644 src/math/x86_64/fabsl.s diff --git a/src/math/i386/fabs.c b/src/math/i386/fabs.c new file mode 100644 index 000000000..396727863 --- /dev/null +++ b/src/math/i386/fabs.c @@ -0,0 +1,7 @@ +#include + +double fabs(double x) +{ + __asm__ ("fabs" : "+t"(x)); + return x; +} diff --git a/src/math/i386/fabs.s b/src/math/i386/fabs.s deleted file mode 100644 index d66ea9a19..000000000 --- a/src/math/i386/fabs.s +++ /dev/null @@ -1,6 +0,0 @@ -.global fabs -.type fabs,@function -fabs: - fldl 4(%esp) - fabs - ret diff --git a/src/math/i386/fabsf.c b/src/math/i386/fabsf.c new file mode 100644 index 000000000..d882eee34 --- /dev/null +++ b/src/math/i386/fabsf.c @@ -0,0 +1,7 @@ +#include + +float fabsf(float x) +{ + __asm__ ("fabs" : "+t"(x)); + return x; +} diff --git a/src/math/i386/fabsf.s b/src/math/i386/fabsf.s deleted file mode 100644 index a981c4222..000000000 --- a/src/math/i386/fabsf.s +++ /dev/null @@ -1,6 +0,0 @@ -.global fabsf -.type fabsf,@function -fabsf: - flds 4(%esp) - fabs - ret diff --git a/src/math/i386/fabsl.c b/src/math/i386/fabsl.c new file mode 100644 index 000000000..cc1c9ed9c --- /dev/null +++ b/src/math/i386/fabsl.c @@ -0,0 +1,7 @@ +#include + +long double fabsl(long 
double x) +{ + __asm__ ("fabs" : "+t"(x)); + return x; +} diff --git a/src/math/i386/fabsl.s b/src/math/i386/fabsl.s deleted file mode 100644 index ceef9e4cc..000000000 --- a/src/math/i386/fabsl.s +++ /dev/null @@ -1,6 +0,0 @@ -.global fabsl -.type fabsl,@function -fabsl: - fldt 4(%esp) - fabs - ret diff --git a/src/math/x86_64/fabsl.c b/src/math/x86_64/fabsl.c new file mode 100644 index 000000000..cc1c9ed9c --- /dev/null +++ b/src/math/x86_64/fabsl.c @@ -0,0 +1,7 @@ +#include + +long double fabsl(long double x) +{ + __asm__ ("fabs" : "+t"(x)); + return x; +} diff --git a/src/math/x86_64/fabsl.s b/src/math/x86_64/fabsl.s deleted file mode 100644 index 4e7ab525e..000000000 --- a/src/math/x86_64/fabsl.s +++ /dev/null @@ -1,6 +0,0 @@ -.global fabsl -.type fabsl,@function -fabsl: - fldt 8(%rsp) - fabs - ret From 37669d42e94d7c8754e40922da83659f13e2f014 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Mon, 6 Jan 2020 19:35:57 +0300 Subject: [PATCH 003/189] math: move trivial x86-family sqrt functions to C with inline asm --- src/math/i386/sqrtl.c | 7 +++++++ src/math/i386/sqrtl.s | 5 ----- src/math/x86_64/sqrt.c | 7 +++++++ src/math/x86_64/sqrt.s | 4 ---- src/math/x86_64/sqrtf.c | 7 +++++++ src/math/x86_64/sqrtf.s | 4 ---- src/math/x86_64/sqrtl.c | 7 +++++++ src/math/x86_64/sqrtl.s | 5 ----- 8 files changed, 28 insertions(+), 18 deletions(-) create mode 100644 src/math/i386/sqrtl.c delete mode 100644 src/math/i386/sqrtl.s create mode 100644 src/math/x86_64/sqrt.c delete mode 100644 src/math/x86_64/sqrt.s create mode 100644 src/math/x86_64/sqrtf.c delete mode 100644 src/math/x86_64/sqrtf.s create mode 100644 src/math/x86_64/sqrtl.c delete mode 100644 src/math/x86_64/sqrtl.s diff --git a/src/math/i386/sqrtl.c b/src/math/i386/sqrtl.c new file mode 100644 index 000000000..864cfcc4f --- /dev/null +++ b/src/math/i386/sqrtl.c @@ -0,0 +1,7 @@ +#include + +long double sqrtl(long double x) +{ + __asm__ ("fsqrt" : "+t"(x)); + return x; +} diff --git a/src/math/i386/sqrtl.s 
b/src/math/i386/sqrtl.s deleted file mode 100644 index e0d426168..000000000 --- a/src/math/i386/sqrtl.s +++ /dev/null @@ -1,5 +0,0 @@ -.global sqrtl -.type sqrtl,@function -sqrtl: fldt 4(%esp) - fsqrt - ret diff --git a/src/math/x86_64/sqrt.c b/src/math/x86_64/sqrt.c new file mode 100644 index 000000000..657e09e3b --- /dev/null +++ b/src/math/x86_64/sqrt.c @@ -0,0 +1,7 @@ +#include + +double sqrt(double x) +{ + __asm__ ("sqrtsd %1, %0" : "=x"(x) : "x"(x)); + return x; +} diff --git a/src/math/x86_64/sqrt.s b/src/math/x86_64/sqrt.s deleted file mode 100644 index d3c609f9f..000000000 --- a/src/math/x86_64/sqrt.s +++ /dev/null @@ -1,4 +0,0 @@ -.global sqrt -.type sqrt,@function -sqrt: sqrtsd %xmm0, %xmm0 - ret diff --git a/src/math/x86_64/sqrtf.c b/src/math/x86_64/sqrtf.c new file mode 100644 index 000000000..720baec60 --- /dev/null +++ b/src/math/x86_64/sqrtf.c @@ -0,0 +1,7 @@ +#include + +float sqrtf(float x) +{ + __asm__ ("sqrtss %1, %0" : "=x"(x) : "x"(x)); + return x; +} diff --git a/src/math/x86_64/sqrtf.s b/src/math/x86_64/sqrtf.s deleted file mode 100644 index eec48c609..000000000 --- a/src/math/x86_64/sqrtf.s +++ /dev/null @@ -1,4 +0,0 @@ -.global sqrtf -.type sqrtf,@function -sqrtf: sqrtss %xmm0, %xmm0 - ret diff --git a/src/math/x86_64/sqrtl.c b/src/math/x86_64/sqrtl.c new file mode 100644 index 000000000..864cfcc4f --- /dev/null +++ b/src/math/x86_64/sqrtl.c @@ -0,0 +1,7 @@ +#include + +long double sqrtl(long double x) +{ + __asm__ ("fsqrt" : "+t"(x)); + return x; +} diff --git a/src/math/x86_64/sqrtl.s b/src/math/x86_64/sqrtl.s deleted file mode 100644 index 23cd687d0..000000000 --- a/src/math/x86_64/sqrtl.s +++ /dev/null @@ -1,5 +0,0 @@ -.global sqrtl -.type sqrtl,@function -sqrtl: fldt 8(%rsp) - fsqrt - ret From 223a309458e5fd0cabe8e1d883d4653a5930f33e Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Mon, 6 Jan 2020 20:31:47 +0300 Subject: [PATCH 004/189] math: move i386 sqrtf to C with inline asm --- src/math/i386/sqrtf.c | 12 ++++++++++++ 
src/math/i386/sqrtf.s | 7 ------- 2 files changed, 12 insertions(+), 7 deletions(-) create mode 100644 src/math/i386/sqrtf.c delete mode 100644 src/math/i386/sqrtf.s diff --git a/src/math/i386/sqrtf.c b/src/math/i386/sqrtf.c new file mode 100644 index 000000000..41c65c2bd --- /dev/null +++ b/src/math/i386/sqrtf.c @@ -0,0 +1,12 @@ +#include + +float sqrtf(float x) +{ + long double t; + /* The long double result has sufficient precision so that + * second rounding to float still keeps the returned value + * correctly rounded, see Pierre Roux, "Innocuous Double + * Rounding of Basic Arithmetic Operations". */ + __asm__ ("fsqrt" : "=t"(t) : "0"(x)); + return (float)t; +} diff --git a/src/math/i386/sqrtf.s b/src/math/i386/sqrtf.s deleted file mode 100644 index 9e944f456..000000000 --- a/src/math/i386/sqrtf.s +++ /dev/null @@ -1,7 +0,0 @@ -.global sqrtf -.type sqrtf,@function -sqrtf: flds 4(%esp) - fsqrt - fstps 4(%esp) - flds 4(%esp) - ret From a9bb6b2880d44adbe2a5b0784cc0a5472d2c4b6f Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Tue, 7 Jan 2020 15:53:03 +0300 Subject: [PATCH 005/189] math: move i386 sqrt to C with inline asm --- src/math/i386/sqrt.c | 15 +++++++++++++++ src/math/i386/sqrt.s | 21 --------------------- 2 files changed, 15 insertions(+), 21 deletions(-) create mode 100644 src/math/i386/sqrt.c delete mode 100644 src/math/i386/sqrt.s diff --git a/src/math/i386/sqrt.c b/src/math/i386/sqrt.c new file mode 100644 index 000000000..934fbccab --- /dev/null +++ b/src/math/i386/sqrt.c @@ -0,0 +1,15 @@ +#include "libm.h" + +double sqrt(double x) +{ + union ldshape ux; + unsigned fpsr; + __asm__ ("fsqrt; fnstsw %%ax": "=t"(ux.f), "=a"(fpsr) : "0"(x)); + if ((ux.i.m & 0x7ff) != 0x400) + return (double)ux.f; + /* Rounding to double would have encountered an exact halfway case. + Adjust mantissa downwards if fsqrt rounded up, else upwards. 
+ (result of fsqrt could not have been exact) */ + ux.i.m ^= (fpsr & 0x200) + 0x300; + return (double)ux.f; +} diff --git a/src/math/i386/sqrt.s b/src/math/i386/sqrt.s deleted file mode 100644 index 57837e256..000000000 --- a/src/math/i386/sqrt.s +++ /dev/null @@ -1,21 +0,0 @@ -.global sqrt -.type sqrt,@function -sqrt: fldl 4(%esp) - fsqrt - fnstsw %ax - sub $12,%esp - fld %st(0) - fstpt (%esp) - mov (%esp),%ecx - and $0x7ff,%ecx - cmp $0x400,%ecx - jnz 1f - and $0x200,%eax - sub $0x100,%eax - sub %eax,(%esp) - fstp %st(0) - fldt (%esp) -1: add $12,%esp - fstpl 4(%esp) - fldl 4(%esp) - ret From 2d829112e1c65cfe2e7a6099fd9a2b669161aff1 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Fri, 10 Jan 2020 23:06:36 +0300 Subject: [PATCH 006/189] math: move x86_64 (l)lrint(f) functions to C with inline asm --- src/math/x86_64/llrint.c | 8 ++++++++ src/math/x86_64/llrint.s | 5 ----- src/math/x86_64/llrintf.c | 8 ++++++++ src/math/x86_64/llrintf.s | 5 ----- src/math/x86_64/lrint.c | 8 ++++++++ src/math/x86_64/lrint.s | 5 ----- src/math/x86_64/lrintf.c | 8 ++++++++ src/math/x86_64/lrintf.s | 5 ----- 8 files changed, 32 insertions(+), 20 deletions(-) create mode 100644 src/math/x86_64/llrint.c delete mode 100644 src/math/x86_64/llrint.s create mode 100644 src/math/x86_64/llrintf.c delete mode 100644 src/math/x86_64/llrintf.s create mode 100644 src/math/x86_64/lrint.c delete mode 100644 src/math/x86_64/lrint.s create mode 100644 src/math/x86_64/lrintf.c delete mode 100644 src/math/x86_64/lrintf.s diff --git a/src/math/x86_64/llrint.c b/src/math/x86_64/llrint.c new file mode 100644 index 000000000..dd38a7223 --- /dev/null +++ b/src/math/x86_64/llrint.c @@ -0,0 +1,8 @@ +#include + +long long llrint(double x) +{ + long long r; + __asm__ ("cvtsd2si %1, %0" : "=r"(r) : "x"(x)); + return r; +} diff --git a/src/math/x86_64/llrint.s b/src/math/x86_64/llrint.s deleted file mode 100644 index bf4764983..000000000 --- a/src/math/x86_64/llrint.s +++ /dev/null @@ -1,5 +0,0 @@ -.global 
llrint -.type llrint,@function -llrint: - cvtsd2si %xmm0,%rax - ret diff --git a/src/math/x86_64/llrintf.c b/src/math/x86_64/llrintf.c new file mode 100644 index 000000000..fc8625e88 --- /dev/null +++ b/src/math/x86_64/llrintf.c @@ -0,0 +1,8 @@ +#include + +long long llrintf(float x) +{ + long long r; + __asm__ ("cvtss2si %1, %0" : "=r"(r) : "x"(x)); + return r; +} diff --git a/src/math/x86_64/llrintf.s b/src/math/x86_64/llrintf.s deleted file mode 100644 index d7204ac0c..000000000 --- a/src/math/x86_64/llrintf.s +++ /dev/null @@ -1,5 +0,0 @@ -.global llrintf -.type llrintf,@function -llrintf: - cvtss2si %xmm0,%rax - ret diff --git a/src/math/x86_64/lrint.c b/src/math/x86_64/lrint.c new file mode 100644 index 000000000..a742fec64 --- /dev/null +++ b/src/math/x86_64/lrint.c @@ -0,0 +1,8 @@ +#include + +long lrint(double x) +{ + long r; + __asm__ ("cvtsd2si %1, %0" : "=r"(r) : "x"(x)); + return r; +} diff --git a/src/math/x86_64/lrint.s b/src/math/x86_64/lrint.s deleted file mode 100644 index 15fc2454b..000000000 --- a/src/math/x86_64/lrint.s +++ /dev/null @@ -1,5 +0,0 @@ -.global lrint -.type lrint,@function -lrint: - cvtsd2si %xmm0,%rax - ret diff --git a/src/math/x86_64/lrintf.c b/src/math/x86_64/lrintf.c new file mode 100644 index 000000000..2ba5639dc --- /dev/null +++ b/src/math/x86_64/lrintf.c @@ -0,0 +1,8 @@ +#include + +long lrintf(float x) +{ + long r; + __asm__ ("cvtss2si %1, %0" : "=r"(r) : "x"(x)); + return r; +} diff --git a/src/math/x86_64/lrintf.s b/src/math/x86_64/lrintf.s deleted file mode 100644 index 488423d21..000000000 --- a/src/math/x86_64/lrintf.s +++ /dev/null @@ -1,5 +0,0 @@ -.global lrintf -.type lrintf,@function -lrintf: - cvtss2si %xmm0,%rax - ret From 15775ea679ebb93b1ec0c394f6b0293ad694bb10 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Sat, 11 Jan 2020 18:14:24 +0300 Subject: [PATCH 007/189] math: move x87-family lrint functions to C with inline asm --- src/math/i386/llrint.c | 8 ++++++++ src/math/i386/llrint.s | 8 -------- 
src/math/i386/llrintf.c | 8 ++++++++ src/math/i386/llrintf.s | 9 --------- src/math/i386/llrintl.c | 8 ++++++++ src/math/i386/llrintl.s | 8 -------- src/math/i386/lrint.c | 8 ++++++++ src/math/i386/lrint.s | 7 ------- src/math/i386/lrintf.c | 8 ++++++++ src/math/i386/lrintf.s | 7 ------- src/math/i386/lrintl.c | 8 ++++++++ src/math/i386/lrintl.s | 7 ------- src/math/x86_64/llrintl.c | 8 ++++++++ src/math/x86_64/llrintl.s | 7 ------- src/math/x86_64/lrintl.c | 8 ++++++++ src/math/x86_64/lrintl.s | 7 ------- 16 files changed, 64 insertions(+), 60 deletions(-) create mode 100644 src/math/i386/llrint.c delete mode 100644 src/math/i386/llrint.s create mode 100644 src/math/i386/llrintf.c delete mode 100644 src/math/i386/llrintf.s create mode 100644 src/math/i386/llrintl.c delete mode 100644 src/math/i386/llrintl.s create mode 100644 src/math/i386/lrint.c delete mode 100644 src/math/i386/lrint.s create mode 100644 src/math/i386/lrintf.c delete mode 100644 src/math/i386/lrintf.s create mode 100644 src/math/i386/lrintl.c delete mode 100644 src/math/i386/lrintl.s create mode 100644 src/math/x86_64/llrintl.c delete mode 100644 src/math/x86_64/llrintl.s create mode 100644 src/math/x86_64/lrintl.c delete mode 100644 src/math/x86_64/lrintl.s diff --git a/src/math/i386/llrint.c b/src/math/i386/llrint.c new file mode 100644 index 000000000..aa4008171 --- /dev/null +++ b/src/math/i386/llrint.c @@ -0,0 +1,8 @@ +#include + +long long llrint(double x) +{ + long long r; + __asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/src/math/i386/llrint.s b/src/math/i386/llrint.s deleted file mode 100644 index 8e89cd913..000000000 --- a/src/math/i386/llrint.s +++ /dev/null @@ -1,8 +0,0 @@ -.global llrint -.type llrint,@function -llrint: - fldl 4(%esp) - fistpll 4(%esp) - mov 4(%esp),%eax - mov 8(%esp),%edx - ret diff --git a/src/math/i386/llrintf.c b/src/math/i386/llrintf.c new file mode 100644 index 000000000..c41a317bd --- /dev/null +++ b/src/math/i386/llrintf.c @@ 
-0,0 +1,8 @@ +#include + +long long llrintf(float x) +{ + long long r; + __asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/src/math/i386/llrintf.s b/src/math/i386/llrintf.s deleted file mode 100644 index aa850c6cd..000000000 --- a/src/math/i386/llrintf.s +++ /dev/null @@ -1,9 +0,0 @@ -.global llrintf -.type llrintf,@function -llrintf: - sub $8,%esp - flds 12(%esp) - fistpll (%esp) - pop %eax - pop %edx - ret diff --git a/src/math/i386/llrintl.c b/src/math/i386/llrintl.c new file mode 100644 index 000000000..c439ef28d --- /dev/null +++ b/src/math/i386/llrintl.c @@ -0,0 +1,8 @@ +#include + +long long llrintl(long double x) +{ + long long r; + __asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/src/math/i386/llrintl.s b/src/math/i386/llrintl.s deleted file mode 100644 index 1cfb56f15..000000000 --- a/src/math/i386/llrintl.s +++ /dev/null @@ -1,8 +0,0 @@ -.global llrintl -.type llrintl,@function -llrintl: - fldt 4(%esp) - fistpll 4(%esp) - mov 4(%esp),%eax - mov 8(%esp),%edx - ret diff --git a/src/math/i386/lrint.c b/src/math/i386/lrint.c new file mode 100644 index 000000000..89563ab26 --- /dev/null +++ b/src/math/i386/lrint.c @@ -0,0 +1,8 @@ +#include + +long lrint(double x) +{ + long r; + __asm__ ("fistpl %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/src/math/i386/lrint.s b/src/math/i386/lrint.s deleted file mode 100644 index 02b83d9ff..000000000 --- a/src/math/i386/lrint.s +++ /dev/null @@ -1,7 +0,0 @@ -.global lrint -.type lrint,@function -lrint: - fldl 4(%esp) - fistpl 4(%esp) - mov 4(%esp),%eax - ret diff --git a/src/math/i386/lrintf.c b/src/math/i386/lrintf.c new file mode 100644 index 000000000..0bbf29de0 --- /dev/null +++ b/src/math/i386/lrintf.c @@ -0,0 +1,8 @@ +#include + +long lrintf(float x) +{ + long r; + __asm__ ("fistpl %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/src/math/i386/lrintf.s b/src/math/i386/lrintf.s deleted file mode 100644 index 907aac291..000000000 --- 
a/src/math/i386/lrintf.s +++ /dev/null @@ -1,7 +0,0 @@ -.global lrintf -.type lrintf,@function -lrintf: - flds 4(%esp) - fistpl 4(%esp) - mov 4(%esp),%eax - ret diff --git a/src/math/i386/lrintl.c b/src/math/i386/lrintl.c new file mode 100644 index 000000000..eb8c09028 --- /dev/null +++ b/src/math/i386/lrintl.c @@ -0,0 +1,8 @@ +#include + +long lrintl(long double x) +{ + long r; + __asm__ ("fistpl %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/src/math/i386/lrintl.s b/src/math/i386/lrintl.s deleted file mode 100644 index 3ae05aac2..000000000 --- a/src/math/i386/lrintl.s +++ /dev/null @@ -1,7 +0,0 @@ -.global lrintl -.type lrintl,@function -lrintl: - fldt 4(%esp) - fistpl 4(%esp) - mov 4(%esp),%eax - ret diff --git a/src/math/x86_64/llrintl.c b/src/math/x86_64/llrintl.c new file mode 100644 index 000000000..c439ef28d --- /dev/null +++ b/src/math/x86_64/llrintl.c @@ -0,0 +1,8 @@ +#include + +long long llrintl(long double x) +{ + long long r; + __asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/src/math/x86_64/llrintl.s b/src/math/x86_64/llrintl.s deleted file mode 100644 index 1ec0817d3..000000000 --- a/src/math/x86_64/llrintl.s +++ /dev/null @@ -1,7 +0,0 @@ -.global llrintl -.type llrintl,@function -llrintl: - fldt 8(%rsp) - fistpll 8(%rsp) - mov 8(%rsp),%rax - ret diff --git a/src/math/x86_64/lrintl.c b/src/math/x86_64/lrintl.c new file mode 100644 index 000000000..068e2e4d6 --- /dev/null +++ b/src/math/x86_64/lrintl.c @@ -0,0 +1,8 @@ +#include + +long lrintl(long double x) +{ + long r; + __asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st"); + return r; +} diff --git a/src/math/x86_64/lrintl.s b/src/math/x86_64/lrintl.s deleted file mode 100644 index d587b12b5..000000000 --- a/src/math/x86_64/lrintl.s +++ /dev/null @@ -1,7 +0,0 @@ -.global lrintl -.type lrintl,@function -lrintl: - fldt 8(%rsp) - fistpll 8(%rsp) - mov 8(%rsp),%rax - ret From d5c08afc384533b9ba72b08f90614a4ade3a507f Mon Sep 17 00:00:00 2001 From: Alexander Monakov 
Date: Tue, 14 Jan 2020 14:53:38 +0300 Subject: [PATCH 008/189] math: move x87-family rint functions to C with inline asm --- src/math/i386/rint.c | 7 +++++++ src/math/i386/rint.s | 6 ------ src/math/i386/rintf.c | 7 +++++++ src/math/i386/rintf.s | 6 ------ src/math/i386/rintl.c | 7 +++++++ src/math/i386/rintl.s | 6 ------ src/math/x86_64/rintl.c | 7 +++++++ src/math/x86_64/rintl.s | 6 ------ 8 files changed, 28 insertions(+), 24 deletions(-) create mode 100644 src/math/i386/rint.c delete mode 100644 src/math/i386/rint.s create mode 100644 src/math/i386/rintf.c delete mode 100644 src/math/i386/rintf.s create mode 100644 src/math/i386/rintl.c delete mode 100644 src/math/i386/rintl.s create mode 100644 src/math/x86_64/rintl.c delete mode 100644 src/math/x86_64/rintl.s diff --git a/src/math/i386/rint.c b/src/math/i386/rint.c new file mode 100644 index 000000000..a5276a60d --- /dev/null +++ b/src/math/i386/rint.c @@ -0,0 +1,7 @@ +#include + +double rint(double x) +{ + __asm__ ("frndint" : "+t"(x)); + return x; +} diff --git a/src/math/i386/rint.s b/src/math/i386/rint.s deleted file mode 100644 index bb99a11c3..000000000 --- a/src/math/i386/rint.s +++ /dev/null @@ -1,6 +0,0 @@ -.global rint -.type rint,@function -rint: - fldl 4(%esp) - frndint - ret diff --git a/src/math/i386/rintf.c b/src/math/i386/rintf.c new file mode 100644 index 000000000..bb4121a4e --- /dev/null +++ b/src/math/i386/rintf.c @@ -0,0 +1,7 @@ +#include + +float rintf(float x) +{ + __asm__ ("frndint" : "+t"(x)); + return x; +} diff --git a/src/math/i386/rintf.s b/src/math/i386/rintf.s deleted file mode 100644 index bce4c5a60..000000000 --- a/src/math/i386/rintf.s +++ /dev/null @@ -1,6 +0,0 @@ -.global rintf -.type rintf,@function -rintf: - flds 4(%esp) - frndint - ret diff --git a/src/math/i386/rintl.c b/src/math/i386/rintl.c new file mode 100644 index 000000000..e1a92077f --- /dev/null +++ b/src/math/i386/rintl.c @@ -0,0 +1,7 @@ +#include + +long double rintl(long double x) +{ + __asm__ ("frndint" : 
"+t"(x)); + return x; +} diff --git a/src/math/i386/rintl.s b/src/math/i386/rintl.s deleted file mode 100644 index cd2bf9a98..000000000 --- a/src/math/i386/rintl.s +++ /dev/null @@ -1,6 +0,0 @@ -.global rintl -.type rintl,@function -rintl: - fldt 4(%esp) - frndint - ret diff --git a/src/math/x86_64/rintl.c b/src/math/x86_64/rintl.c new file mode 100644 index 000000000..e1a92077f --- /dev/null +++ b/src/math/x86_64/rintl.c @@ -0,0 +1,7 @@ +#include + +long double rintl(long double x) +{ + __asm__ ("frndint" : "+t"(x)); + return x; +} diff --git a/src/math/x86_64/rintl.s b/src/math/x86_64/rintl.s deleted file mode 100644 index 64e663cd2..000000000 --- a/src/math/x86_64/rintl.s +++ /dev/null @@ -1,6 +0,0 @@ -.global rintl -.type rintl,@function -rintl: - fldt 8(%rsp) - frndint - ret From 81c6899a4c07fe510f1f88021b32aeab9125b62a Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Tue, 14 Jan 2020 23:36:44 +0300 Subject: [PATCH 009/189] math: move x87-family remainder functions to C with inline asm --- src/math/i386/remainder.c | 12 ++++++++++++ src/math/i386/remainder.s | 14 -------------- src/math/i386/remainderf.c | 12 ++++++++++++ src/math/i386/remainderf.s | 14 -------------- src/math/i386/remainderl.c | 9 +++++++++ src/math/i386/remainderl.s | 11 ----------- src/math/x86_64/remainderl.c | 9 +++++++++ src/math/x86_64/remainderl.s | 11 ----------- 8 files changed, 42 insertions(+), 50 deletions(-) create mode 100644 src/math/i386/remainder.c delete mode 100644 src/math/i386/remainder.s create mode 100644 src/math/i386/remainderf.c delete mode 100644 src/math/i386/remainderf.s create mode 100644 src/math/i386/remainderl.c delete mode 100644 src/math/i386/remainderl.s create mode 100644 src/math/x86_64/remainderl.c delete mode 100644 src/math/x86_64/remainderl.s diff --git a/src/math/i386/remainder.c b/src/math/i386/remainder.c new file mode 100644 index 000000000..c083df904 --- /dev/null +++ b/src/math/i386/remainder.c @@ -0,0 +1,12 @@ +#include + +double 
remainder(double x, double y) +{ + unsigned short fpsr; + // fprem1 does not introduce excess precision into x + do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} + +weak_alias(remainder, drem); diff --git a/src/math/i386/remainder.s b/src/math/i386/remainder.s deleted file mode 100644 index ab1da95dd..000000000 --- a/src/math/i386/remainder.s +++ /dev/null @@ -1,14 +0,0 @@ -.global remainder -.type remainder,@function -remainder: -.weak drem -.type drem,@function -drem: - fldl 12(%esp) - fldl 4(%esp) -1: fprem1 - fnstsw %ax - sahf - jp 1b - fstp %st(1) - ret diff --git a/src/math/i386/remainderf.c b/src/math/i386/remainderf.c new file mode 100644 index 000000000..280207d26 --- /dev/null +++ b/src/math/i386/remainderf.c @@ -0,0 +1,12 @@ +#include + +float remainderf(float x, float y) +{ + unsigned short fpsr; + // fprem1 does not introduce excess precision into x + do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} + +weak_alias(remainderf, dremf); diff --git a/src/math/i386/remainderf.s b/src/math/i386/remainderf.s deleted file mode 100644 index 6a7378a34..000000000 --- a/src/math/i386/remainderf.s +++ /dev/null @@ -1,14 +0,0 @@ -.global remainderf -.type remainderf,@function -remainderf: -.weak dremf -.type dremf,@function -dremf: - flds 8(%esp) - flds 4(%esp) -1: fprem1 - fnstsw %ax - sahf - jp 1b - fstp %st(1) - ret diff --git a/src/math/i386/remainderl.c b/src/math/i386/remainderl.c new file mode 100644 index 000000000..8cf75071e --- /dev/null +++ b/src/math/i386/remainderl.c @@ -0,0 +1,9 @@ +#include + +long double remainderl(long double x, long double y) +{ + unsigned short fpsr; + do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git a/src/math/i386/remainderl.s b/src/math/i386/remainderl.s deleted file mode 100644 index b41518ede..000000000 --- a/src/math/i386/remainderl.s +++ 
/dev/null @@ -1,11 +0,0 @@ -.global remainderl -.type remainderl,@function -remainderl: - fldt 16(%esp) - fldt 4(%esp) -1: fprem1 - fnstsw %ax - sahf - jp 1b - fstp %st(1) - ret diff --git a/src/math/x86_64/remainderl.c b/src/math/x86_64/remainderl.c new file mode 100644 index 000000000..8cf75071e --- /dev/null +++ b/src/math/x86_64/remainderl.c @@ -0,0 +1,9 @@ +#include + +long double remainderl(long double x, long double y) +{ + unsigned short fpsr; + do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git a/src/math/x86_64/remainderl.s b/src/math/x86_64/remainderl.s deleted file mode 100644 index cb3857b48..000000000 --- a/src/math/x86_64/remainderl.s +++ /dev/null @@ -1,11 +0,0 @@ -.global remainderl -.type remainderl,@function -remainderl: - fldt 24(%rsp) - fldt 8(%rsp) -1: fprem1 - fnstsw %ax - testb $4,%ah - jnz 1b - fstp %st(1) - ret From d32b53cea2cc87820cedf5f087f39a36f4f7bb27 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Wed, 15 Jan 2020 18:42:46 +0300 Subject: [PATCH 010/189] math: move x87-family fmod functions to C with inline asm --- src/math/i386/fmod.c | 10 ++++++++++ src/math/i386/fmod.s | 11 ----------- src/math/i386/fmodf.c | 10 ++++++++++ src/math/i386/fmodf.s | 11 ----------- src/math/i386/fmodl.c | 9 +++++++++ src/math/i386/fmodl.s | 11 ----------- src/math/x86_64/fmodl.c | 9 +++++++++ src/math/x86_64/fmodl.s | 11 ----------- 8 files changed, 38 insertions(+), 44 deletions(-) create mode 100644 src/math/i386/fmod.c delete mode 100644 src/math/i386/fmod.s create mode 100644 src/math/i386/fmodf.c delete mode 100644 src/math/i386/fmodf.s create mode 100644 src/math/i386/fmodl.c delete mode 100644 src/math/i386/fmodl.s create mode 100644 src/math/x86_64/fmodl.c delete mode 100644 src/math/x86_64/fmodl.s diff --git a/src/math/i386/fmod.c b/src/math/i386/fmod.c new file mode 100644 index 000000000..ea0c58d9b --- /dev/null +++ b/src/math/i386/fmod.c @@ -0,0 +1,10 @@ +#include + 
+double fmod(double x, double y) +{ + unsigned short fpsr; + // fprem does not introduce excess precision into x + do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git a/src/math/i386/fmod.s b/src/math/i386/fmod.s deleted file mode 100644 index 2113b3c54..000000000 --- a/src/math/i386/fmod.s +++ /dev/null @@ -1,11 +0,0 @@ -.global fmod -.type fmod,@function -fmod: - fldl 12(%esp) - fldl 4(%esp) -1: fprem - fnstsw %ax - sahf - jp 1b - fstp %st(1) - ret diff --git a/src/math/i386/fmodf.c b/src/math/i386/fmodf.c new file mode 100644 index 000000000..90b56ab0f --- /dev/null +++ b/src/math/i386/fmodf.c @@ -0,0 +1,10 @@ +#include + +float fmodf(float x, float y) +{ + unsigned short fpsr; + // fprem does not introduce excess precision into x + do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git a/src/math/i386/fmodf.s b/src/math/i386/fmodf.s deleted file mode 100644 index e04e2a567..000000000 --- a/src/math/i386/fmodf.s +++ /dev/null @@ -1,11 +0,0 @@ -.global fmodf -.type fmodf,@function -fmodf: - flds 8(%esp) - flds 4(%esp) -1: fprem - fnstsw %ax - sahf - jp 1b - fstp %st(1) - ret diff --git a/src/math/i386/fmodl.c b/src/math/i386/fmodl.c new file mode 100644 index 000000000..3daeab060 --- /dev/null +++ b/src/math/i386/fmodl.c @@ -0,0 +1,9 @@ +#include + +long double fmodl(long double x, long double y) +{ + unsigned short fpsr; + do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git a/src/math/i386/fmodl.s b/src/math/i386/fmodl.s deleted file mode 100644 index 0cb3fe9ba..000000000 --- a/src/math/i386/fmodl.s +++ /dev/null @@ -1,11 +0,0 @@ -.global fmodl -.type fmodl,@function -fmodl: - fldt 16(%esp) - fldt 4(%esp) -1: fprem - fnstsw %ax - sahf - jp 1b - fstp %st(1) - ret diff --git a/src/math/x86_64/fmodl.c b/src/math/x86_64/fmodl.c new file mode 100644 index 000000000..3daeab060 --- 
/dev/null +++ b/src/math/x86_64/fmodl.c @@ -0,0 +1,9 @@ +#include + +long double fmodl(long double x, long double y) +{ + unsigned short fpsr; + do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + return x; +} diff --git a/src/math/x86_64/fmodl.s b/src/math/x86_64/fmodl.s deleted file mode 100644 index ea07b402f..000000000 --- a/src/math/x86_64/fmodl.s +++ /dev/null @@ -1,11 +0,0 @@ -.global fmodl -.type fmodl,@function -fmodl: - fldt 24(%rsp) - fldt 8(%rsp) -1: fprem - fnstsw %ax - testb $4,%ah - jnz 1b - fstp %st(1) - ret From 77fb64739e9454ac8ff3e874c73e856741d32ab9 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Thu, 16 Jan 2020 23:58:13 +0300 Subject: [PATCH 011/189] math: add x86_64 remquol --- src/math/x86_64/remquol.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 src/math/x86_64/remquol.c diff --git a/src/math/x86_64/remquol.c b/src/math/x86_64/remquol.c new file mode 100644 index 000000000..60eef089f --- /dev/null +++ b/src/math/x86_64/remquol.c @@ -0,0 +1,32 @@ +#include + +long double remquol(long double x, long double y, int *quo) +{ + signed char *cx = (void *)&x, *cy = (void *)&y; + /* By ensuring that addresses of x and y cannot be discarded, + * this empty asm guides GCC into representing extraction of + * their sign bits as memory loads rather than making x and y + * not-address-taken internally and using bitfield operations, + * which in the end wouldn't work out, as extraction from FPU + * registers needs to go through memory anyway. This way GCC + * should manage to use incoming stack slots without spills. */ + __asm__ ("" :: "X"(cx), "X"(cy)); + + long double t = x; + unsigned fpsr; + do __asm__ ("fprem1; fnstsw %%ax" : "+t"(t), "=a"(fpsr) : "u"(y)); + while (fpsr & 0x400); + /* C0, C1, C3 flags in x87 status word carry low bits of quotient: + * 15 14 13 12 11 10 9 8 + * . C3 . . . C2 C1 C0 + * . b1 . . . 
0 b0 b2 */ + unsigned char i = fpsr >> 8; + i = i>>4 | i<<4; + /* i[5:2] is now {b0 b2 ? b1}. Retrieve {0 b2 b1 b0} via + * in-register table lookup. */ + unsigned qbits = 0x7575313164642020 >> (i & 60); + qbits &= 7; + + *quo = (cx[9]^cy[9]) < 0 ? -qbits : qbits; + return t; +} From 1d848a6fa34f925af740c441117aeb9c65ff8a50 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 17 Apr 2020 13:46:57 -0400 Subject: [PATCH 012/189] fix undefined behavior in scanf core as reported/analyzed by Pascal Cuoq, the shlim and shcnt macros/functions are called by the scanf core (vfscanf) with f->rpos potentially null (if the FILE is not yet activated for reading at the time of the call). in this case, they compute differences between a null pointer (f->rpos) and a non-null one (f->buf), resulting in undefined behavior. it's unlikely that any observably wrong behavior occurred in practice, at least without LTO, due to limits on what's visible to the compiler from translation unit boundaries, but this has not been checked. fix is simply ensuring that the FILE is activated for read mode before entering the main scanf loop, and erroring out early if it can't be. 
--- src/stdio/vfscanf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/stdio/vfscanf.c b/src/stdio/vfscanf.c index 9e030fc44..b5ebc16ea 100644 --- a/src/stdio/vfscanf.c +++ b/src/stdio/vfscanf.c @@ -76,6 +76,9 @@ int vfscanf(FILE *restrict f, const char *restrict fmt, va_list ap) FLOCK(f); + if (!f->rpos) __toread(f); + if (!f->rpos) goto input_fail; + for (p=(const unsigned char *)fmt; *p; p++) { alloc = 0; From 5288e59c482a608d4b85373dd3a9de6793b74d43 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 17 Apr 2020 15:31:16 -0400 Subject: [PATCH 013/189] fix possible access to uninitialized memory in shgetc (via scanf) shgetc sets up to be able to perform an "unget" operation without the caller having to remember and pass back the character value, and for this purpose used a conditional store idiom: if (f->rpos[-1] != c) f->rpos[-1] = c to make it safe to use with non-writable buffers (setup by the sh_fromstring macro or __string_read with sscanf). however, validity of this depends on the buffer space at rpos[-1] being initialized, which is not the case under some conditions (including at least unbuffered files and fmemopen ones). whenever data was read "through the buffer", the desired character value is already in place and does not need to be written. thus, rather than testing for the absence of the value, we can test for rpos<=buf, indicating that the last character read could not have come from the buffer, and thereby that we have a "real" buffer (possibly of zero length) with writable pushback (UNGET bytes) below it. 
--- src/internal/shgetc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/internal/shgetc.c b/src/internal/shgetc.c index a4a9c633d..7455d2f00 100644 --- a/src/internal/shgetc.c +++ b/src/internal/shgetc.c @@ -32,6 +32,6 @@ int __shgetc(FILE *f) else f->shend = f->rend; f->shcnt = f->buf - f->rpos + cnt; - if (f->rpos[-1] != c) f->rpos[-1] = c; + if (f->rpos <= f->buf) f->rpos[-1] = c; return c; } From 2a76c86e069d7f77a16414b93beab568210d7fb9 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 17 Apr 2020 16:10:28 -0400 Subject: [PATCH 014/189] combine two calls to memset in fmemopen this idea came up when I thought we might need to zero the UNGET portion of buf as well, but it seems like a useful improvement even when that turned out not to be necessary. --- src/stdio/fmemopen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stdio/fmemopen.c b/src/stdio/fmemopen.c index 5685092e3..5afd85cf7 100644 --- a/src/stdio/fmemopen.c +++ b/src/stdio/fmemopen.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "libc.h" @@ -95,7 +96,7 @@ FILE *fmemopen(void *restrict buf, size_t size, const char *restrict mode) f = malloc(sizeof *f + (buf?0:size)); if (!f) return 0; - memset(&f->f, 0, sizeof f->f); + memset(f, 0, offsetof(struct mem_FILE, buf)); f->f.cookie = &f->c; f->f.fd = -1; f->f.lbf = EOF; @@ -106,7 +107,6 @@ FILE *fmemopen(void *restrict buf, size_t size, const char *restrict mode) memset(buf, 0, size); } - memset(&f->c, 0, sizeof f->c); f->c.buf = buf; f->c.size = size; f->c.mode = *mode; From e0f6fbd4edc0b55185f12b0b69eef67f7945f6d3 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 17 Apr 2020 16:11:43 -0400 Subject: [PATCH 015/189] remove spurious repeated semicolon in fmemopen --- src/stdio/fmemopen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stdio/fmemopen.c b/src/stdio/fmemopen.c index 5afd85cf7..343e3e3fe 100644 --- a/src/stdio/fmemopen.c +++ 
b/src/stdio/fmemopen.c @@ -103,7 +103,7 @@ FILE *fmemopen(void *restrict buf, size_t size, const char *restrict mode) f->f.buf = f->buf + UNGET; f->f.buf_size = sizeof f->buf - UNGET; if (!buf) { - buf = f->buf2;; + buf = f->buf2; memset(buf, 0, size); } From d866a8a8086a6deb5839ee7c5f2211200c208dec Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 17 Apr 2020 16:18:07 -0400 Subject: [PATCH 016/189] move __string_read into vsscanf source file apparently this function was intended at some point to be used by strto* family as well, and thus was put in its own file; however, as far as I can tell, it's only ever been used by vsscanf. move it to the same file to reduce the number of source files and external symbols. --- src/internal/stdio_impl.h | 2 -- src/stdio/__string_read.c | 16 ---------------- src/stdio/vsscanf.c | 16 +++++++++++++--- 3 files changed, 13 insertions(+), 21 deletions(-) delete mode 100644 src/stdio/__string_read.c diff --git a/src/internal/stdio_impl.h b/src/internal/stdio_impl.h index d7398f591..0b2438d61 100644 --- a/src/internal/stdio_impl.h +++ b/src/internal/stdio_impl.h @@ -60,8 +60,6 @@ hidden size_t __stdout_write(FILE *, const unsigned char *, size_t); hidden off_t __stdio_seek(FILE *, off_t, int); hidden int __stdio_close(FILE *); -hidden size_t __string_read(FILE *, unsigned char *, size_t); - hidden int __toread(FILE *); hidden int __towrite(FILE *); diff --git a/src/stdio/__string_read.c b/src/stdio/__string_read.c deleted file mode 100644 index 7b50a7e11..000000000 --- a/src/stdio/__string_read.c +++ /dev/null @@ -1,16 +0,0 @@ -#include "stdio_impl.h" -#include - -size_t __string_read(FILE *f, unsigned char *buf, size_t len) -{ - char *src = f->cookie; - size_t k = len+256; - char *end = memchr(src, 0, k); - if (end) k = end-src; - if (k < len) len = k; - memcpy(buf, src, len); - f->rpos = (void *)(src+len); - f->rend = (void *)(src+k); - f->cookie = src+k; - return len; -} diff --git a/src/stdio/vsscanf.c b/src/stdio/vsscanf.c 
index 985002256..4d6d259b8 100644 --- a/src/stdio/vsscanf.c +++ b/src/stdio/vsscanf.c @@ -1,15 +1,25 @@ #include "stdio_impl.h" +#include -static size_t do_read(FILE *f, unsigned char *buf, size_t len) +static size_t string_read(FILE *f, unsigned char *buf, size_t len) { - return __string_read(f, buf, len); + char *src = f->cookie; + size_t k = len+256; + char *end = memchr(src, 0, k); + if (end) k = end-src; + if (k < len) len = k; + memcpy(buf, src, len); + f->rpos = (void *)(src+len); + f->rend = (void *)(src+k); + f->cookie = src+k; + return len; } int vsscanf(const char *restrict s, const char *restrict fmt, va_list ap) { FILE f = { .buf = (void *)s, .cookie = (void *)s, - .read = do_read, .lock = -1 + .read = string_read, .lock = -1 }; return vfscanf(&f, fmt, ap); } From 473f8ff0c19f5f1a01406c61424a4d802378d312 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sat, 18 Apr 2020 03:23:40 -0400 Subject: [PATCH 017/189] fix sh fesetround failure to clear old mode the sh version of fesetround or'd the new rounding mode onto the control register without clearing the old rounding mode bits, making changes sticky. this was the root cause of multiple test failures. --- src/fenv/sh/fenv.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/fenv/sh/fenv.S b/src/fenv/sh/fenv.S index 907aefc0d..b3b7d66ad 100644 --- a/src/fenv/sh/fenv.S +++ b/src/fenv/sh/fenv.S @@ -12,6 +12,8 @@ fegetround: .type __fesetround, @function __fesetround: sts fpscr, r0 + mov #-4, r1 + and r1, r0 or r4, r0 lds r0, fpscr rts From 39063cd955122a6c9122b6dfd70e559e955ee089 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 24 Apr 2020 10:35:01 -0400 Subject: [PATCH 018/189] fix undefined behavior in wcsto[ld] family functions analogous to commit b287cd745c2243f8e5114331763a5a9813b5f6ee but for the custom FILE stream type the wcstol and wcstod family use. 
__toread could be used here as well, but there's a simple direct fix to make the buffer pointers initially valid for subtraction, so just do that to avoid pulling in stdio exit code in programs that don't use stdio. --- src/stdlib/wcstod.c | 3 +-- src/stdlib/wcstol.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/stdlib/wcstod.c b/src/stdlib/wcstod.c index 26fe9af8b..0deb7010b 100644 --- a/src/stdlib/wcstod.c +++ b/src/stdlib/wcstod.c @@ -33,8 +33,7 @@ static long double wcstox(const wchar_t *s, wchar_t **p, int prec) unsigned char buf[64]; FILE f = {0}; f.flags = 0; - f.rpos = f.rend = 0; - f.buf = buf + 4; + f.rpos = f.rend = f.buf = buf + 4; f.buf_size = sizeof buf - 4; f.lock = -1; f.read = do_read; diff --git a/src/stdlib/wcstol.c b/src/stdlib/wcstol.c index 4443f5772..1eeb495fd 100644 --- a/src/stdlib/wcstol.c +++ b/src/stdlib/wcstol.c @@ -35,8 +35,7 @@ static unsigned long long wcstox(const wchar_t *s, wchar_t **p, int base, unsign unsigned char buf[64]; FILE f = {0}; f.flags = 0; - f.rpos = f.rend = 0; - f.buf = buf + 4; + f.rpos = f.rend = f.buf = buf + 4; f.buf_size = sizeof buf - 4; f.lock = -1; f.read = do_read; From 89d5bd7919a97cfa742f419c074dd4f5e027ed01 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sun, 26 Apr 2020 16:47:49 -0400 Subject: [PATCH 019/189] remove arm (32-bit) support for vdso clock_gettime it's been reported that the vdso clock_gettime64 function on (32-bit) arm is broken, producing erratic results that grow at a rate far greater than one reported second per actual elapsed second. the vdso function seems to have been added sometime between linux 5.4 and 5.6, so if there's ever been a working version, it was only present for a very short window. 
it's not clear what the eventual upstream kernel solution will be, but something needs to be done on the libc side so as not to be producing binaries that seem to work on older/existing/lts kernels (which lack the function and thus lack the bug) but will break fantastically when moving to newer kernels. hopefully vdso support will be added back soon, but with a new symbol name or version from the kernel to allow continued rejection of broken ones. --- arch/arm/syscall_arch.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm/syscall_arch.h b/arch/arm/syscall_arch.h index 4b08762d7..a877b2cff 100644 --- a/arch/arm/syscall_arch.h +++ b/arch/arm/syscall_arch.h @@ -98,12 +98,6 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo __asm_syscall(R7_OPERAND, "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4), "r"(r5)); } -#define VDSO_USEFUL -#define VDSO_CGT32_SYM "__vdso_clock_gettime" -#define VDSO_CGT32_VER "LINUX_2.6" -#define VDSO_CGT_SYM "__vdso_clock_gettime64" -#define VDSO_CGT_VER "LINUX_2.6" - #define SYSCALL_FADVISE_6_ARG #define SYSCALL_IPC_BROKEN_MODE From ba4d957601d18e8d96eea04ff501009109488d2b Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 30 Apr 2020 21:36:43 -0400 Subject: [PATCH 020/189] fix undefined behavior from signed overflow in strstr and memmem unsigned char promotes to int, which can overflow when shifted left by 24 bits or more. this has been reported multiple times but then forgotten. it's expected to be benign UB, but can trap when built with explicit overflow catching (ubsan or similar). fix it now. note that promotion to uint32_t is safe and portable even outside of the assumptions usually made in musl, since either uint32_t has rank at least unsigned int, so that no further default promotions happen, or int is wide enough that the shift can't overflow. this is a desirable property to have in case someone wants to reuse the code elsewhere. 
--- src/string/memmem.c | 8 ++++---- src/string/strstr.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/string/memmem.c b/src/string/memmem.c index 58a21fcd6..11eff86e4 100644 --- a/src/string/memmem.c +++ b/src/string/memmem.c @@ -12,8 +12,8 @@ static char *twobyte_memmem(const unsigned char *h, size_t k, const unsigned cha static char *threebyte_memmem(const unsigned char *h, size_t k, const unsigned char *n) { - uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8; - uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8; + uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8; + uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8; for (h+=3, k-=3; k; k--, hw = (hw|*h++)<<8) if (hw == nw) return (char *)h-3; return hw == nw ? (char *)h-3 : 0; @@ -21,8 +21,8 @@ static char *threebyte_memmem(const unsigned char *h, size_t k, const unsigned c static char *fourbyte_memmem(const unsigned char *h, size_t k, const unsigned char *n) { - uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3]; - uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3]; + uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3]; + uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3]; for (h+=4, k-=4; k; k--, hw = hw<<8 | *h++) if (hw == nw) return (char *)h-4; return hw == nw ? (char *)h-4 : 0; diff --git a/src/string/strstr.c b/src/string/strstr.c index 55ba1c7b4..43a0207a7 100644 --- a/src/string/strstr.c +++ b/src/string/strstr.c @@ -10,16 +10,16 @@ static char *twobyte_strstr(const unsigned char *h, const unsigned char *n) static char *threebyte_strstr(const unsigned char *h, const unsigned char *n) { - uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8; - uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8; + uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8; + uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8; for (h+=2; *h && hw != nw; hw = (hw|*++h)<<8); return *h ? 
(char *)h-2 : 0; } static char *fourbyte_strstr(const unsigned char *h, const unsigned char *n) { - uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3]; - uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3]; + uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3]; + uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3]; for (h+=3; *h && hw != nw; hw = hw<<8 | *++h); return *h ? (char *)h-3 : 0; } From 779b4768737d82825e09f7428e3cc13fc114da8c Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 18 May 2020 21:17:34 -0400 Subject: [PATCH 021/189] set AD bit in dns queries, suppress for internal use the AD (authenticated data) bit in outgoing dns queries is defined by rfc3655 to request that the nameserver report (via the same bit in the response) whether the result is authenticated by DNSSEC. while all results returned by a DNSSEC conforming nameserver will be either authenticated or cryptographically proven to lack DNSSEC protection, for some applications it's necessary to be able to distinguish these two cases. in particular, conforming and compatible handling of DANE (TLSA) records requires enforcing them only in signed zones. when the AD bit was first defined for queries, there were reports of compatibility problems with broken firewalls and nameservers dropping queries with it set. these problems are probably a thing of the past, and broken nameservers are already unsupported. however, since there is no use in the AD bit with the netdb.h interfaces, explicitly clear it in the queries they make. this ensures that, even with broken setups, the standard functions will work, and at most the res_* functions break. 
--- src/network/getnameinfo.c | 1 + src/network/lookup_name.c | 1 + src/network/res_mkquery.c | 1 + 3 files changed, 3 insertions(+) diff --git a/src/network/getnameinfo.c b/src/network/getnameinfo.c index f77e73ade..949e18115 100644 --- a/src/network/getnameinfo.c +++ b/src/network/getnameinfo.c @@ -158,6 +158,7 @@ int getnameinfo(const struct sockaddr *restrict sa, socklen_t sl, unsigned char query[18+PTR_MAX], reply[512]; int qlen = __res_mkquery(0, ptr, 1, RR_PTR, 0, 0, 0, query, sizeof query); + query[3] = 0; /* don't need AD flag */ int rlen = __res_send(query, qlen, reply, sizeof reply); buf[0] = 0; if (rlen > 0) diff --git a/src/network/lookup_name.c b/src/network/lookup_name.c index c93263a9d..c4d994a16 100644 --- a/src/network/lookup_name.c +++ b/src/network/lookup_name.c @@ -149,6 +149,7 @@ static int name_from_dns(struct address buf[static MAXADDRS], char canon[static 0, 0, 0, qbuf[nq], sizeof *qbuf); if (qlens[nq] == -1) return EAI_NONAME; + qbuf[nq][3] = 0; /* don't need AD flag */ nq++; } } diff --git a/src/network/res_mkquery.c b/src/network/res_mkquery.c index 6fa04a5cf..33f50cb93 100644 --- a/src/network/res_mkquery.c +++ b/src/network/res_mkquery.c @@ -20,6 +20,7 @@ int __res_mkquery(int op, const char *dname, int class, int type, /* Construct query template - ID will be filled later */ memset(q, 0, n); q[2] = op*8 + 1; + q[3] = 32; /* AD */ q[5] = 1; memcpy((char *)q+13, dname, l); for (i=13; q[i]; i=j+1) { From f51c8d7fa840e01d2efa557cd6dba385457f45ee Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 19 May 2020 19:11:16 -0400 Subject: [PATCH 022/189] fix handling of errors resolving one of paired A+AAAA query the old logic here likely dates back, at least in inspiration, to before it was recognized that transient errors must not be allowed to reflect the contents of successful results and must be reported to the application. 
here, the dns backend for getaddrinfo, when performing a paired query for v4 and v6 addresses, accepted results for one address family even if the other timed out. (the __res_msend backend does not propagate error rcodes back to the caller, but continues to retry until timeout, so other error conditions were not actually possible.) this patch moves the checks to take place before answer parsing, and performs them for each answer rather than only the answer to the first query. if nxdomain is seen it's assumed to apply to both queries since that's how dns semantics work. --- src/network/lookup_name.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/network/lookup_name.c b/src/network/lookup_name.c index c4d994a16..aae0d95a0 100644 --- a/src/network/lookup_name.c +++ b/src/network/lookup_name.c @@ -157,14 +157,17 @@ static int name_from_dns(struct address buf[static MAXADDRS], char canon[static if (__res_msend_rc(nq, qp, qlens, ap, alens, sizeof *abuf, conf) < 0) return EAI_SYSTEM; + for (i=0; i Date: Tue, 19 May 2020 19:25:42 -0400 Subject: [PATCH 023/189] fix return value of res_send, res_query on errors from nameserver the internal __res_msend returns 0 on timeout without having obtained any conclusive answer, but in this case has not filled in meaningful anslen. res_send wrongly treated that as success, but returned a zero answer length. any reasonable caller would eventually end up treating that as an error when attempting to parse/validate it, but it should just be reported as an error. alternatively we could return the last-received inconclusive answer (typically servfail), but doing so would require internal changes in __res_msend. this may be considered later. 
--- src/network/res_send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/network/res_send.c b/src/network/res_send.c index b9cea0bfd..ee4abf1f1 100644 --- a/src/network/res_send.c +++ b/src/network/res_send.c @@ -3,7 +3,7 @@ int __res_send(const unsigned char *msg, int msglen, unsigned char *answer, int anslen) { int r = __res_msend(1, &msg, &msglen, &answer, &anslen, anslen); - return r<0 ? r : anslen; + return r<0 || !anslen ? -1 : anslen; } weak_alias(__res_send, res_send); From a1eddb21fff91fd31b2d01eba90e91fa901cd243 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 21 May 2020 13:14:40 -0400 Subject: [PATCH 024/189] handle possibility that SIGEMT replaces SIGSTKFLT in strsignal presently all archs define SIGSTKFLT but this is not correct. change strsignal as a prerequisite for fixing that. --- src/string/strsignal.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/string/strsignal.c b/src/string/strsignal.c index 96bfe841f..5156366e6 100644 --- a/src/string/strsignal.c +++ b/src/string/strsignal.c @@ -31,7 +31,11 @@ static const char map[] = { [SIGPIPE] = 13, [SIGALRM] = 14, [SIGTERM] = 15, +#if defined(SIGSTKFLT) [SIGSTKFLT] = 16, +#elif defined(SIGEMT) + [SIGEMT] = 16, +#endif [SIGCHLD] = 17, [SIGCONT] = 18, [SIGSTOP] = 19, @@ -70,7 +74,13 @@ static const char strings[] = "Broken pipe\0" "Alarm clock\0" "Terminated\0" +#if defined(SIGSTKFLT) "Stack fault\0" +#elif defined(SIGEMT) + "Emulator trap\0" +#else + "Unknown signal\0" +#endif "Child process status\0" "Continued\0" "Stopped (signal)\0" From 6dd6766c260b13694a7a847651ed061699dd4931 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 21 May 2020 13:06:21 -0400 Subject: [PATCH 025/189] fix incorrect SIGSTKFLT on all mips archs signal 7 is SIGEMT on Linux mips* ABI according to the man pages and kernel. it's not clear where the wrong name came from but it dates back to original mips commit. 
--- arch/mips/bits/signal.h | 2 +- arch/mips64/bits/signal.h | 2 +- arch/mipsn32/bits/signal.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/bits/signal.h b/arch/mips/bits/signal.h index e1d97ac78..1b69e7625 100644 --- a/arch/mips/bits/signal.h +++ b/arch/mips/bits/signal.h @@ -93,7 +93,7 @@ typedef struct __ucontext { #define SIGTRAP 5 #define SIGABRT 6 #define SIGIOT SIGABRT -#define SIGSTKFLT 7 +#define SIGEMT 7 #define SIGFPE 8 #define SIGKILL 9 #define SIGBUS 10 diff --git a/arch/mips64/bits/signal.h b/arch/mips64/bits/signal.h index c31ad07ec..4f91c9fc8 100644 --- a/arch/mips64/bits/signal.h +++ b/arch/mips64/bits/signal.h @@ -112,7 +112,7 @@ typedef struct __ucontext { #define SIGTRAP 5 #define SIGABRT 6 #define SIGIOT SIGABRT -#define SIGSTKFLT 7 +#define SIGEMT 7 #define SIGFPE 8 #define SIGKILL 9 #define SIGBUS 10 diff --git a/arch/mipsn32/bits/signal.h b/arch/mipsn32/bits/signal.h index c31ad07ec..4f91c9fc8 100644 --- a/arch/mipsn32/bits/signal.h +++ b/arch/mipsn32/bits/signal.h @@ -112,7 +112,7 @@ typedef struct __ucontext { #define SIGTRAP 5 #define SIGABRT 6 #define SIGIOT SIGABRT -#define SIGSTKFLT 7 +#define SIGEMT 7 #define SIGFPE 8 #define SIGKILL 9 #define SIGBUS 10 From 6aacbb747cc80afbb79947164a1f94ca4af3236c Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 22 May 2020 17:35:14 -0400 Subject: [PATCH 026/189] reorder thread list unlink in pthread_exit after all locks since the backend for LOCK() skips locking if single-threaded, it's unsafe to make the process appear single-threaded before the last use of lock. this fixes potential unsynchronized access to a linked list via __dl_thread_cleanup. 
--- src/thread/pthread_create.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c index 5f4910925..6a3b0c216 100644 --- a/src/thread/pthread_create.c +++ b/src/thread/pthread_create.c @@ -90,14 +90,7 @@ _Noreturn void __pthread_exit(void *result) exit(0); } - /* At this point we are committed to thread termination. Unlink - * the thread from the list. This change will not be visible - * until the lock is released, which only happens after SYS_exit - * has been called, via the exit futex address pointing at the lock. */ - libc.threads_minus_1--; - self->next->prev = self->prev; - self->prev->next = self->next; - self->prev = self->next = self; + /* At this point we are committed to thread termination. */ /* Process robust list in userspace to handle non-pshared mutexes * and the detached thread case where the robust list head will @@ -121,6 +114,16 @@ _Noreturn void __pthread_exit(void *result) __do_orphaned_stdio_locks(); __dl_thread_cleanup(); + /* Last, unlink thread from the list. This change will not be visible + * until the lock is released, which only happens after SYS_exit + * has been called, via the exit futex address pointing at the lock. + * This needs to happen after any possible calls to LOCK() that might + * skip locking if libc.threads_minus_1 is zero. */ + libc.threads_minus_1--; + self->next->prev = self->prev; + self->prev->next = self->next; + self->prev = self->next = self; + /* This atomic potentially competes with a concurrent pthread_detach * call; the loser is responsible for freeing thread resources. 
*/ int state = a_cas(&self->detach_state, DT_JOINABLE, DT_EXITING); From 60430654802fb84a06b2e9cc0e420517e31db505 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 21 May 2020 23:32:45 -0400 Subject: [PATCH 027/189] don't use libc.threads_minus_1 as relaxed atomic for skipping locks after all but the last thread exits, the next thread to observe libc.threads_minus_1==0 and conclude that it can skip locking fails to synchronize with any changes to memory that were made by the last-exiting thread. this can produce data races. on some archs, at least x86, memory synchronization is unlikely to be a problem; however, with the inline locks in malloc, skipping the lock also eliminated the compiler barrier, and caused code that needed to re-check chunk in-use bits after obtaining the lock to reuse a stale value, possibly from before the process became single-threaded. this in turn produced corruption of the heap state. some uses of libc.threads_minus_1 remain, especially for allocation of new TLS in the dynamic linker; otherwise, it could be removed entirely. it's made non-volatile to reflect that the remaining accesses are only made under lock on the thread list. instead of libc.threads_minus_1, libc.threaded is now used for skipping locks. the difference is that libc.threaded is permanently true once an additional thread has been created. this will produce some performance regression in processes that are mostly single-threaded but occasionally creating threads. in the future it may be possible to bring back the full lock-skipping, but more care needs to be taken to produce a safe design. 
--- src/internal/libc.h | 2 +- src/malloc/malloc.c | 2 +- src/thread/__lock.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/internal/libc.h b/src/internal/libc.h index ac97dc7eb..c0614852e 100644 --- a/src/internal/libc.h +++ b/src/internal/libc.h @@ -21,7 +21,7 @@ struct __libc { int can_do_threads; int threaded; int secure; - volatile int threads_minus_1; + int threads_minus_1; size_t *auxv; struct tls_module *tls_head; size_t tls_size, tls_align, tls_cnt; diff --git a/src/malloc/malloc.c b/src/malloc/malloc.c index 96982596b..2553a62e0 100644 --- a/src/malloc/malloc.c +++ b/src/malloc/malloc.c @@ -26,7 +26,7 @@ int __malloc_replaced; static inline void lock(volatile int *lk) { - if (libc.threads_minus_1) + if (libc.threaded) while(a_swap(lk, 1)) __wait(lk, lk+1, 1, 1); } diff --git a/src/thread/__lock.c b/src/thread/__lock.c index 45557c888..5b9b144e9 100644 --- a/src/thread/__lock.c +++ b/src/thread/__lock.c @@ -18,7 +18,7 @@ void __lock(volatile int *l) { - if (!libc.threads_minus_1) return; + if (!libc.threaded) return; /* fast path: INT_MIN for the lock, +1 for the congestion */ int current = a_cas(l, 0, INT_MIN + 1); if (!current) return; From de3ed1804a6590678102c73f9f0c528bb1f70e09 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 22 May 2020 17:25:38 -0400 Subject: [PATCH 028/189] cut down size of some libc struct members these are all flags that can be single-byte values. 
--- src/internal/libc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/internal/libc.h b/src/internal/libc.h index c0614852e..d47f58e01 100644 --- a/src/internal/libc.h +++ b/src/internal/libc.h @@ -18,9 +18,9 @@ struct tls_module { }; struct __libc { - int can_do_threads; - int threaded; - int secure; + char can_do_threads; + char threaded; + char secure; int threads_minus_1; size_t *auxv; struct tls_module *tls_head; From 11060c4433db1c14c450bb9aa879cadbf69008be Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 22 May 2020 17:45:47 -0400 Subject: [PATCH 029/189] restore lock-skipping for processes that return to single-threaded state the design used here relies on the barrier provided by the first lock operation after the process returns to single-threaded state to synchronize with actions by the last thread that exited. by storing the intent to change modes in the same object used to detect whether locking is needed, it's possible to avoid an extra (possibly costly) memory load after the lock is taken. 
--- src/internal/libc.h | 1 + src/malloc/malloc.c | 5 ++++- src/thread/__lock.c | 4 +++- src/thread/pthread_create.c | 8 ++++---- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/internal/libc.h b/src/internal/libc.h index d47f58e01..619bba861 100644 --- a/src/internal/libc.h +++ b/src/internal/libc.h @@ -21,6 +21,7 @@ struct __libc { char can_do_threads; char threaded; char secure; + volatile signed char need_locks; int threads_minus_1; size_t *auxv; struct tls_module *tls_head; diff --git a/src/malloc/malloc.c b/src/malloc/malloc.c index 2553a62e0..a803d4c93 100644 --- a/src/malloc/malloc.c +++ b/src/malloc/malloc.c @@ -26,8 +26,11 @@ int __malloc_replaced; static inline void lock(volatile int *lk) { - if (libc.threaded) + int need_locks = libc.need_locks; + if (need_locks) { while(a_swap(lk, 1)) __wait(lk, lk+1, 1, 1); + if (need_locks < 0) libc.need_locks = 0; + } } static inline void unlock(volatile int *lk) diff --git a/src/thread/__lock.c b/src/thread/__lock.c index 5b9b144e9..60eece49a 100644 --- a/src/thread/__lock.c +++ b/src/thread/__lock.c @@ -18,9 +18,11 @@ void __lock(volatile int *l) { - if (!libc.threaded) return; + int need_locks = libc.need_locks; + if (!need_locks) return; /* fast path: INT_MIN for the lock, +1 for the congestion */ int current = a_cas(l, 0, INT_MIN + 1); + if (need_locks < 0) libc.need_locks = 0; if (!current) return; /* A first spin loop, for medium congestion. */ for (unsigned i = 0; i < 10; ++i) { diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c index 6a3b0c216..6bdfb44f9 100644 --- a/src/thread/pthread_create.c +++ b/src/thread/pthread_create.c @@ -118,8 +118,8 @@ _Noreturn void __pthread_exit(void *result) * until the lock is released, which only happens after SYS_exit * has been called, via the exit futex address pointing at the lock. * This needs to happen after any possible calls to LOCK() that might - * skip locking if libc.threads_minus_1 is zero. 
*/ - libc.threads_minus_1--; + * skip locking if process appears single-threaded. */ + if (!--libc.threads_minus_1) libc.need_locks = -1; self->next->prev = self->prev; self->prev->next = self->next; self->prev = self->next = self; @@ -339,7 +339,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att ~(1UL<<((SIGCANCEL-1)%(8*sizeof(long)))); __tl_lock(); - libc.threads_minus_1++; + if (!libc.threads_minus_1++) libc.need_locks = 1; ret = __clone((c11 ? start_c11 : start), stack, flags, args, &new->tid, TP_ADJ(new), &__thread_list_lock); /* All clone failures translate to EAGAIN. If explicit scheduling @@ -363,7 +363,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att new->next->prev = new; new->prev->next = new; } else { - libc.threads_minus_1--; + if (!--libc.threads_minus_1) libc.need_locks = 0; } __tl_unlock(); __restore_sigs(&set); From 47700055605227360782209ca752f142a9cafa7d Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 1 Jun 2020 20:53:42 -0400 Subject: [PATCH 030/189] suppress unwanted warnings when configuring with clang coding style warnings enabled by default in clang have long been a source of spurious questions/bug-reports. since clang provides a -w that behaves differently from gcc's, and that lets us enable any warnings we may actually want after turning them all off to start with a clean slate, use it at configure time if clang is detected. --- configure | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/configure b/configure index a2728969f..435988258 100755 --- a/configure +++ b/configure @@ -494,6 +494,13 @@ fnmatch '-march=*|*\ -march=*' "$CC $CFLAGS" || tryldflag CFLAGS_AUTO -march=i48 fnmatch '-mtune=*|*\ -mtune=*' "$CC $CFLAGS" || tryldflag CFLAGS_AUTO -mtune=generic fi +# +# GCC defines -w as overriding any -W options, regardless of order, but +# clang has a bunch of annoying warnings enabled by default and needs -w +# to start from a clean slate. 
So use -w if building with clang. +# +test "$cc_family" = clang && tryflag CFLAGS_AUTO -w + # # Even with -std=c99, gcc accepts some constructs which are constraint # violations. We want to treat these as errors regardless of whether From 4d78fc5dd861e29829305a4d3aea634ae49f4550 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 2 Jun 2020 17:37:14 -0400 Subject: [PATCH 031/189] fix unbounded heap expansion race in malloc this has been a longstanding issue reported many times over the years, with it becoming increasingly clear that it could be hit in practice. under concurrent malloc and free from multiple threads, it's possible to hit usage patterns where unbounded amounts of new memory are obtained via brk/mmap despite the total nominal usage being small and bounded. the underlying cause is that, as a fundamental consequence of keeping locking as fine-grained as possible, the state where free has unbinned an already-free chunk to merge it with a newly-freed one, but has not yet re-binned the combined chunk, is exposed to other threads. this is bad even with small chunks, and leads to suboptimal use of memory, but where it really blows up is where the already-freed chunk in question is the large free region "at the top of the heap". in this situation, other threads momentarily see a state of having almost no free memory, and conclude that they need to obtain more. as far as I can tell there is no fix for this that does not harm performance. the fix made here forces all split/merge of free chunks to take place under a single lock, which also takes the place of the old free_lock, being held at least momentarily at the time of free to determine whether there are neighboring free chunks that need merging. as a consequence, the pretrim, alloc_fwd, and alloc_rev operations no longer make sense and are deleted. simplified merging now takes place inline in free (__bin_chunk) and realloc. 
as commented in the source, holding the split_merge_lock precludes any chunk transition from in-use to free state. for the most part, it also precludes change to chunk header sizes. however, __memalign may still modify the sizes of an in-use chunk to split it into two in-use chunks. arguably this should require holding the split_merge_lock, but that would necessitate refactoring to expose it externally, which is a mess. and it turns out not to be necessary, at least assuming the existing sloppy memory model malloc has been using, because if free (__bin_chunk) or realloc sees any unsynchronized change to the size, it will also see the in-use bit being set, and thereby can't do anything with the neighboring chunk that changed size. --- src/malloc/malloc.c | 239 ++++++++++++++++---------------------------- 1 file changed, 87 insertions(+), 152 deletions(-) diff --git a/src/malloc/malloc.c b/src/malloc/malloc.c index a803d4c93..20598ec3a 100644 --- a/src/malloc/malloc.c +++ b/src/malloc/malloc.c @@ -17,7 +17,7 @@ static struct { volatile uint64_t binmap; struct bin bins[64]; - volatile int free_lock[2]; + volatile int split_merge_lock[2]; } mal; int __malloc_replaced; @@ -128,7 +128,6 @@ void __dump_heap(int x) static struct chunk *expand_heap(size_t n) { - static int heap_lock[2]; static void *end; void *p; struct chunk *w; @@ -138,13 +137,8 @@ static struct chunk *expand_heap(size_t n) * we need room for an extra zero-sized sentinel chunk. */ n += SIZE_ALIGN; - lock(heap_lock); - p = __expand_heap(&n); - if (!p) { - unlock(heap_lock); - return 0; - } + if (!p) return 0; /* If not just expanding existing space, we need to make a * new sentinel chunk below the allocated space. 
*/ @@ -167,8 +161,6 @@ static struct chunk *expand_heap(size_t n) w = MEM_TO_CHUNK(p); w->csize = n | C_INUSE; - unlock(heap_lock); - return w; } @@ -198,96 +190,44 @@ static void unbin(struct chunk *c, int i) NEXT_CHUNK(c)->psize |= C_INUSE; } -static int alloc_fwd(struct chunk *c) -{ - int i; - size_t k; - while (!((k=c->csize) & C_INUSE)) { - i = bin_index(k); - lock_bin(i); - if (c->csize == k) { - unbin(c, i); - unlock_bin(i); - return 1; - } - unlock_bin(i); - } - return 0; -} - -static int alloc_rev(struct chunk *c) +static void bin_chunk(struct chunk *self, int i) { - int i; - size_t k; - while (!((k=c->psize) & C_INUSE)) { - i = bin_index(k); - lock_bin(i); - if (c->psize == k) { - unbin(PREV_CHUNK(c), i); - unlock_bin(i); - return 1; - } - unlock_bin(i); - } - return 0; + self->next = BIN_TO_CHUNK(i); + self->prev = mal.bins[i].tail; + self->next->prev = self; + self->prev->next = self; + if (self->prev == BIN_TO_CHUNK(i)) + a_or_64(&mal.binmap, 1ULL<= n1 - DONTCARE) return; next = NEXT_CHUNK(self); split = (void *)((char *)self + n); - split->prev = self->prev; - split->next = self->next; - split->prev->next = split; - split->next->prev = split; split->psize = n | C_INUSE; split->csize = n1-n; next->psize = n1-n; self->csize = n | C_INUSE; - return 1; -} -static void trim(struct chunk *self, size_t n) -{ - size_t n1 = CHUNK_SIZE(self); - struct chunk *next, *split; - - if (n >= n1 - DONTCARE) return; + int i = bin_index(n1-n); + lock_bin(i); - next = NEXT_CHUNK(self); - split = (void *)((char *)self + n); - - split->psize = n | C_INUSE; - split->csize = n1-n | C_INUSE; - next->psize = n1-n | C_INUSE; - self->csize = n | C_INUSE; + bin_chunk(split, i); - __bin_chunk(split); + unlock_bin(i); } void *malloc(size_t n) { struct chunk *c; int i, j; + uint64_t mask; if (adjust_size(&n) < 0) return 0; @@ -303,33 +243,37 @@ void *malloc(size_t n) } i = bin_index_up(n); - for (;;) { - uint64_t mask = mal.binmap & -(1ULL<psize = c->csize = - x->csize + 
CHUNK_SIZE(c); - } - break; + if (i<63 && (mal.binmap & (1ULL<psize; char *base = (char *)self - extra; @@ -408,27 +354,24 @@ void *realloc(void *p, size_t n) /* Crash on corrupted footer (likely from buffer overflow) */ if (next->psize != self->csize) a_crash(); - /* Merge adjacent chunks if we need more space. This is not - * a waste of time even if we fail to get enough space, because our - * subsequent call to free would otherwise have to do the merge. */ - if (n > n1 && alloc_fwd(next)) { - n1 += CHUNK_SIZE(next); - next = NEXT_CHUNK(next); - } - /* FIXME: find what's wrong here and reenable it..? */ - if (0 && n > n1 && alloc_rev(self)) { - self = PREV_CHUNK(self); - n1 += CHUNK_SIZE(self); - } - self->csize = n1 | C_INUSE; - next->psize = n1 | C_INUSE; + lock(mal.split_merge_lock); - /* If we got enough space, split off the excess and return */ - if (n <= n1) { - //memmove(CHUNK_TO_MEM(self), p, n0-OVERHEAD); - trim(self, n); - return CHUNK_TO_MEM(self); + size_t nsize = next->csize & C_INUSE ? 0 : CHUNK_SIZE(next); + if (n0+nsize >= n) { + int i = bin_index(nsize); + lock_bin(i); + if (!(next->csize & C_INUSE)) { + unbin(next, i); + unlock_bin(i); + next = NEXT_CHUNK(next); + self->csize = next->psize = n0+nsize | C_INUSE; + trim(self, n); + unlock(mal.split_merge_lock); + return CHUNK_TO_MEM(self); + } + unlock_bin(i); } + unlock(mal.split_merge_lock); copy_realloc: /* As a last resort, allocate a new chunk and copy to it. 
*/ @@ -443,59 +386,51 @@ void *realloc(void *p, size_t n) void __bin_chunk(struct chunk *self) { struct chunk *next = NEXT_CHUNK(self); - size_t final_size, new_size, size; - int reclaim=0; - int i; - - final_size = new_size = CHUNK_SIZE(self); /* Crash on corrupted footer (likely from buffer overflow) */ if (next->psize != self->csize) a_crash(); - for (;;) { - if (self->psize & next->csize & C_INUSE) { - self->csize = final_size | C_INUSE; - next->psize = final_size | C_INUSE; - i = bin_index(final_size); - lock_bin(i); - lock(mal.free_lock); - if (self->psize & next->csize & C_INUSE) - break; - unlock(mal.free_lock); - unlock_bin(i); - } + lock(mal.split_merge_lock); - if (alloc_rev(self)) { - self = PREV_CHUNK(self); - size = CHUNK_SIZE(self); - final_size += size; - if (new_size+size > RECLAIM && (new_size+size^size) > size) - reclaim = 1; - } + size_t osize = CHUNK_SIZE(self), size = osize; + + /* Since we hold split_merge_lock, only transition from free to + * in-use can race; in-use to free is impossible */ + size_t psize = self->psize & C_INUSE ? 0 : CHUNK_PSIZE(self); + size_t nsize = next->csize & C_INUSE ? 
0 : CHUNK_SIZE(next); - if (alloc_fwd(next)) { - size = CHUNK_SIZE(next); - final_size += size; - if (new_size+size > RECLAIM && (new_size+size^size) > size) - reclaim = 1; + if (psize) { + int i = bin_index(psize); + lock_bin(i); + if (!(self->psize & C_INUSE)) { + struct chunk *prev = PREV_CHUNK(self); + unbin(prev, i); + self = prev; + size += psize; + } + unlock_bin(i); + } + if (nsize) { + int i = bin_index(nsize); + lock_bin(i); + if (!(next->csize & C_INUSE)) { + unbin(next, i); next = NEXT_CHUNK(next); + size += nsize; } + unlock_bin(i); } - if (!(mal.binmap & 1ULL<csize = final_size; - next->psize = final_size; - unlock(mal.free_lock); + int i = bin_index(size); + lock_bin(i); - self->next = BIN_TO_CHUNK(i); - self->prev = mal.bins[i].tail; - self->next->prev = self; - self->prev->next = self; + self->csize = size; + next->psize = size; + bin_chunk(self, i); + unlock(mal.split_merge_lock); /* Replace middle of large chunks with fresh zero pages */ - if (reclaim) { + if (size > RECLAIM && (size^(size-osize)) > size-osize) { uintptr_t a = (uintptr_t)self + SIZE_ALIGN+PAGE_SIZE-1 & -PAGE_SIZE; uintptr_t b = (uintptr_t)next - SIZE_ALIGN & -PAGE_SIZE; #if 1 From 38ce1dc1eae40d0c487ae169d5ed075750981e26 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 2 Jun 2020 20:07:39 -0400 Subject: [PATCH 032/189] fix broken time64 clock_adjtime the 64-bit time code path used the wrong (time32) syscall. fortunately this code path is not yet taken unless attempting to set a post-Y2038 time. 
--- src/linux/clock_adjtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linux/clock_adjtime.c b/src/linux/clock_adjtime.c index 23eb8729d..52d3e0c43 100644 --- a/src/linux/clock_adjtime.c +++ b/src/linux/clock_adjtime.c @@ -63,7 +63,7 @@ int clock_adjtime (clockid_t clock_id, struct timex *utx) .stbcnt = utx->stbcnt, .tai = utx->tai, }; - r = __syscall(SYS_clock_adjtime, clock_id, &ktx); + r = __syscall(SYS_clock_adjtime64, clock_id, &ktx); if (r>=0) { utx->modes = ktx.modes; utx->offset = ktx.offset; From dbebd3e33d7438f36ca02d5f34b1d21c52d337af Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 2 Jun 2020 20:19:11 -0400 Subject: [PATCH 033/189] always use time64 syscall first for clock_adjtime clock_adjtime always returns the current clock setting in struct timex, so it's always possible that the time64 version is needed. --- src/linux/clock_adjtime.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/linux/clock_adjtime.c b/src/linux/clock_adjtime.c index 52d3e0c43..94521bfcb 100644 --- a/src/linux/clock_adjtime.c +++ b/src/linux/clock_adjtime.c @@ -38,8 +38,7 @@ int clock_adjtime (clockid_t clock_id, struct timex *utx) { int r = -ENOSYS; #ifdef SYS_clock_adjtime64 - if (SYS_clock_adjtime == SYS_clock_adjtime64 || - (utx->modes & ADJ_SETOFFSET) && !IS32BIT(utx->time.tv_sec)) { + if (1) { struct ktimex64 ktx = { .modes = utx->modes, .offset = utx->offset, From 0ecc5e8b385fbe5bf1b0b2c278693a97548a5955 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 2 Jun 2020 20:21:48 -0400 Subject: [PATCH 034/189] reformat clock_adjtime with always-true condition removed --- src/linux/clock_adjtime.c | 94 +++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 48 deletions(-) diff --git a/src/linux/clock_adjtime.c b/src/linux/clock_adjtime.c index 94521bfcb..d4d03d24d 100644 --- a/src/linux/clock_adjtime.c +++ b/src/linux/clock_adjtime.c @@ -38,54 +38,52 @@ int clock_adjtime (clockid_t clock_id, struct 
timex *utx) { int r = -ENOSYS; #ifdef SYS_clock_adjtime64 - if (1) { - struct ktimex64 ktx = { - .modes = utx->modes, - .offset = utx->offset, - .freq = utx->freq, - .maxerror = utx->maxerror, - .esterror = utx->esterror, - .status = utx->status, - .constant = utx->constant, - .precision = utx->precision, - .tolerance = utx->tolerance, - .time_sec = utx->time.tv_sec, - .time_usec = utx->time.tv_usec, - .tick = utx->tick, - .ppsfreq = utx->ppsfreq, - .jitter = utx->jitter, - .shift = utx->shift, - .stabil = utx->stabil, - .jitcnt = utx->jitcnt, - .calcnt = utx->calcnt, - .errcnt = utx->errcnt, - .stbcnt = utx->stbcnt, - .tai = utx->tai, - }; - r = __syscall(SYS_clock_adjtime64, clock_id, &ktx); - if (r>=0) { - utx->modes = ktx.modes; - utx->offset = ktx.offset; - utx->freq = ktx.freq; - utx->maxerror = ktx.maxerror; - utx->esterror = ktx.esterror; - utx->status = ktx.status; - utx->constant = ktx.constant; - utx->precision = ktx.precision; - utx->tolerance = ktx.tolerance; - utx->time.tv_sec = ktx.time_sec; - utx->time.tv_usec = ktx.time_usec; - utx->tick = ktx.tick; - utx->ppsfreq = ktx.ppsfreq; - utx->jitter = ktx.jitter; - utx->shift = ktx.shift; - utx->stabil = ktx.stabil; - utx->jitcnt = ktx.jitcnt; - utx->calcnt = ktx.calcnt; - utx->errcnt = ktx.errcnt; - utx->stbcnt = ktx.stbcnt; - utx->tai = ktx.tai; - } + struct ktimex64 ktx = { + .modes = utx->modes, + .offset = utx->offset, + .freq = utx->freq, + .maxerror = utx->maxerror, + .esterror = utx->esterror, + .status = utx->status, + .constant = utx->constant, + .precision = utx->precision, + .tolerance = utx->tolerance, + .time_sec = utx->time.tv_sec, + .time_usec = utx->time.tv_usec, + .tick = utx->tick, + .ppsfreq = utx->ppsfreq, + .jitter = utx->jitter, + .shift = utx->shift, + .stabil = utx->stabil, + .jitcnt = utx->jitcnt, + .calcnt = utx->calcnt, + .errcnt = utx->errcnt, + .stbcnt = utx->stbcnt, + .tai = utx->tai, + }; + r = __syscall(SYS_clock_adjtime64, clock_id, &ktx); + if (r>=0) { + utx->modes = 
ktx.modes; + utx->offset = ktx.offset; + utx->freq = ktx.freq; + utx->maxerror = ktx.maxerror; + utx->esterror = ktx.esterror; + utx->status = ktx.status; + utx->constant = ktx.constant; + utx->precision = ktx.precision; + utx->tolerance = ktx.tolerance; + utx->time.tv_sec = ktx.time_sec; + utx->time.tv_usec = ktx.time_usec; + utx->tick = ktx.tick; + utx->ppsfreq = ktx.ppsfreq; + utx->jitter = ktx.jitter; + utx->shift = ktx.shift; + utx->stabil = ktx.stabil; + utx->jitcnt = ktx.jitcnt; + utx->calcnt = ktx.calcnt; + utx->errcnt = ktx.errcnt; + utx->stbcnt = ktx.stbcnt; + utx->tai = ktx.tai; } if (SYS_clock_adjtime == SYS_clock_adjtime64 || r!=-ENOSYS) return __syscall_ret(r); From 3ab8b15467ba06fdb4cb242309219d731d668296 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 2 Jun 2020 21:38:25 -0400 Subject: [PATCH 035/189] move declaration of interfaces between malloc and ldso to dynlink.h this eliminates consumers of malloc_impl.h outside of the malloc implementation. --- ldso/dynlink.c | 1 - src/internal/dynlink.h | 3 +++ src/internal/malloc_impl.h | 5 +---- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/ldso/dynlink.c b/ldso/dynlink.c index 6468f203c..f6926919a 100644 --- a/ldso/dynlink.c +++ b/ldso/dynlink.c @@ -23,7 +23,6 @@ #include "pthread_impl.h" #include "libc.h" #include "dynlink.h" -#include "malloc_impl.h" static void error(const char *, ...); diff --git a/src/internal/dynlink.h b/src/internal/dynlink.h index 764e3a1a8..b739add2f 100644 --- a/src/internal/dynlink.h +++ b/src/internal/dynlink.h @@ -105,4 +105,7 @@ hidden void __dl_vseterr(const char *, va_list); hidden ptrdiff_t __tlsdesc_static(), __tlsdesc_dynamic(); +hidden extern int __malloc_replaced; +hidden void __malloc_donate(char *, char *); + #endif diff --git a/src/internal/malloc_impl.h b/src/internal/malloc_impl.h index 59785a7fe..2c61b3ffe 100644 --- a/src/internal/malloc_impl.h +++ b/src/internal/malloc_impl.h @@ -2,11 +2,10 @@ #define MALLOC_IMPL_H #include +#include 
"dynlink.h" hidden void *__expand_heap(size_t *); -hidden void __malloc_donate(char *, char *); - hidden void *__memalign(size_t, size_t); struct chunk { @@ -41,6 +40,4 @@ struct bin { hidden void __bin_chunk(struct chunk *); -hidden extern int __malloc_replaced; - #endif From 907e46305f839271c13e09f63495b01297ed3a11 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 2 Jun 2020 21:40:05 -0400 Subject: [PATCH 036/189] move malloc_impl.h from src/internal to src/malloc this reflects that it is no longer intended for consumption outside of the malloc implementation. --- src/{internal => malloc}/malloc_impl.h | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/{internal => malloc}/malloc_impl.h (100%) diff --git a/src/internal/malloc_impl.h b/src/malloc/malloc_impl.h similarity index 100% rename from src/internal/malloc_impl.h rename to src/malloc/malloc_impl.h From 3195b7a6d4ba52cb2ec386c89546bd69c4051306 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 3 Jun 2020 18:13:18 -0400 Subject: [PATCH 037/189] rewrite bump allocator to fix corner cases, decouple from expand_heap this affects the bump allocator used when static linking in programs that don't need allocation metadata due to not using realloc, free, etc. commit e3bc22f1eff87b8f029a6ab31f1a269d69e4b053 refactored the bump allocator to share code with __expand_heap, used by malloc, for the purpose of fixing the case (mainly nommu) where brk doesn't work. however, the geometric growth behavior of __expand_heap is not actually well-suited to the bump allocator, and can produce significant excessive memory usage. in particular, by repeatedly requesting just over the remaining free space in the current mmap-allocated area, the total mapped memory will be roughly double the nominal usage. and since the main user of the no-brk mmap fallback in the bump allocator is nommu, this excessive usage is not just virtual address space but physical memory. 
in addition, even on systems with brk, having a unified size request to __expand_heap without knowing whether the brk or mmap backend would get used made it so the brk could be expanded twice as far as needed. for example, with malloc(n) and n-1 bytes available before the current brk, the brk would be expanded by n bytes rounded up to page size, when expansion by just one page would have sufficed. the new implementation computes request size separately for the cases where brk expansion is being attempted vs using mmap, and also performs individual mmap of large allocations without moving to a new bump area and throwing away the rest of the old one. this greatly reduces the need for geometric area size growth and limits the extent to which free space at the end of one bump area might be unusable for future allocations. as a bonus, the resulting code size is somewhat smaller than the combined old version plus __expand_heap. --- src/malloc/lite_malloc.c | 89 ++++++++++++++++++++++++++++++++-------- 1 file changed, 72 insertions(+), 17 deletions(-) diff --git a/src/malloc/lite_malloc.c b/src/malloc/lite_malloc.c index 050d84f64..c3f0c129a 100644 --- a/src/malloc/lite_malloc.c +++ b/src/malloc/lite_malloc.c @@ -2,44 +2,99 @@ #include #include #include +#include +#include "libc.h" #include "lock.h" -#include "malloc_impl.h" +#include "syscall.h" #define ALIGN 16 +/* This function returns true if the interval [old,new] + * intersects the 'len'-sized interval below &libc.auxv + * (interpreted as the main-thread stack) or below &b + * (the current stack). It is used to defend against + * buggy brk implementations that can cross the stack. */ + +static int traverses_stack_p(uintptr_t old, uintptr_t new) +{ + const uintptr_t len = 8<<20; + uintptr_t a, b; + + b = (uintptr_t)libc.auxv; + a = b > len ? b-len : 0; + if (new>a && old len ? 
b-len : 0; + if (new>a && old SIZE_MAX/2) { + errno = ENOMEM; + return 0; + } + if (!n) n++; while (align end-cur) { - size_t m = n; - char *new = __expand_heap(&m); - if (!new) { - UNLOCK(lock); - return 0; + size_t req = n - (end-cur) + PAGE_SIZE-1 & -PAGE_SIZE; + + if (!cur) { + brk = __syscall(SYS_brk, 0); + brk += -brk & PAGE_SIZE-1; + cur = end = brk; } - if (new != end) { - cur = new; - n -= pad; - pad = 0; + + if (brk == end && req < SIZE_MAX-brk + && !traverses_stack_p(brk, brk+req) + && __syscall(SYS_brk, brk+req)==brk+req) { + brk = end += req; + } else { + int new_area = 0; + req = n + PAGE_SIZE-1 & -PAGE_SIZE; + /* Only make a new area rather than individual mmap + * if wasted space would be over 1/8 of the map. */ + if (req-n > req/8) { + /* Geometric area size growth up to 64 pages, + * bounding waste by 1/8 of the area. */ + size_t min = PAGE_SIZE<<(mmap_step/2); + if (min-n > end-cur) { + if (req < min) { + req = min; + if (mmap_step < 12) + mmap_step++; + } + new_area = 1; + } + } + void *mem = __mmap(0, req, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED || !new_area) { + UNLOCK(lock); + return mem==MAP_FAILED ? 0 : mem; + } + cur = (uintptr_t)mem; + end = cur + req; } - end = new + m; } - p = cur + pad; + p = (void *)cur; cur += n; UNLOCK(lock); return p; From ee4c742bf97fee596e189e2f01b640a260c32b1c Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 3 Jun 2020 18:51:21 -0400 Subject: [PATCH 038/189] remove stale document from malloc src directory this was an unfinished draft document present since the initial check-in, that was never intended to ship in its current form. remove it as part of reorganizing for replacement of the allocator. 
--- src/malloc/DESIGN | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 src/malloc/DESIGN diff --git a/src/malloc/DESIGN b/src/malloc/DESIGN deleted file mode 100644 index 58b0523ff..000000000 --- a/src/malloc/DESIGN +++ /dev/null @@ -1,22 +0,0 @@ - - -In principle, this memory allocator is roughly equivalent to Doug -Lea's dlmalloc with fine-grained locking. - - - -malloc: - -Uses a freelist binned by chunk size, with a bitmap to optimize -searching for the smallest non-empty bin which can satisfy an -allocation. If no free chunks are available, it creates a new chunk of -the requested size and attempts to merge it with any existing free -chunk immediately below the newly created chunk. - -Whether the chunk was obtained from a bin or newly created, it's -likely to be larger than the requested allocation. malloc always -finishes its work by passing the new chunk to realloc, which will -split it into two chunks and free the tail portion. - - - From 3244d64f1544b5551e5a7e1543be672db6652fc1 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 3 Jun 2020 19:07:38 -0400 Subject: [PATCH 039/189] rename aligned_alloc source file this is the first step of swapping the name of the actual implementation to aligned_alloc while preserving history follow. 
--- src/malloc/{aligned_alloc.c => memalign_altname.c} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/malloc/{aligned_alloc.c => memalign_altname.c} (100%) diff --git a/src/malloc/aligned_alloc.c b/src/malloc/memalign_altname.c similarity index 100% rename from src/malloc/aligned_alloc.c rename to src/malloc/memalign_altname.c From 33d1a73340a5b2d981838238ebf0868e7b0ae52c Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 3 Jun 2020 19:11:23 -0400 Subject: [PATCH 040/189] reverse dependency order of memalign and aligned_alloc this change eliminates the internal __memalign function and makes the memalign and posix_memalign functions completely independent of the malloc implementation, written portably in terms of aligned_alloc. --- src/malloc/malloc_impl.h | 2 -- src/malloc/memalign.c | 4 +--- src/malloc/memalign_altname.c | 6 +++--- src/malloc/posix_memalign.c | 3 +-- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/malloc/malloc_impl.h b/src/malloc/malloc_impl.h index 2c61b3ffe..6a92c1c6b 100644 --- a/src/malloc/malloc_impl.h +++ b/src/malloc/malloc_impl.h @@ -6,8 +6,6 @@ hidden void *__expand_heap(size_t *); -hidden void *__memalign(size_t, size_t); - struct chunk { size_t psize, csize; struct chunk *next, *prev; diff --git a/src/malloc/memalign.c b/src/malloc/memalign.c index cf9dfbda6..e06c76ed0 100644 --- a/src/malloc/memalign.c +++ b/src/malloc/memalign.c @@ -3,7 +3,7 @@ #include #include "malloc_impl.h" -void *__memalign(size_t align, size_t len) +void *aligned_alloc(size_t align, size_t len) { unsigned char *mem, *new; @@ -50,5 +50,3 @@ void *__memalign(size_t align, size_t len) __bin_chunk(c); return new; } - -weak_alias(__memalign, memalign); diff --git a/src/malloc/memalign_altname.c b/src/malloc/memalign_altname.c index b6143f303..32cd87d81 100644 --- a/src/malloc/memalign_altname.c +++ b/src/malloc/memalign_altname.c @@ -1,7 +1,7 @@ +#define _BSD_SOURCE #include -#include "malloc_impl.h" -void *aligned_alloc(size_t 
align, size_t len) +void *memalign(size_t align, size_t len) { - return __memalign(align, len); + return aligned_alloc(align, len); } diff --git a/src/malloc/posix_memalign.c b/src/malloc/posix_memalign.c index 2ea8bd8a4..ad4d8f473 100644 --- a/src/malloc/posix_memalign.c +++ b/src/malloc/posix_memalign.c @@ -1,11 +1,10 @@ #include #include -#include "malloc_impl.h" int posix_memalign(void **res, size_t align, size_t len) { if (align < sizeof(void *)) return EINVAL; - void *mem = __memalign(align, len); + void *mem = aligned_alloc(align, len); if (!mem) return errno; *res = mem; return 0; From b30b9ec9d93dad3bea99a7147a34daff9f8b031e Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 3 Jun 2020 19:13:11 -0400 Subject: [PATCH 041/189] rename aligned_alloc source file back to its proper name --- src/malloc/{memalign.c => aligned_alloc.c} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/malloc/{memalign.c => aligned_alloc.c} (100%) diff --git a/src/malloc/memalign.c b/src/malloc/aligned_alloc.c similarity index 100% rename from src/malloc/memalign.c rename to src/malloc/aligned_alloc.c From 61182202582494b37d5aa88d8028b8067bfd83df Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 3 Jun 2020 19:13:40 -0400 Subject: [PATCH 042/189] rename memalign source file back to its proper name --- src/malloc/{memalign_altname.c => memalign.c} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/malloc/{memalign_altname.c => memalign.c} (100%) diff --git a/src/malloc/memalign_altname.c b/src/malloc/memalign.c similarity index 100% rename from src/malloc/memalign_altname.c rename to src/malloc/memalign.c From d46d977a45d38e631d2ea531270e9cc927b46e32 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 3 Jun 2020 19:17:19 -0400 Subject: [PATCH 043/189] move __expand_heap into malloc.c this function is no longer used elsewhere, and moving it reduces the number of source files specific to the malloc implementation. 
--- src/malloc/expand_heap.c | 71 ---------------------------------------- src/malloc/malloc.c | 64 ++++++++++++++++++++++++++++++++++++ src/malloc/malloc_impl.h | 2 -- 3 files changed, 64 insertions(+), 73 deletions(-) delete mode 100644 src/malloc/expand_heap.c diff --git a/src/malloc/expand_heap.c b/src/malloc/expand_heap.c deleted file mode 100644 index e6a3d7a00..000000000 --- a/src/malloc/expand_heap.c +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include -#include "libc.h" -#include "syscall.h" -#include "malloc_impl.h" - -/* This function returns true if the interval [old,new] - * intersects the 'len'-sized interval below &libc.auxv - * (interpreted as the main-thread stack) or below &b - * (the current stack). It is used to defend against - * buggy brk implementations that can cross the stack. */ - -static int traverses_stack_p(uintptr_t old, uintptr_t new) -{ - const uintptr_t len = 8<<20; - uintptr_t a, b; - - b = (uintptr_t)libc.auxv; - a = b > len ? b-len : 0; - if (new>a && old len ? 
b-len : 0; - if (new>a && old SIZE_MAX/2 - PAGE_SIZE) { - errno = ENOMEM; - return 0; - } - n += -n & PAGE_SIZE-1; - - if (!brk) { - brk = __syscall(SYS_brk, 0); - brk += -brk & PAGE_SIZE-1; - } - - if (n < SIZE_MAX-brk && !traverses_stack_p(brk, brk+n) - && __syscall(SYS_brk, brk+n)==brk+n) { - *pn = n; - brk += n; - return (void *)(brk-n); - } - - size_t min = (size_t)PAGE_SIZE << mmap_step/2; - if (n < min) n = min; - void *area = __mmap(0, n, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - if (area == MAP_FAILED) return 0; - *pn = n; - mmap_step++; - return area; -} diff --git a/src/malloc/malloc.c b/src/malloc/malloc.c index 20598ec3a..df3ea1bec 100644 --- a/src/malloc/malloc.c +++ b/src/malloc/malloc.c @@ -126,6 +126,70 @@ void __dump_heap(int x) } #endif +/* This function returns true if the interval [old,new] + * intersects the 'len'-sized interval below &libc.auxv + * (interpreted as the main-thread stack) or below &b + * (the current stack). It is used to defend against + * buggy brk implementations that can cross the stack. */ + +static int traverses_stack_p(uintptr_t old, uintptr_t new) +{ + const uintptr_t len = 8<<20; + uintptr_t a, b; + + b = (uintptr_t)libc.auxv; + a = b > len ? b-len : 0; + if (new>a && old len ? 
b-len : 0; + if (new>a && old SIZE_MAX/2 - PAGE_SIZE) { + errno = ENOMEM; + return 0; + } + n += -n & PAGE_SIZE-1; + + if (!brk) { + brk = __syscall(SYS_brk, 0); + brk += -brk & PAGE_SIZE-1; + } + + if (n < SIZE_MAX-brk && !traverses_stack_p(brk, brk+n) + && __syscall(SYS_brk, brk+n)==brk+n) { + *pn = n; + brk += n; + return (void *)(brk-n); + } + + size_t min = (size_t)PAGE_SIZE << mmap_step/2; + if (n < min) n = min; + void *area = __mmap(0, n, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (area == MAP_FAILED) return 0; + *pn = n; + mmap_step++; + return area; +} + static struct chunk *expand_heap(size_t n) { static void *end; diff --git a/src/malloc/malloc_impl.h b/src/malloc/malloc_impl.h index 6a92c1c6b..e1cf4774c 100644 --- a/src/malloc/malloc_impl.h +++ b/src/malloc/malloc_impl.h @@ -4,8 +4,6 @@ #include #include "dynlink.h" -hidden void *__expand_heap(size_t *); - struct chunk { size_t psize, csize; struct chunk *next, *prev; From a453ed516d289610e4f5fbc0cf2dcdbe6af3ced1 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 3 Jun 2020 19:22:12 -0400 Subject: [PATCH 044/189] move oldmalloc to its own directory under src/malloc this sets the stage for replacement, and makes it practical to keep oldmalloc around as a build option for a while if that ends up being useful. only the files which are actually part of the implementation are moved. memalign and posix_memalign are entirely generic. in theory calloc could be pulled out too, but it's useful to have it tied to the implementation so as to optimize out unnecessary memset when implementation details make it possible to know the memory is already clear. 
--- Makefile | 3 ++- src/malloc/{ => oldmalloc}/aligned_alloc.c | 0 src/malloc/{ => oldmalloc}/malloc.c | 0 src/malloc/{ => oldmalloc}/malloc_impl.h | 0 src/malloc/{ => oldmalloc}/malloc_usable_size.c | 0 5 files changed, 2 insertions(+), 1 deletion(-) rename src/malloc/{ => oldmalloc}/aligned_alloc.c (100%) rename src/malloc/{ => oldmalloc}/malloc.c (100%) rename src/malloc/{ => oldmalloc}/malloc_impl.h (100%) rename src/malloc/{ => oldmalloc}/malloc_usable_size.c (100%) diff --git a/Makefile b/Makefile index bd8f5c389..3d3e3622b 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,8 @@ includedir = $(prefix)/include libdir = $(prefix)/lib syslibdir = /lib -SRC_DIRS = $(addprefix $(srcdir)/,src/* crt ldso $(COMPAT_SRC_DIRS)) +MALLOC_DIR = oldmalloc +SRC_DIRS = $(addprefix $(srcdir)/,src/* src/malloc/$(MALLOC_DIR) crt ldso $(COMPAT_SRC_DIRS)) BASE_GLOBS = $(addsuffix /*.c,$(SRC_DIRS)) ARCH_GLOBS = $(addsuffix /$(ARCH)/*.[csS],$(SRC_DIRS)) BASE_SRCS = $(sort $(wildcard $(BASE_GLOBS))) diff --git a/src/malloc/aligned_alloc.c b/src/malloc/oldmalloc/aligned_alloc.c similarity index 100% rename from src/malloc/aligned_alloc.c rename to src/malloc/oldmalloc/aligned_alloc.c diff --git a/src/malloc/malloc.c b/src/malloc/oldmalloc/malloc.c similarity index 100% rename from src/malloc/malloc.c rename to src/malloc/oldmalloc/malloc.c diff --git a/src/malloc/malloc_impl.h b/src/malloc/oldmalloc/malloc_impl.h similarity index 100% rename from src/malloc/malloc_impl.h rename to src/malloc/oldmalloc/malloc_impl.h diff --git a/src/malloc/malloc_usable_size.c b/src/malloc/oldmalloc/malloc_usable_size.c similarity index 100% rename from src/malloc/malloc_usable_size.c rename to src/malloc/oldmalloc/malloc_usable_size.c From 36754b66e1051214366cf629a47bad9c660059ed Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 10 Jun 2020 19:41:27 -0400 Subject: [PATCH 045/189] switch to a common calloc implementation abstractly, calloc is completely malloc-implementation-independent; it's 
malloc followed by memset, or as we do it, a "conditional memset" that avoids touching fresh zero pages. previously, calloc was kept separate for the bump allocator, which can always skip memset, and the version of calloc provided with the full malloc conditionally skipped the clearing for large direct-mmapped allocations. the latter is a moderately attractive optimization, and can be added back if needed. however, further consideration to make it correct under malloc replacement would be needed. commit b4b1e10364c8737a632be61582e05a8d3acf5690 documented the contract for malloc replacement as allowing omission of calloc, and indeed that worked for dynamic linking, but for static linking it was possible to get the non-clearing definition from the bump allocator; if not for that, it would have been a link error trying to pull in malloc.o. the conditional-clearing code for the new common calloc is taken from mal0_clear in oldmalloc, but drops the need to access actual page size and just uses a fixed value of 4096. this avoids potentially needing access to global data for the sake of an optimization that at best marginally helps archs with offensively-large page sizes. 
--- src/malloc/calloc.c | 37 +++++++++++++++++++++++++++++++++++ src/malloc/lite_malloc.c | 11 ----------- src/malloc/oldmalloc/malloc.c | 36 ---------------------------------- 3 files changed, 37 insertions(+), 47 deletions(-) create mode 100644 src/malloc/calloc.c diff --git a/src/malloc/calloc.c b/src/malloc/calloc.c new file mode 100644 index 000000000..322193ca8 --- /dev/null +++ b/src/malloc/calloc.c @@ -0,0 +1,37 @@ +#include +#include +#include +#include + +static size_t mal0_clear(char *p, size_t n) +{ + const size_t pagesz = 4096; /* arbitrary */ + if (n < pagesz) return n; +#ifdef __GNUC__ + typedef uint64_t __attribute__((__may_alias__)) T; +#else + typedef unsigned char T; +#endif + char *pp = p + n; + size_t i = (uintptr_t)pp & (pagesz - 1); + for (;;) { + pp = memset(pp - i, 0, i); + if (pp - p < pagesz) return pp - p; + for (i = pagesz; i; i -= 2*sizeof(T), pp -= 2*sizeof(T)) + if (((T *)pp)[-1] | ((T *)pp)[-2]) + break; + } +} + +void *calloc(size_t m, size_t n) +{ + if (n && m > (size_t)-1/n) { + errno = ENOMEM; + return 0; + } + n *= m; + void *p = malloc(n); + if (!p) return p; + n = mal0_clear(p, n); + return memset(p, 0, n); +} diff --git a/src/malloc/lite_malloc.c b/src/malloc/lite_malloc.c index c3f0c129a..f8931ba59 100644 --- a/src/malloc/lite_malloc.c +++ b/src/malloc/lite_malloc.c @@ -101,14 +101,3 @@ static void *__simple_malloc(size_t n) } weak_alias(__simple_malloc, malloc); - -static void *__simple_calloc(size_t m, size_t n) -{ - if (n && m > (size_t)-1/n) { - errno = ENOMEM; - return 0; - } - return __simple_malloc(n * m); -} - -weak_alias(__simple_calloc, calloc); diff --git a/src/malloc/oldmalloc/malloc.c b/src/malloc/oldmalloc/malloc.c index df3ea1bec..afa75722b 100644 --- a/src/malloc/oldmalloc/malloc.c +++ b/src/malloc/oldmalloc/malloc.c @@ -341,42 +341,6 @@ void *malloc(size_t n) return CHUNK_TO_MEM(c); } -static size_t mal0_clear(char *p, size_t pagesz, size_t n) -{ -#ifdef __GNUC__ - typedef uint64_t 
__attribute__((__may_alias__)) T; -#else - typedef unsigned char T; -#endif - char *pp = p + n; - size_t i = (uintptr_t)pp & (pagesz - 1); - for (;;) { - pp = memset(pp - i, 0, i); - if (pp - p < pagesz) return pp - p; - for (i = pagesz; i; i -= 2*sizeof(T), pp -= 2*sizeof(T)) - if (((T *)pp)[-1] | ((T *)pp)[-2]) - break; - } -} - -void *calloc(size_t m, size_t n) -{ - if (n && m > (size_t)-1/n) { - errno = ENOMEM; - return 0; - } - n *= m; - void *p = malloc(n); - if (!p) return p; - if (!__malloc_replaced) { - if (IS_MMAPPED(MEM_TO_CHUNK(p))) - return p; - if (n >= PAGE_SIZE) - n = mal0_clear(p, PAGE_SIZE, n); - } - return memset(p, 0, n); -} - void *realloc(void *p, size_t n) { struct chunk *self, *next; From f3752e3abbba2c322cf48c3298c7449081d0f6cd Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 10 Jun 2020 20:42:54 -0400 Subject: [PATCH 046/189] move __malloc_replaced to a top-level malloc file it's not part of the malloc implementation but glue with musl dynamic linker. --- src/malloc/oldmalloc/malloc.c | 2 -- src/malloc/replaced.c | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 src/malloc/replaced.c diff --git a/src/malloc/oldmalloc/malloc.c b/src/malloc/oldmalloc/malloc.c index afa75722b..1c6b07eca 100644 --- a/src/malloc/oldmalloc/malloc.c +++ b/src/malloc/oldmalloc/malloc.c @@ -20,8 +20,6 @@ static struct { volatile int split_merge_lock[2]; } mal; -int __malloc_replaced; - /* Synchronization tools */ static inline void lock(volatile int *lk) diff --git a/src/malloc/replaced.c b/src/malloc/replaced.c new file mode 100644 index 000000000..8acc249c8 --- /dev/null +++ b/src/malloc/replaced.c @@ -0,0 +1,3 @@ +#include "dynlink.h" + +int __malloc_replaced; From 4c239d1264423af28cd05d88df6502b6563a05fd Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 10 Jun 2020 20:44:51 -0400 Subject: [PATCH 047/189] reintroduce calloc elison of memset for direct-mmapped allocations a new weak predicate function replacable by the malloc 
implementation, __malloc_allzerop, is introduced. by default it's always false; the default version will be used when static linking if the bump allocator was used (in which case performance doesn't matter) or if malloc was replaced by the application. only if the real internal malloc is linked (always the case with dynamic linking) does the real version get used. if malloc was replaced dynamically, as indicated by __malloc_replaced, the predicate function is ignored and conditional-memset is always performed. --- src/internal/dynlink.h | 1 + src/malloc/calloc.c | 10 +++++++++- src/malloc/oldmalloc/malloc.c | 5 +++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/internal/dynlink.h b/src/internal/dynlink.h index b739add2f..78baa0804 100644 --- a/src/internal/dynlink.h +++ b/src/internal/dynlink.h @@ -107,5 +107,6 @@ hidden ptrdiff_t __tlsdesc_static(), __tlsdesc_dynamic(); hidden extern int __malloc_replaced; hidden void __malloc_donate(char *, char *); +hidden int __malloc_allzerop(void *); #endif diff --git a/src/malloc/calloc.c b/src/malloc/calloc.c index 322193ca8..bf6bddca3 100644 --- a/src/malloc/calloc.c +++ b/src/malloc/calloc.c @@ -2,6 +2,7 @@ #include #include #include +#include "dynlink.h" static size_t mal0_clear(char *p, size_t n) { @@ -23,6 +24,12 @@ static size_t mal0_clear(char *p, size_t n) } } +static int allzerop(void *p) +{ + return 0; +} +weak_alias(allzerop, __malloc_allzerop); + void *calloc(size_t m, size_t n) { if (n && m > (size_t)-1/n) { @@ -31,7 +38,8 @@ void *calloc(size_t m, size_t n) } n *= m; void *p = malloc(n); - if (!p) return p; + if (!p || (!__malloc_replaced && __malloc_allzerop(p))) + return p; n = mal0_clear(p, n); return memset(p, 0, n); } diff --git a/src/malloc/oldmalloc/malloc.c b/src/malloc/oldmalloc/malloc.c index 1c6b07eca..0a38690c6 100644 --- a/src/malloc/oldmalloc/malloc.c +++ b/src/malloc/oldmalloc/malloc.c @@ -339,6 +339,11 @@ void *malloc(size_t n) return CHUNK_TO_MEM(c); } +int 
__malloc_allzerop(void *p) +{ + return IS_MMAPPED(MEM_TO_CHUNK(p)); +} + void *realloc(void *p, size_t n) { struct chunk *self, *next; From b6064d1bd29bd5893a5bf1dfe589edccb2150841 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 10 Jun 2020 22:02:45 -0400 Subject: [PATCH 048/189] have ldso track replacement of aligned_alloc this is in preparation for improving behavior of malloc interposition. --- ldso/dynlink.c | 2 ++ src/internal/dynlink.h | 1 + src/malloc/replaced.c | 1 + 3 files changed, 4 insertions(+) diff --git a/ldso/dynlink.c b/ldso/dynlink.c index f6926919a..d3d4ddd28 100644 --- a/ldso/dynlink.c +++ b/ldso/dynlink.c @@ -1935,6 +1935,8 @@ void __dls3(size_t *sp, size_t *auxv) * possibility of incomplete replacement. */ if (find_sym(head, "malloc", 1).dso != &ldso) __malloc_replaced = 1; + if (find_sym(head, "aligned_alloc", 1).dso != &ldso) + __aligned_alloc_replaced = 1; /* Switch to runtime mode: any further failures in the dynamic * linker are a reportable failure rather than a fatal startup diff --git a/src/internal/dynlink.h b/src/internal/dynlink.h index 78baa0804..51c0639ff 100644 --- a/src/internal/dynlink.h +++ b/src/internal/dynlink.h @@ -106,6 +106,7 @@ hidden void __dl_vseterr(const char *, va_list); hidden ptrdiff_t __tlsdesc_static(), __tlsdesc_dynamic(); hidden extern int __malloc_replaced; +hidden extern int __aligned_alloc_replaced; hidden void __malloc_donate(char *, char *); hidden int __malloc_allzerop(void *); diff --git a/src/malloc/replaced.c b/src/malloc/replaced.c index 8acc249c8..07fce61ec 100644 --- a/src/malloc/replaced.c +++ b/src/malloc/replaced.c @@ -1,3 +1,4 @@ #include "dynlink.h" int __malloc_replaced; +int __aligned_alloc_replaced; From 8a48f26066d7dfe641b3accaca791d1a48219f27 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 10 Jun 2020 22:05:03 -0400 Subject: [PATCH 049/189] only disable aligned_alloc if malloc was replaced but it wasn't if both malloc and aligned_alloc have been replaced but the internal
aligned_alloc still gets called, the replacement is a wrapper of some sort. it's not clear if this usage should be officially supported, but it's at least a plausibly interesting debugging usage, and easy to do. it should not be relied upon unless it's documented as supported at some later time. --- src/malloc/oldmalloc/aligned_alloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/malloc/oldmalloc/aligned_alloc.c b/src/malloc/oldmalloc/aligned_alloc.c index e06c76ed0..4adca3b4f 100644 --- a/src/malloc/oldmalloc/aligned_alloc.c +++ b/src/malloc/oldmalloc/aligned_alloc.c @@ -12,7 +12,8 @@ void *aligned_alloc(size_t align, size_t len) return 0; } - if (len > SIZE_MAX - align || __malloc_replaced) { + if (len > SIZE_MAX - align || + (__malloc_replaced && !__aligned_alloc_replaced)) { errno = ENOMEM; return 0; } From f7e62dcae2418e6e4610bad82ff251095eb4d73c Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 11 Jun 2020 00:12:48 -0400 Subject: [PATCH 050/189] add fallback a_clz_32 implementation some archs already have a_clz_32, used to provide a_ctz_32, but it hasn't been mandatory because it's not used anywhere yet. mallocng will need it, however, so add it now. it should probably be optimized better, but doesn't seem to make a difference at present. 
--- src/internal/atomic.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/internal/atomic.h b/src/internal/atomic.h index f938879b0..99539cc03 100644 --- a/src/internal/atomic.h +++ b/src/internal/atomic.h @@ -315,4 +315,19 @@ static inline int a_clz_64(uint64_t x) } #endif +#ifndef a_clz_32 +#define a_clz_32 a_clz_32 +static inline int a_clz_32(uint32_t x) +{ + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + return 31-a_ctz_32(x); +} +#endif + #endif From 758023bba74204d2c9cf2245d4f771ff1b058b0a Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 15 Jun 2020 18:59:59 -0400 Subject: [PATCH 051/189] fix invalid use of access function in nftw access always computes result with real ids not effective ones, so it is not a valid means of determining whether the directory is readable. instead, attempt to open it before reporting whether it's readable, and then use fdopendir rather than opendir to open and read the entries. effort is made here to keep fd_limit behavior the same as before even if it was not correct. --- src/misc/nftw.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/misc/nftw.c b/src/misc/nftw.c index 0a4641007..8dcff7fef 100644 --- a/src/misc/nftw.c +++ b/src/misc/nftw.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -26,6 +27,8 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int, struct history new; int type; int r; + int dfd; + int err; struct FTW lev; if ((flags & FTW_PHYS) ? 
lstat(path, &st) : stat(path, &st) < 0) { @@ -34,8 +37,7 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int, else if (errno != EACCES) return -1; else type = FTW_NS; } else if (S_ISDIR(st.st_mode)) { - if (access(path, R_OK) < 0) type = FTW_DNR; - else if (flags & FTW_DEPTH) type = FTW_DP; + if (flags & FTW_DEPTH) type = FTW_DP; else type = FTW_D; } else if (S_ISLNK(st.st_mode)) { if (flags & FTW_PHYS) type = FTW_SL; @@ -63,6 +65,13 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int, lev.base = k; } + if (type == FTW_D || type == FTW_DP) { + dfd = open(path, O_RDONLY); + err = errno; + if (dfd < 0 && err == EACCES) type = FTW_DNR; + if (!fd_limit) close(dfd); + } + if (!(flags & FTW_DEPTH) && (r=fn(path, &st, type, &lev))) return r; @@ -71,7 +80,11 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int, return 0; if ((type == FTW_D || type == FTW_DP) && fd_limit) { - DIR *d = opendir(path); + if (dfd < 0) { + errno = err; + return -1; + } + DIR *d = fdopendir(dfd); if (d) { struct dirent *de; while ((de = readdir(d))) { @@ -92,7 +105,8 @@ static int do_nftw(char *path, int (*fn)(const char *, const struct stat *, int, } } closedir(d); - } else if (errno != EACCES) { + } else { + close(dfd); return -1; } } From 527a0c0d1b109144adfd55177ba2a279de8492b2 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 16 Jun 2020 00:34:12 -0400 Subject: [PATCH 052/189] fix memset overflow in oldmalloc race fix overhaul commit 3e16313f8fe2ed143ae0267fd79d63014c24779f introduced this bug by making the copy case reachable with n (new size) smaller than n0 (original size). this was left as the only way of shrinking an allocation because it reduces fragmentation if a free chunk of the appropriate size is available. when that's not the case, another approach may be better, but any such improvement would be independent of fixing this bug. 
--- src/malloc/oldmalloc/malloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/malloc/oldmalloc/malloc.c b/src/malloc/oldmalloc/malloc.c index 0a38690c6..52af19759 100644 --- a/src/malloc/oldmalloc/malloc.c +++ b/src/malloc/oldmalloc/malloc.c @@ -409,7 +409,7 @@ void *realloc(void *p, size_t n) new = malloc(n-OVERHEAD); if (!new) return 0; copy_free_ret: - memcpy(new, p, n0-OVERHEAD); + memcpy(new, p, (n Date: Tue, 16 Jun 2020 00:53:57 -0400 Subject: [PATCH 053/189] only use memcpy realloc to shrink if an exact-sized free chunk exists otherwise, shrink in-place. as explained in the description of commit 3e16313f8fe2ed143ae0267fd79d63014c24779f, the split here is valid without holding split_merge_lock because all chunks involved are in the in-use state. --- src/malloc/oldmalloc/malloc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/malloc/oldmalloc/malloc.c b/src/malloc/oldmalloc/malloc.c index 52af19759..c0997ad85 100644 --- a/src/malloc/oldmalloc/malloc.c +++ b/src/malloc/oldmalloc/malloc.c @@ -385,6 +385,18 @@ void *realloc(void *p, size_t n) /* Crash on corrupted footer (likely from buffer overflow) */ if (next->psize != self->csize) a_crash(); + if (n < n0) { + int i = bin_index_up(n); + int j = bin_index(n0); + if (icsize = split->psize = n | C_INUSE; + split->csize = next->psize = n0-n | C_INUSE; + __bin_chunk(split); + return CHUNK_TO_MEM(self); + } + lock(mal.split_merge_lock); size_t nsize = next->csize & C_INUSE ? 0 : CHUNK_SIZE(next); From 17a9f883ee1ccaab687eb08704fdb16a81c6c3f6 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sun, 21 Jun 2020 02:15:50 -0400 Subject: [PATCH 054/189] clear need_locks in child after fork the child is single-threaded, but may still need to synchronize with last changes made to memory by another thread in the parent, so set need_locks to -1 whereby the next lock-taker will drop to 0 and prevent further barriers/locking. 
--- src/process/fork.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/process/fork.c b/src/process/fork.c index fb42478ae..7e984ff8c 100644 --- a/src/process/fork.c +++ b/src/process/fork.c @@ -30,6 +30,7 @@ pid_t fork(void) self->next = self->prev = self; __thread_list_lock = 0; libc.threads_minus_1 = 0; + if (libc.need_locks) libc.need_locks = -1; } __restore_sigs(&set); __fork_handler(!ret); From 4c450b310036f8ab0215abbeb712b63ac568f58c Mon Sep 17 00:00:00 2001 From: Andre McCurdy Date: Tue, 21 Jan 2020 10:52:15 -0800 Subject: [PATCH 055/189] add big-endian support to ARM assembler memcpy Allow the existing ARM assembler memcpy implementation to be used for both big and little endian targets. --- COPYRIGHT | 2 +- src/string/arm/{memcpy_le.S => memcpy.S} | 101 ++++++++++++++++++++++- src/string/arm/memcpy.c | 3 - 3 files changed, 98 insertions(+), 8 deletions(-) rename src/string/arm/{memcpy_le.S => memcpy.S} (82%) delete mode 100644 src/string/arm/memcpy.c diff --git a/COPYRIGHT b/COPYRIGHT index e64723714..d3edc2a2d 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -127,7 +127,7 @@ Copyright © 2017-2018 Arm Limited and labelled as such in comments in the individual source files. All have been licensed under extremely permissive terms. -The ARM memcpy code (src/string/arm/memcpy_el.S) is Copyright © 2008 +The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008 The Android Open Source Project and is licensed under a two-clause BSD license. It was taken from Bionic libc, used on Android. diff --git a/src/string/arm/memcpy_le.S b/src/string/arm/memcpy.S similarity index 82% rename from src/string/arm/memcpy_le.S rename to src/string/arm/memcpy.S index 7b35d305e..869e34481 100644 --- a/src/string/arm/memcpy_le.S +++ b/src/string/arm/memcpy.S @@ -1,5 +1,3 @@ -#if !__ARMEB__ - /* * Copyright (C) 2008 The Android Open Source Project * All rights reserved. 
@@ -42,7 +40,7 @@ * code safely callable from thumb mode, adjusting the return * instructions to be compatible with pre-thumb ARM cpus, removal of * prefetch code that is not compatible with older cpus and support for - * building as thumb 2. + * building as thumb 2 and big-endian. */ .syntax unified @@ -227,24 +225,45 @@ non_congruent: * becomes aligned to 32 bits (r5 = nb of words to copy for alignment) */ movs r5, r5, lsl #31 + +#if __ARMEB__ + movmi r3, r3, ror #24 + strbmi r3, [r0], #1 + movcs r3, r3, ror #24 + strbcs r3, [r0], #1 + movcs r3, r3, ror #24 + strbcs r3, [r0], #1 +#else strbmi r3, [r0], #1 movmi r3, r3, lsr #8 strbcs r3, [r0], #1 movcs r3, r3, lsr #8 strbcs r3, [r0], #1 movcs r3, r3, lsr #8 +#endif cmp r2, #4 blo partial_word_tail +#if __ARMEB__ + mov r3, r3, lsr r12 + mov r3, r3, lsl r12 +#endif + /* Align destination to 32 bytes (cache line boundary) */ 1: tst r0, #0x1c beq 2f ldr r5, [r1], #4 sub r2, r2, #4 +#if __ARMEB__ + mov r4, r5, lsr lr + orr r4, r4, r3 + mov r3, r5, lsl r12 +#else mov r4, r5, lsl lr orr r4, r4, r3 mov r3, r5, lsr r12 +#endif str r4, [r0], #4 cmp r2, #4 bhs 1b @@ -270,6 +289,25 @@ loop16: ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} subs r2, r2, #32 ldrhs r12, [r1], #4 +#if __ARMEB__ + orr r3, r3, r4, lsr #16 + mov r4, r4, lsl #16 + orr r4, r4, r5, lsr #16 + mov r5, r5, lsl #16 + orr r5, r5, r6, lsr #16 + mov r6, r6, lsl #16 + orr r6, r6, r7, lsr #16 + mov r7, r7, lsl #16 + orr r7, r7, r8, lsr #16 + mov r8, r8, lsl #16 + orr r8, r8, r9, lsr #16 + mov r9, r9, lsl #16 + orr r9, r9, r10, lsr #16 + mov r10, r10, lsl #16 + orr r10, r10, r11, lsr #16 + stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} + mov r3, r11, lsl #16 +#else orr r3, r3, r4, lsl #16 mov r4, r4, lsr #16 orr r4, r4, r5, lsl #16 @@ -287,6 +325,7 @@ loop16: orr r10, r10, r11, lsl #16 stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} mov r3, r11, lsr #16 +#endif bhs 1b b less_than_thirtytwo @@ -296,6 +335,25 @@ loop8: ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} subs r2, r2, #32 ldrhs r12, [r1], 
#4 +#if __ARMEB__ + orr r3, r3, r4, lsr #24 + mov r4, r4, lsl #8 + orr r4, r4, r5, lsr #24 + mov r5, r5, lsl #8 + orr r5, r5, r6, lsr #24 + mov r6, r6, lsl #8 + orr r6, r6, r7, lsr #24 + mov r7, r7, lsl #8 + orr r7, r7, r8, lsr #24 + mov r8, r8, lsl #8 + orr r8, r8, r9, lsr #24 + mov r9, r9, lsl #8 + orr r9, r9, r10, lsr #24 + mov r10, r10, lsl #8 + orr r10, r10, r11, lsr #24 + stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} + mov r3, r11, lsl #8 +#else orr r3, r3, r4, lsl #24 mov r4, r4, lsr #8 orr r4, r4, r5, lsl #24 @@ -313,6 +371,7 @@ loop8: orr r10, r10, r11, lsl #24 stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} mov r3, r11, lsr #8 +#endif bhs 1b b less_than_thirtytwo @@ -322,6 +381,25 @@ loop24: ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} subs r2, r2, #32 ldrhs r12, [r1], #4 +#if __ARMEB__ + orr r3, r3, r4, lsr #8 + mov r4, r4, lsl #24 + orr r4, r4, r5, lsr #8 + mov r5, r5, lsl #24 + orr r5, r5, r6, lsr #8 + mov r6, r6, lsl #24 + orr r6, r6, r7, lsr #8 + mov r7, r7, lsl #24 + orr r7, r7, r8, lsr #8 + mov r8, r8, lsl #24 + orr r8, r8, r9, lsr #8 + mov r9, r9, lsl #24 + orr r9, r9, r10, lsr #8 + mov r10, r10, lsl #24 + orr r10, r10, r11, lsr #8 + stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} + mov r3, r11, lsl #24 +#else orr r3, r3, r4, lsl #8 mov r4, r4, lsr #24 orr r4, r4, r5, lsl #8 @@ -339,6 +417,7 @@ loop24: orr r10, r10, r11, lsl #8 stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} mov r3, r11, lsr #24 +#endif bhs 1b less_than_thirtytwo: @@ -350,9 +429,15 @@ less_than_thirtytwo: 1: ldr r5, [r1], #4 sub r2, r2, #4 +#if __ARMEB__ + mov r4, r5, lsr lr + orr r4, r4, r3 + mov r3, r5, lsl r12 +#else mov r4, r5, lsl lr orr r4, r4, r3 mov r3, r5, lsr r12 +#endif str r4, [r0], #4 cmp r2, #4 bhs 1b @@ -360,11 +445,20 @@ less_than_thirtytwo: partial_word_tail: /* we have a partial word in the input buffer */ movs r5, lr, lsl #(31-3) +#if __ARMEB__ + movmi r3, r3, ror #24 + strbmi r3, [r0], #1 + movcs r3, r3, ror #24 + strbcs r3, [r0], #1 + movcs r3, r3, ror #24 + strbcs r3, [r0], #1 +#else strbmi r3, 
[r0], #1 movmi r3, r3, lsr #8 strbcs r3, [r0], #1 movcs r3, r3, lsr #8 strbcs r3, [r0], #1 +#endif /* Refill spilled registers from the stack. Don't update sp. */ ldmfd sp, {r5-r11} @@ -383,4 +477,3 @@ copy_last_3_and_return: ldmfd sp!, {r0, r4, lr} bx lr -#endif diff --git a/src/string/arm/memcpy.c b/src/string/arm/memcpy.c deleted file mode 100644 index 041614f4b..000000000 --- a/src/string/arm/memcpy.c +++ /dev/null @@ -1,3 +0,0 @@ -#if __ARMEB__ -#include "../memcpy.c" -#endif From b1ee0c887d94c4f0b7dcde1348d540f83e3f5fbc Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 26 Jun 2020 17:37:21 -0400 Subject: [PATCH 056/189] add optimized aarch64 memcpy and memset these are based on the ARM optimized-routines repository v20.05 (ef907c7a799a), with macro dependencies flattened out and memmove code removed from memcpy. this change is somewhat unfortunate since having the branch for memmove support in the large n case of memcpy is the performance-optimal and size-optimal way to do both, but it makes memcpy alone (static-linked) about 40% larger and suggests a policy that use of memcpy as memmove is supported. tabs used for alignment have also been replaced with spaces. --- COPYRIGHT | 3 + src/string/aarch64/memcpy.S | 186 ++++++++++++++++++++++++++++++++++++ src/string/aarch64/memset.S | 115 ++++++++++++++++++++++ 3 files changed, 304 insertions(+) create mode 100644 src/string/aarch64/memcpy.S create mode 100644 src/string/aarch64/memset.S diff --git a/COPYRIGHT b/COPYRIGHT index d3edc2a2d..c1628e9ac 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -131,6 +131,9 @@ The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008 The Android Open Source Project and is licensed under a two-clause BSD license. It was taken from Bionic libc, used on Android. +The AArch64 memcpy and memset code (src/string/aarch64/*) are +Copyright © 1999-2019, Arm Limited. + The implementation of DES for crypt (src/crypt/crypt_des.c) is Copyright © 1994 David Burren. 
It is licensed under a BSD license. diff --git a/src/string/aarch64/memcpy.S b/src/string/aarch64/memcpy.S new file mode 100644 index 000000000..48bb8a8d3 --- /dev/null +++ b/src/string/aarch64/memcpy.S @@ -0,0 +1,186 @@ +/* + * memcpy - copy memory area + * + * Copyright (c) 2012-2020, Arm Limited. + * SPDX-License-Identifier: MIT + */ + +/* Assumptions: + * + * ARMv8-a, AArch64, unaligned accesses. + * + */ + +#define dstin x0 +#define src x1 +#define count x2 +#define dst x3 +#define srcend x4 +#define dstend x5 +#define A_l x6 +#define A_lw w6 +#define A_h x7 +#define B_l x8 +#define B_lw w8 +#define B_h x9 +#define C_l x10 +#define C_lw w10 +#define C_h x11 +#define D_l x12 +#define D_h x13 +#define E_l x14 +#define E_h x15 +#define F_l x16 +#define F_h x17 +#define G_l count +#define G_h dst +#define H_l src +#define H_h srcend +#define tmp1 x14 + +/* This implementation of memcpy uses unaligned accesses and branchless + sequences to keep the code small, simple and improve performance. + + Copies are split into 3 main cases: small copies of up to 32 bytes, medium + copies of up to 128 bytes, and large copies. The overhead of the overlap + check is negligible since it is only required for large copies. + + Large copies use a software pipelined loop processing 64 bytes per iteration. + The destination pointer is 16-byte aligned to minimize unaligned accesses. + The loop tail is handled by always copying 64 bytes from the end. +*/ + +.global memcpy +.type memcpy,%function +memcpy: + add srcend, src, count + add dstend, dstin, count + cmp count, 128 + b.hi .Lcopy_long + cmp count, 32 + b.hi .Lcopy32_128 + + /* Small copies: 0..32 bytes. */ + cmp count, 16 + b.lo .Lcopy16 + ldp A_l, A_h, [src] + ldp D_l, D_h, [srcend, -16] + stp A_l, A_h, [dstin] + stp D_l, D_h, [dstend, -16] + ret + + /* Copy 8-15 bytes. 
*/ +.Lcopy16: + tbz count, 3, .Lcopy8 + ldr A_l, [src] + ldr A_h, [srcend, -8] + str A_l, [dstin] + str A_h, [dstend, -8] + ret + + .p2align 3 + /* Copy 4-7 bytes. */ +.Lcopy8: + tbz count, 2, .Lcopy4 + ldr A_lw, [src] + ldr B_lw, [srcend, -4] + str A_lw, [dstin] + str B_lw, [dstend, -4] + ret + + /* Copy 0..3 bytes using a branchless sequence. */ +.Lcopy4: + cbz count, .Lcopy0 + lsr tmp1, count, 1 + ldrb A_lw, [src] + ldrb C_lw, [srcend, -1] + ldrb B_lw, [src, tmp1] + strb A_lw, [dstin] + strb B_lw, [dstin, tmp1] + strb C_lw, [dstend, -1] +.Lcopy0: + ret + + .p2align 4 + /* Medium copies: 33..128 bytes. */ +.Lcopy32_128: + ldp A_l, A_h, [src] + ldp B_l, B_h, [src, 16] + ldp C_l, C_h, [srcend, -32] + ldp D_l, D_h, [srcend, -16] + cmp count, 64 + b.hi .Lcopy128 + stp A_l, A_h, [dstin] + stp B_l, B_h, [dstin, 16] + stp C_l, C_h, [dstend, -32] + stp D_l, D_h, [dstend, -16] + ret + + .p2align 4 + /* Copy 65..128 bytes. */ +.Lcopy128: + ldp E_l, E_h, [src, 32] + ldp F_l, F_h, [src, 48] + cmp count, 96 + b.ls .Lcopy96 + ldp G_l, G_h, [srcend, -64] + ldp H_l, H_h, [srcend, -48] + stp G_l, G_h, [dstend, -64] + stp H_l, H_h, [dstend, -48] +.Lcopy96: + stp A_l, A_h, [dstin] + stp B_l, B_h, [dstin, 16] + stp E_l, E_h, [dstin, 32] + stp F_l, F_h, [dstin, 48] + stp C_l, C_h, [dstend, -32] + stp D_l, D_h, [dstend, -16] + ret + + .p2align 4 + /* Copy more than 128 bytes. */ +.Lcopy_long: + + /* Copy 16 bytes and then align dst to 16-byte alignment. */ + + ldp D_l, D_h, [src] + and tmp1, dstin, 15 + bic dst, dstin, 15 + sub src, src, tmp1 + add count, count, tmp1 /* Count is now 16 too large. */ + ldp A_l, A_h, [src, 16] + stp D_l, D_h, [dstin] + ldp B_l, B_h, [src, 32] + ldp C_l, C_h, [src, 48] + ldp D_l, D_h, [src, 64]! + subs count, count, 128 + 16 /* Test and readjust count. 
*/ + b.ls .Lcopy64_from_end + +.Lloop64: + stp A_l, A_h, [dst, 16] + ldp A_l, A_h, [src, 16] + stp B_l, B_h, [dst, 32] + ldp B_l, B_h, [src, 32] + stp C_l, C_h, [dst, 48] + ldp C_l, C_h, [src, 48] + stp D_l, D_h, [dst, 64]! + ldp D_l, D_h, [src, 64]! + subs count, count, 64 + b.hi .Lloop64 + + /* Write the last iteration and copy 64 bytes from the end. */ +.Lcopy64_from_end: + ldp E_l, E_h, [srcend, -64] + stp A_l, A_h, [dst, 16] + ldp A_l, A_h, [srcend, -48] + stp B_l, B_h, [dst, 32] + ldp B_l, B_h, [srcend, -32] + stp C_l, C_h, [dst, 48] + ldp C_l, C_h, [srcend, -16] + stp D_l, D_h, [dst, 64] + stp E_l, E_h, [dstend, -64] + stp A_l, A_h, [dstend, -48] + stp B_l, B_h, [dstend, -32] + stp C_l, C_h, [dstend, -16] + ret + +.size memcpy,.-memcpy diff --git a/src/string/aarch64/memset.S b/src/string/aarch64/memset.S new file mode 100644 index 000000000..f0d29b7fa --- /dev/null +++ b/src/string/aarch64/memset.S @@ -0,0 +1,115 @@ +/* + * memset - fill memory with a constant byte + * + * Copyright (c) 2012-2020, Arm Limited. + * SPDX-License-Identifier: MIT + */ + +/* Assumptions: + * + * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. + * + */ + +#define dstin x0 +#define val x1 +#define valw w1 +#define count x2 +#define dst x3 +#define dstend x4 +#define zva_val x5 + +.global memset +.type memset,%function +memset: + + dup v0.16B, valw + add dstend, dstin, count + + cmp count, 96 + b.hi .Lset_long + cmp count, 16 + b.hs .Lset_medium + mov val, v0.D[0] + + /* Set 0..15 bytes. */ + tbz count, 3, 1f + str val, [dstin] + str val, [dstend, -8] + ret + nop +1: tbz count, 2, 2f + str valw, [dstin] + str valw, [dstend, -4] + ret +2: cbz count, 3f + strb valw, [dstin] + tbz count, 1, 3f + strh valw, [dstend, -2] +3: ret + + /* Set 17..96 bytes. */ +.Lset_medium: + str q0, [dstin] + tbnz count, 6, .Lset96 + str q0, [dstend, -16] + tbz count, 5, 1f + str q0, [dstin, 16] + str q0, [dstend, -32] +1: ret + + .p2align 4 + /* Set 64..96 bytes. 
Write 64 bytes from the start and + 32 bytes from the end. */ +.Lset96: + str q0, [dstin, 16] + stp q0, q0, [dstin, 32] + stp q0, q0, [dstend, -32] + ret + + .p2align 4 +.Lset_long: + and valw, valw, 255 + bic dst, dstin, 15 + str q0, [dstin] + cmp count, 160 + ccmp valw, 0, 0, hs + b.ne .Lno_zva + +#ifndef SKIP_ZVA_CHECK + mrs zva_val, dczid_el0 + and zva_val, zva_val, 31 + cmp zva_val, 4 /* ZVA size is 64 bytes. */ + b.ne .Lno_zva +#endif + str q0, [dst, 16] + stp q0, q0, [dst, 32] + bic dst, dst, 63 + sub count, dstend, dst /* Count is now 64 too large. */ + sub count, count, 128 /* Adjust count and bias for loop. */ + + .p2align 4 +.Lzva_loop: + add dst, dst, 64 + dc zva, dst + subs count, count, 64 + b.hi .Lzva_loop + stp q0, q0, [dstend, -64] + stp q0, q0, [dstend, -32] + ret + +.Lno_zva: + sub count, dstend, dst /* Count is 16 too large. */ + sub dst, dst, 16 /* Dst is biased by -32. */ + sub count, count, 64 + 16 /* Adjust count and bias for loop. */ +.Lno_zva_loop: + stp q0, q0, [dst, 32] + stp q0, q0, [dst, 64]! + subs count, count, 64 + b.hi .Lno_zva_loop + stp q0, q0, [dstend, -64] + stp q0, q0, [dstend, -32] + ret + +.size memset,.-memset + From 4d6e737e319dee6fde2f59a8e0b32cca670532a5 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 29 Jun 2020 17:41:24 -0400 Subject: [PATCH 057/189] add glue code for mallocng merge this includes both an implementation of reclaimed-gap donation from ldso and a version of mallocng's glue.h with namespace-safe linkage to underlying syscalls, integration with AT_RANDOM initialization, and internal locking that's optimized out when the process is single-threaded. 
--- src/malloc/mallocng/README.mallocng | 13 +++++ src/malloc/mallocng/donate.c | 39 +++++++++++++++ src/malloc/mallocng/glue.h | 77 +++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+) create mode 100644 src/malloc/mallocng/README.mallocng create mode 100644 src/malloc/mallocng/donate.c create mode 100644 src/malloc/mallocng/glue.h diff --git a/src/malloc/mallocng/README.mallocng b/src/malloc/mallocng/README.mallocng new file mode 100644 index 000000000..da32bf065 --- /dev/null +++ b/src/malloc/mallocng/README.mallocng @@ -0,0 +1,13 @@ +This directory is a skeleton for upcoming merge of musl's new malloc +implementation, mallocng. To use it, drop in copies of or symlinks to +the following files from mallocng: + +- meta.h +- malloc.c +- realloc.c +- free.c +- aligned_alloc.c +- malloc_usable_size.c + +and build with make variable MALLOC_DIR=mallocng in config.mak or on +make command line. diff --git a/src/malloc/mallocng/donate.c b/src/malloc/mallocng/donate.c new file mode 100644 index 000000000..41d850f35 --- /dev/null +++ b/src/malloc/mallocng/donate.c @@ -0,0 +1,39 @@ +#include +#include +#include +#include +#include +#include + +#include "meta.h" + +static void donate(unsigned char *base, size_t len) +{ + uintptr_t a = (uintptr_t)base; + uintptr_t b = a + len; + a += -a & (UNIT-1); + b -= b & (UNIT-1); + memset(base, 0, len); + for (int sc=47; sc>0 && b>a; sc-=4) { + if (b-a < (size_classes[sc]+1)*UNIT) continue; + struct meta *m = alloc_meta(); + m->avail_mask = 0; + m->freed_mask = 1; + m->mem = (void *)a; + m->mem->meta = m; + m->last_idx = 0; + m->freeable = 0; + m->sizeclass = sc; + m->maplen = 0; + *((unsigned char *)m->mem+UNIT-4) = 0; + *((unsigned char *)m->mem+UNIT-3) = 255; + m->mem->storage[size_classes[sc]*UNIT-4] = 0; + queue(&ctx.active[sc], m); + a += (size_classes[sc]+1)*UNIT; + } +} + +void __malloc_donate(char *start, char *end) +{ + donate((void *)start, end-start); +} diff --git a/src/malloc/mallocng/glue.h 
b/src/malloc/mallocng/glue.h new file mode 100644 index 000000000..16acd1ea3 --- /dev/null +++ b/src/malloc/mallocng/glue.h @@ -0,0 +1,77 @@ +#ifndef MALLOC_GLUE_H +#define MALLOC_GLUE_H + +#include +#include +#include +#include +#include +#include +#include "atomic.h" +#include "syscall.h" +#include "libc.h" +#include "lock.h" +#include "dynlink.h" + +// use macros to appropriately namespace these. +#define size_classes __malloc_size_classes +#define ctx __malloc_context +#define alloc_meta __malloc_alloc_meta +#define is_allzero __malloc_allzerop +#define dump_heap __dump_heap + +#if USE_REAL_ASSERT +#include +#else +#undef assert +#define assert(x) do { if (!(x)) a_crash(); } while(0) +#endif + +#define brk(p) ((uintptr_t)__syscall(SYS_brk, p)) + +#define mmap __mmap +#define madvise __madvise +#define mremap __mremap + +#define DISABLE_ALIGNED_ALLOC (__malloc_replaced && !__aligned_alloc_replaced) + +static inline uint64_t get_random_secret() +{ + uint64_t secret = (uintptr_t)&secret * 1103515245; + for (size_t i=0; libc.auxv[i]; i+=2) + if (libc.auxv[i]==AT_RANDOM) + memcpy(&secret, (char *)libc.auxv[i+1]+8, sizeof secret); + return secret; +} + +#ifndef PAGESIZE +#define PAGESIZE PAGE_SIZE +#endif + +#define MT (libc.need_locks) + +#define RDLOCK_IS_EXCLUSIVE 1 + +__attribute__((__visibility__("hidden"))) +extern int __malloc_lock[1]; + +#define LOCK_OBJ_DEF \ +int __malloc_lock[1]; + +static inline void rdlock() +{ + if (MT) LOCK(__malloc_lock); +} +static inline void wrlock() +{ + if (MT) LOCK(__malloc_lock); +} +static inline void unlock() +{ + UNLOCK(__malloc_lock); +} +static inline void upgradelock() +{ +} + +#endif From d6a76ef9ab41b0b83e8cb07281c9dad24a08f1a5 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 30 Jun 2020 00:59:48 -0400 Subject: [PATCH 058/189] import mallocng the files added come from the mallocng development repo, commit 2ed58817cca5bc055974e5a0e43c280d106e696b. 
they comprise a new malloc implementation, developed over the past 9 months, to replace the old allocator (since dubbed "oldmalloc") with one that retains low code size and minimal baseline memory overhead while avoiding fundamental flaws in oldmalloc and making significant enhancements. these include highly controlled fragmentation, fine-grained ability to return memory to the system when freed, and strong hardening against dynamic memory usage errors by the caller. internally, mallocng derives most of these properties from tightly structuring memory, creating space for allocations as uniform-sized slots within individually mmapped (and individually freeable) allocation groups. smaller-than-pagesize groups are created within slots of larger ones. minimal group size is very small, and larger sizes (in geometric progression) only come into play when usage is high. all data necessary for maintaining consistency of the allocator state is tracked in out-of-band metadata, reachable via a validated path from minimal in-band metadata. all pointers passed (to free, etc.) are validated before any stores to memory take place. early reuse of freed slots is avoided via approximate LRU order of freed slots. further hardening against use-after-free and double-free, even in the case where the freed slot has been reused, is made by cycling the offset within the slot at which the allocation is placed; this is possible whenever the slot size is larger than the requested allocation. 
--- src/malloc/mallocng/README.mallocng | 13 - src/malloc/mallocng/aligned_alloc.c | 57 ++++ src/malloc/mallocng/free.c | 143 +++++++++ src/malloc/mallocng/malloc.c | 387 +++++++++++++++++++++++ src/malloc/mallocng/malloc_usable_size.c | 12 + src/malloc/mallocng/meta.h | 288 +++++++++++++++++ src/malloc/mallocng/realloc.c | 51 +++ 7 files changed, 938 insertions(+), 13 deletions(-) delete mode 100644 src/malloc/mallocng/README.mallocng create mode 100644 src/malloc/mallocng/aligned_alloc.c create mode 100644 src/malloc/mallocng/free.c create mode 100644 src/malloc/mallocng/malloc.c create mode 100644 src/malloc/mallocng/malloc_usable_size.c create mode 100644 src/malloc/mallocng/meta.h create mode 100644 src/malloc/mallocng/realloc.c diff --git a/src/malloc/mallocng/README.mallocng b/src/malloc/mallocng/README.mallocng deleted file mode 100644 index da32bf065..000000000 --- a/src/malloc/mallocng/README.mallocng +++ /dev/null @@ -1,13 +0,0 @@ -This directory is a skeleton for upcoming merge of musl's new malloc -implementation, mallocng. To use it, drop in copies of or symlinks to -the following files from mallocng: - -- meta.h -- malloc.c -- realloc.c -- free.c -- aligned_alloc.c -- malloc_usable_size.c - -and build with make variable MALLOC_DIR=mallocng in config.mak or on -make command line. 
diff --git a/src/malloc/mallocng/aligned_alloc.c b/src/malloc/mallocng/aligned_alloc.c new file mode 100644 index 000000000..341168960 --- /dev/null +++ b/src/malloc/mallocng/aligned_alloc.c @@ -0,0 +1,57 @@ +#include +#include +#include "meta.h" + +void *aligned_alloc(size_t align, size_t len) +{ + if ((align & -align) != align) { + errno = EINVAL; + return 0; + } + + if (len > SIZE_MAX - align || align >= (1ULL<<31)*UNIT) { + errno = ENOMEM; + return 0; + } + + if (DISABLE_ALIGNED_ALLOC) { + errno = ENOMEM; + return 0; + } + + if (align <= UNIT) align = UNIT; + + unsigned char *p = malloc(len + align - UNIT); + struct meta *g = get_meta(p); + int idx = get_slot_index(p); + size_t stride = get_stride(g); + unsigned char *start = g->mem->storage + stride*idx; + unsigned char *end = g->mem->storage + stride*(idx+1) - IB; + size_t adj = -(uintptr_t)p & (align-1); + + if (!adj) { + set_size(p, end, len); + return p; + } + p += adj; + uint32_t offset = (size_t)(p-g->mem->storage)/UNIT; + if (offset <= 0xffff) { + *(uint16_t *)(p-2) = offset; + p[-4] = 0; + } else { + // use a 32-bit offset if 16-bit doesn't fit. for this, + // 16-bit field must be zero, [-4] byte nonzero. + *(uint16_t *)(p-2) = 0; + *(uint32_t *)(p-8) = offset; + p[-4] = 1; + } + p[-3] = idx; + set_size(p, end, len); + // store offset to aligned enframing. this facilitates cycling + // offset and also iteration of heap for debugging/measurement. + // for extreme overalignment it won't fit but these are classless + // allocations anyway. 
+ *(uint16_t *)(start - 2) = (size_t)(p-start)/UNIT; + start[-3] = 7<<5; + return p; +} diff --git a/src/malloc/mallocng/free.c b/src/malloc/mallocng/free.c new file mode 100644 index 000000000..40745f97d --- /dev/null +++ b/src/malloc/mallocng/free.c @@ -0,0 +1,143 @@ +#define _BSD_SOURCE +#include +#include + +#include "meta.h" + +struct mapinfo { + void *base; + size_t len; +}; + +static struct mapinfo nontrivial_free(struct meta *, int); + +static struct mapinfo free_group(struct meta *g) +{ + struct mapinfo mi = { 0 }; + int sc = g->sizeclass; + if (sc < 48) { + ctx.usage_by_class[sc] -= g->last_idx+1; + } + if (g->maplen) { + step_seq(); + record_seq(sc); + mi.base = g->mem; + mi.len = g->maplen*4096UL; + } else { + void *p = g->mem; + struct meta *m = get_meta(p); + int idx = get_slot_index(p); + g->mem->meta = 0; + // not checking size/reserved here; it's intentionally invalid + mi = nontrivial_free(m, idx); + } + free_meta(g); + return mi; +} + +static int okay_to_free(struct meta *g) +{ + int sc = g->sizeclass; + + if (!g->freeable) return 0; + + // always free individual mmaps not suitable for reuse + if (sc >= 48 || get_stride(g) < UNIT*size_classes[sc]) + return 1; + + // always free groups allocated inside another group's slot + // since recreating them should not be expensive and they + // might be blocking freeing of a much larger group. + if (!g->maplen) return 1; + + // if there is another non-full group, free this one to + // consolidate future allocations, reduce fragmentation. + if (g->next != g) return 1; + + // free any group in a size class that's not bouncing + if (!is_bouncing(sc)) return 1; + + size_t cnt = g->last_idx+1; + size_t usage = ctx.usage_by_class[sc]; + + // if usage is high enough that a larger count should be + // used, free the low-count group so a new one will be made. + if (9*cnt <= usage && cnt < 20) + return 1; + + // otherwise, keep the last group in a bouncing class. 
+ return 0; +} + +static struct mapinfo nontrivial_free(struct meta *g, int i) +{ + uint32_t self = 1u<sizeclass; + uint32_t mask = g->freed_mask | g->avail_mask; + + if (mask+self == (2u<last_idx)-1 && okay_to_free(g)) { + // any multi-slot group is necessarily on an active list + // here, but single-slot groups might or might not be. + if (g->next) { + assert(sc < 48); + int activate_new = (ctx.active[sc]==g); + dequeue(&ctx.active[sc], g); + if (activate_new && ctx.active[sc]) + activate_group(ctx.active[sc]); + } + return free_group(g); + } else if (!mask) { + assert(sc < 48); + // might still be active if there were no allocations + // after last available slot was taken. + if (ctx.active[sc] != g) { + queue(&ctx.active[sc], g); + } + } + a_or(&g->freed_mask, self); + return (struct mapinfo){ 0 }; +} + +void free(void *p) +{ + if (!p) return; + + struct meta *g = get_meta(p); + int idx = get_slot_index(p); + size_t stride = get_stride(g); + unsigned char *start = g->mem->storage + stride*idx; + unsigned char *end = start + stride - IB; + get_nominal_size(p, end); + uint32_t self = 1u<last_idx)-1; + ((unsigned char *)p)[-3] = 255; + // invalidate offset to group header, and cycle offset of + // used region within slot if current offset is zero. + *(uint16_t *)((char *)p-2) = 0; + + // release any whole pages contained in the slot to be freed + // unless it's a single-slot group that will be unmapped. 
+ if (((uintptr_t)(start-1) ^ (uintptr_t)end) >= 2*PGSZ && g->last_idx) { + unsigned char *base = start + (-(uintptr_t)start & (PGSZ-1)); + size_t len = (end-base) & -PGSZ; + if (len) madvise(base, len, MADV_FREE); + } + + // atomic free without locking if this is neither first or last slot + for (;;) { + uint32_t freed = g->freed_mask; + uint32_t avail = g->avail_mask; + uint32_t mask = freed | avail; + assert(!(mask&self)); + if (!freed || mask+self==all) break; + if (!MT) + g->freed_mask = freed+self; + else if (a_cas(&g->freed_mask, freed, freed+self)!=freed) + continue; + return; + } + + wrlock(); + struct mapinfo mi = nontrivial_free(g, idx); + unlock(); + if (mi.len) munmap(mi.base, mi.len); +} diff --git a/src/malloc/mallocng/malloc.c b/src/malloc/mallocng/malloc.c new file mode 100644 index 000000000..d695ab8ec --- /dev/null +++ b/src/malloc/mallocng/malloc.c @@ -0,0 +1,387 @@ +#include +#include +#include +#include +#include +#include + +#include "meta.h" + +LOCK_OBJ_DEF; + +const uint16_t size_classes[] = { + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 12, 15, + 18, 20, 25, 31, + 36, 42, 50, 63, + 72, 84, 102, 127, + 146, 170, 204, 255, + 292, 340, 409, 511, + 584, 682, 818, 1023, + 1169, 1364, 1637, 2047, + 2340, 2730, 3276, 4095, + 4680, 5460, 6552, 8191, +}; + +static const uint8_t small_cnt_tab[][3] = { + { 30, 30, 30 }, + { 31, 15, 15 }, + { 20, 10, 10 }, + { 31, 15, 7 }, + { 25, 12, 6 }, + { 21, 10, 5 }, + { 18, 8, 4 }, + { 31, 15, 7 }, + { 28, 14, 6 }, +}; + +static const uint8_t med_cnt_tab[4] = { 28, 24, 20, 32 }; + +struct malloc_context ctx = { 0 }; + +struct meta *alloc_meta(void) +{ + struct meta *m; + unsigned char *p; + if (!ctx.init_done) { +#ifndef PAGESIZE + ctx.pagesize = get_page_size(); +#endif + ctx.secret = get_random_secret(); + ctx.init_done = 1; + } + size_t pagesize = PGSZ; + if (pagesize < 4096) pagesize = 4096; + if ((m = dequeue_head(&ctx.free_meta_head))) return m; + if (!ctx.avail_meta_count) { + int need_unprotect = 1; + if 
(!ctx.avail_meta_area_count && ctx.brk!=-1) { + uintptr_t new = ctx.brk + pagesize; + int need_guard = 0; + if (!ctx.brk) { + need_guard = 1; + ctx.brk = brk(0); + // some ancient kernels returned _ebss + // instead of next page as initial brk. + ctx.brk += -ctx.brk & (pagesize-1); + new = ctx.brk + 2*pagesize; + } + if (brk(new) != new) { + ctx.brk = -1; + } else { + if (need_guard) mmap((void *)ctx.brk, pagesize, + PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0); + ctx.brk = new; + ctx.avail_meta_areas = (void *)(new - pagesize); + ctx.avail_meta_area_count = pagesize>>12; + need_unprotect = 0; + } + } + if (!ctx.avail_meta_area_count) { + size_t n = 2UL << ctx.meta_alloc_shift; + p = mmap(0, n*pagesize, PROT_NONE, + MAP_PRIVATE|MAP_ANON, -1, 0); + if (p==MAP_FAILED) return 0; + ctx.avail_meta_areas = p + pagesize; + ctx.avail_meta_area_count = (n-1)*(pagesize>>12); + ctx.meta_alloc_shift++; + } + p = ctx.avail_meta_areas; + if ((uintptr_t)p & (pagesize-1)) need_unprotect = 0; + if (need_unprotect) + if (mprotect(p, pagesize, PROT_READ|PROT_WRITE) + && errno != ENOSYS) + return 0; + ctx.avail_meta_area_count--; + ctx.avail_meta_areas = p + 4096; + if (ctx.meta_area_tail) { + ctx.meta_area_tail->next = (void *)p; + } else { + ctx.meta_area_head = (void *)p; + } + ctx.meta_area_tail = (void *)p; + ctx.meta_area_tail->check = ctx.secret; + ctx.avail_meta_count = ctx.meta_area_tail->nslots + = (4096-sizeof(struct meta_area))/sizeof *m; + ctx.avail_meta = ctx.meta_area_tail->slots; + } + ctx.avail_meta_count--; + m = ctx.avail_meta++; + m->prev = m->next = 0; + return m; +} + +static uint32_t try_avail(struct meta **pm) +{ + struct meta *m = *pm; + uint32_t first; + if (!m) return 0; + uint32_t mask = m->avail_mask; + if (!mask) { + if (!m) return 0; + if (!m->freed_mask) { + dequeue(pm, m); + m = *pm; + if (!m) return 0; + } else { + m = m->next; + *pm = m; + } + + mask = m->freed_mask; + + // skip fully-free group unless it's the only one + // or it's a 
permanently non-freeable group + if (mask == (2u<last_idx)-1 && m->freeable) { + m = m->next; + *pm = m; + mask = m->freed_mask; + } + + // activate more slots in a not-fully-active group + // if needed, but only as a last resort. prefer using + // any other group with free slots. this avoids + // touching & dirtying as-yet-unused pages. + if (!(mask & ((2u<mem->active_idx)-1))) { + if (m->next != m) { + m = m->next; + *pm = m; + } else { + int cnt = m->mem->active_idx + 2; + int size = size_classes[m->sizeclass]*UNIT; + int span = UNIT + size*cnt; + // activate up to next 4k boundary + while ((span^(span+size-1)) < 4096) { + cnt++; + span += size; + } + if (cnt > m->last_idx+1) + cnt = m->last_idx+1; + m->mem->active_idx = cnt-1; + } + } + mask = activate_group(m); + assert(mask); + decay_bounces(m->sizeclass); + } + first = mask&-mask; + m->avail_mask = mask-first; + return first; +} + +static int alloc_slot(int, size_t); + +static struct meta *alloc_group(int sc, size_t req) +{ + size_t size = UNIT*size_classes[sc]; + int i = 0, cnt; + unsigned char *p; + struct meta *m = alloc_meta(); + if (!m) return 0; + size_t usage = ctx.usage_by_class[sc]; + size_t pagesize = PGSZ; + int active_idx; + if (sc < 9) { + while (i<2 && 4*small_cnt_tab[sc][i] > usage) + i++; + cnt = small_cnt_tab[sc][i]; + } else { + // lookup max number of slots fitting in power-of-two size + // from a table, along with number of factors of two we + // can divide out without a remainder or reaching 1. + cnt = med_cnt_tab[sc&3]; + + // reduce cnt to avoid excessive eagar allocation. + while (!(cnt&1) && 4*cnt > usage) + cnt >>= 1; + + // data structures don't support groups whose slot offsets + // in units don't fit in 16 bits. + while (size*cnt >= 65536*UNIT) + cnt >>= 1; + } + + // If we selected a count of 1 above but it's not sufficient to use + // mmap, increase to 2. Then it might be; if not it will nest. 
+ if (cnt==1 && size*cnt+UNIT <= pagesize/2) cnt = 2; + + // All choices of size*cnt are "just below" a power of two, so anything + // larger than half the page size should be allocated as whole pages. + if (size*cnt+UNIT > pagesize/2) { + // check/update bounce counter to start/increase retention + // of freed maps, and inhibit use of low-count, odd-size + // small mappings and single-slot groups if activated. + int nosmall = is_bouncing(sc); + account_bounce(sc); + step_seq(); + + // since the following count reduction opportunities have + // an absolute memory usage cost, don't overdo them. count + // coarse usage as part of usage. + if (!(sc&1) && sc<32) usage += ctx.usage_by_class[sc+1]; + + // try to drop to a lower count if the one found above + // increases usage by more than 25%. these reduced counts + // roughly fill an integral number of pages, just not a + // power of two, limiting amount of unusable space. + if (4*cnt > usage && !nosmall) { + if (0); + else if ((sc&3)==1 && size*cnt>8*pagesize) cnt = 2; + else if ((sc&3)==2 && size*cnt>4*pagesize) cnt = 3; + else if ((sc&3)==0 && size*cnt>8*pagesize) cnt = 3; + else if ((sc&3)==0 && size*cnt>2*pagesize) cnt = 5; + } + size_t needed = size*cnt + UNIT; + needed += -needed & (pagesize-1); + + // produce an individually-mmapped allocation if usage is low, + // bounce counter hasn't triggered, and either it saves memory + // or it avoids eagar slot allocation without wasting too much. 
+ if (!nosmall && cnt<=7) { + req += IB + UNIT; + req += -req & (pagesize-1); + if (req=4*pagesize && 2*cnt>usage)) { + cnt = 1; + needed = req; + } + } + + p = mmap(0, needed, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); + if (p==MAP_FAILED) { + free_meta(m); + return 0; + } + m->maplen = needed>>12; + ctx.mmap_counter++; + active_idx = (4096-UNIT)/size-1; + if (active_idx > cnt-1) active_idx = cnt-1; + if (active_idx < 0) active_idx = 0; + } else { + int j = size_to_class(UNIT+cnt*size-IB); + int idx = alloc_slot(j, UNIT+cnt*size-IB); + if (idx < 0) { + free_meta(m); + return 0; + } + struct meta *g = ctx.active[j]; + p = enframe(g, idx, UNIT*size_classes[j]-IB, ctx.mmap_counter); + m->maplen = 0; + p[-3] = (p[-3]&31) | (6<<5); + for (int i=0; i<=cnt; i++) + p[UNIT+i*size-4] = 0; + active_idx = cnt-1; + } + ctx.usage_by_class[sc] += cnt; + m->avail_mask = (2u<freed_mask = (2u<<(cnt-1))-1 - m->avail_mask; + m->mem = (void *)p; + m->mem->meta = m; + m->mem->active_idx = active_idx; + m->last_idx = cnt-1; + m->freeable = 1; + m->sizeclass = sc; + return m; +} + +static int alloc_slot(int sc, size_t req) +{ + uint32_t first = try_avail(&ctx.active[sc]); + if (first) return a_ctz_32(first); + + struct meta *g = alloc_group(sc, req); + if (!g) return -1; + + g->avail_mask--; + queue(&ctx.active[sc], g); + return 0; +} + +void *malloc(size_t n) +{ + if (size_overflows(n)) return 0; + struct meta *g; + uint32_t mask, first; + int sc; + int idx; + int ctr; + + if (n >= MMAP_THRESHOLD) { + size_t needed = n + IB + UNIT; + void *p = mmap(0, needed, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, 0); + if (p==MAP_FAILED) return 0; + wrlock(); + step_seq(); + g = alloc_meta(); + if (!g) { + unlock(); + munmap(p, needed); + return 0; + } + g->mem = p; + g->mem->meta = g; + g->last_idx = 0; + g->freeable = 1; + g->sizeclass = 63; + g->maplen = (needed+4095)/4096; + g->avail_mask = g->freed_mask = 0; + // use a global counter to cycle offset in + // individually-mmapped 
allocations. + ctx.mmap_counter++; + idx = 0; + goto success; + } + + sc = size_to_class(n); + + rdlock(); + g = ctx.active[sc]; + + // use coarse size classes initially when there are not yet + // any groups of desired size. this allows counts of 2 or 3 + // to be allocated at first rather than having to start with + // 7 or 5, the min counts for even size classes. + if (!g && sc>=4 && sc<32 && sc!=6 && !(sc&1) && !ctx.usage_by_class[sc]) { + size_t usage = ctx.usage_by_class[sc|1]; + // if a new group may be allocated, count it toward + // usage in deciding if we can use coarse class. + if (!ctx.active[sc|1] || (!ctx.active[sc|1]->avail_mask + && !ctx.active[sc|1]->freed_mask)) + usage += 3; + if (usage <= 12) + sc |= 1; + g = ctx.active[sc]; + } + + for (;;) { + mask = g ? g->avail_mask : 0; + first = mask&-mask; + if (!first) break; + if (RDLOCK_IS_EXCLUSIVE || !MT) + g->avail_mask = mask-first; + else if (a_cas(&g->avail_mask, mask, mask-first)!=mask) + continue; + idx = a_ctz_32(first); + goto success; + } + upgradelock(); + + idx = alloc_slot(sc, n); + if (idx < 0) { + unlock(); + return 0; + } + g = ctx.active[sc]; + +success: + ctr = ctx.mmap_counter; + unlock(); + return enframe(g, idx, n, ctr); +} + +int is_allzero(void *p) +{ + struct meta *g = get_meta(p); + return g->sizeclass >= 48 || + get_stride(g) < UNIT*size_classes[g->sizeclass]; +} diff --git a/src/malloc/mallocng/malloc_usable_size.c b/src/malloc/mallocng/malloc_usable_size.c new file mode 100644 index 000000000..a440a4eab --- /dev/null +++ b/src/malloc/mallocng/malloc_usable_size.c @@ -0,0 +1,12 @@ +#include +#include "meta.h" + +size_t malloc_usable_size(void *p) +{ + struct meta *g = get_meta(p); + int idx = get_slot_index(p); + size_t stride = get_stride(g); + unsigned char *start = g->mem->storage + stride*idx; + unsigned char *end = start + stride - IB; + return get_nominal_size(p, end); +} diff --git a/src/malloc/mallocng/meta.h b/src/malloc/mallocng/meta.h new file mode 100644 index 
000000000..61ec53f9a --- /dev/null +++ b/src/malloc/mallocng/meta.h @@ -0,0 +1,288 @@ +#ifndef MALLOC_META_H +#define MALLOC_META_H + +#include +#include +#include +#include "glue.h" + +__attribute__((__visibility__("hidden"))) +extern const uint16_t size_classes[]; + +#define MMAP_THRESHOLD 131052 + +#define UNIT 16 +#define IB 4 + +struct group { + struct meta *meta; + unsigned char active_idx:5; + char pad[UNIT - sizeof(struct meta *) - 1]; + unsigned char storage[]; +}; + +struct meta { + struct meta *prev, *next; + struct group *mem; + volatile int avail_mask, freed_mask; + uintptr_t last_idx:5; + uintptr_t freeable:1; + uintptr_t sizeclass:6; + uintptr_t maplen:8*sizeof(uintptr_t)-12; +}; + +struct meta_area { + uint64_t check; + struct meta_area *next; + int nslots; + struct meta slots[]; +}; + +struct malloc_context { + uint64_t secret; +#ifndef PAGESIZE + size_t pagesize; +#endif + int init_done; + unsigned mmap_counter; + struct meta *free_meta_head; + struct meta *avail_meta; + size_t avail_meta_count, avail_meta_area_count, meta_alloc_shift; + struct meta_area *meta_area_head, *meta_area_tail; + unsigned char *avail_meta_areas; + struct meta *active[48]; + size_t usage_by_class[48]; + uint8_t unmap_seq[32], bounces[32]; + uint8_t seq; + uintptr_t brk; +}; + +__attribute__((__visibility__("hidden"))) +extern struct malloc_context ctx; + +#ifdef PAGESIZE +#define PGSZ PAGESIZE +#else +#define PGSZ ctx.pagesize +#endif + +__attribute__((__visibility__("hidden"))) +struct meta *alloc_meta(void); + +__attribute__((__visibility__("hidden"))) +int is_allzero(void *); + +static inline void queue(struct meta **phead, struct meta *m) +{ + assert(!m->next); + assert(!m->prev); + if (*phead) { + struct meta *head = *phead; + m->next = head; + m->prev = head->prev; + m->next->prev = m->prev->next = m; + } else { + m->prev = m->next = m; + *phead = m; + } +} + +static inline void dequeue(struct meta **phead, struct meta *m) +{ + if (m->next != m) { + m->prev->next = 
m->next; + m->next->prev = m->prev; + if (*phead == m) *phead = m->next; + } else { + *phead = 0; + } + m->prev = m->next = 0; +} + +static inline struct meta *dequeue_head(struct meta **phead) +{ + struct meta *m = *phead; + if (m) dequeue(phead, m); + return m; +} + +static inline void free_meta(struct meta *m) +{ + *m = (struct meta){0}; + queue(&ctx.free_meta_head, m); +} + +static inline uint32_t activate_group(struct meta *m) +{ + assert(!m->avail_mask); + uint32_t mask, act = (2u<mem->active_idx)-1; + do mask = m->freed_mask; + while (a_cas(&m->freed_mask, mask, mask&~act)!=mask); + return m->avail_mask = mask & act; +} + +static inline int get_slot_index(const unsigned char *p) +{ + return p[-3] & 31; +} + +static inline struct meta *get_meta(const unsigned char *p) +{ + assert(!((uintptr_t)p & 15)); + int offset = *(const uint16_t *)(p - 2); + int index = get_slot_index(p); + if (p[-4]) { + assert(!offset); + offset = *(uint32_t *)(p - 8); + assert(offset > 0xffff); + } + const struct group *base = (const void *)(p - UNIT*offset - UNIT); + const struct meta *meta = base->meta; + assert(meta->mem == base); + assert(index <= meta->last_idx); + assert(!(meta->avail_mask & (1u<freed_mask & (1u<check == ctx.secret); + if (meta->sizeclass < 48) { + assert(offset >= size_classes[meta->sizeclass]*index); + assert(offset < size_classes[meta->sizeclass]*(index+1)); + } else { + assert(meta->sizeclass == 63); + } + if (meta->maplen) { + assert(offset <= meta->maplen*4096UL/UNIT - 1); + } + return (struct meta *)meta; +} + +static inline size_t get_nominal_size(const unsigned char *p, const unsigned char *end) +{ + size_t reserved = p[-3] >> 5; + if (reserved >= 5) { + assert(reserved == 5); + reserved = *(const uint32_t *)(end-4); + assert(reserved >= 5); + assert(!end[-5]); + } + assert(reserved <= end-p); + assert(!*(end-reserved)); + // also check the slot's overflow byte + assert(!*end); + return end-reserved-p; +} + +static inline size_t get_stride(const struct 
meta *g) +{ + if (!g->last_idx && g->maplen) { + return g->maplen*4096UL - UNIT; + } else { + return UNIT*size_classes[g->sizeclass]; + } +} + +static inline void set_size(unsigned char *p, unsigned char *end, size_t n) +{ + int reserved = end-p-n; + if (reserved) end[-reserved] = 0; + if (reserved >= 5) { + *(uint32_t *)(end-4) = reserved; + end[-5] = 0; + reserved = 5; + } + p[-3] = (p[-3]&31) + (reserved<<5); +} + +static inline void *enframe(struct meta *g, int idx, size_t n, int ctr) +{ + size_t stride = get_stride(g); + size_t slack = (stride-IB-n)/UNIT; + unsigned char *p = g->mem->storage + stride*idx; + unsigned char *end = p+stride-IB; + // cycle offset within slot to increase interval to address + // reuse, facilitate trapping double-free. + int off = (p[-3] ? *(uint16_t *)(p-2) + 1 : ctr) & 255; + assert(!p[-4]); + if (off > slack) { + size_t m = slack; + m |= m>>1; m |= m>>2; m |= m>>4; + off &= m; + if (off > slack) off -= slack+1; + assert(off <= slack); + } + if (off) { + // store offset in unused header at offset zero + // if enframing at non-zero offset. + *(uint16_t *)(p-2) = off; + p[-3] = 7<<5; + p += UNIT*off; + // for nonzero offset there is no permanent check + // byte, so make one. 
+ p[-4] = 0; + } + *(uint16_t *)(p-2) = (size_t)(p-g->mem->storage)/UNIT; + p[-3] = idx; + set_size(p, end, n); + return p; +} + +static inline int size_to_class(size_t n) +{ + n = (n+IB-1)>>4; + if (n<10) return n; + n++; + int i = (28-a_clz_32(n))*4 + 8; + if (n>size_classes[i+1]) i+=2; + if (n>size_classes[i]) i++; + return i; +} + +static inline int size_overflows(size_t n) +{ + if (n >= SIZE_MAX/2 - 4096) { + errno = ENOMEM; + return 1; + } + return 0; +} + +static inline void step_seq(void) +{ + if (ctx.seq==255) { + for (int i=0; i<32; i++) ctx.unmap_seq[i] = 0; + ctx.seq = 1; + } else { + ctx.seq++; + } +} + +static inline void record_seq(int sc) +{ + if (sc-7U < 32) ctx.unmap_seq[sc-7] = ctx.seq; +} + +static inline void account_bounce(int sc) +{ + if (sc-7U < 32) { + int seq = ctx.unmap_seq[sc-7]; + if (seq && ctx.seq-seq < 10) { + if (ctx.bounces[sc-7]+1 < 100) + ctx.bounces[sc-7]++; + else + ctx.bounces[sc-7] = 150; + } + } +} + +static inline void decay_bounces(int sc) +{ + if (sc-7U < 32 && ctx.bounces[sc-7]) + ctx.bounces[sc-7]--; +} + +static inline int is_bouncing(int sc) +{ + return (sc-7U < 32 && ctx.bounces[sc-7] >= 100); +} + +#endif diff --git a/src/malloc/mallocng/realloc.c b/src/malloc/mallocng/realloc.c new file mode 100644 index 000000000..18769f42d --- /dev/null +++ b/src/malloc/mallocng/realloc.c @@ -0,0 +1,51 @@ +#define _GNU_SOURCE +#include +#include +#include +#include "meta.h" + +void *realloc(void *p, size_t n) +{ + if (!p) return malloc(n); + if (size_overflows(n)) return 0; + + struct meta *g = get_meta(p); + int idx = get_slot_index(p); + size_t stride = get_stride(g); + unsigned char *start = g->mem->storage + stride*idx; + unsigned char *end = start + stride - IB; + size_t old_size = get_nominal_size(p, end); + size_t avail_size = end-(unsigned char *)p; + void *new; + + // only resize in-place if size class matches + if (n <= avail_size && n= g->sizeclass) { + set_size(p, end, n); + return p; + } + + // use mremap if old and 
new size are both mmap-worthy + if (g->sizeclass>=48 && n>=MMAP_THRESHOLD) { + assert(g->sizeclass==63); + size_t base = (unsigned char *)p-start; + size_t needed = (n + base + UNIT + IB + 4095) & -4096; + new = g->maplen*4096UL == needed ? g->mem : + mremap(g->mem, g->maplen*4096UL, needed, MREMAP_MAYMOVE); + if (new!=MAP_FAILED) { + g->mem = new; + g->maplen = needed/4096; + p = g->mem->storage + base; + end = g->mem->storage + (needed - UNIT) - IB; + *end = 0; + set_size(p, end, n); + return p; + } + } + + new = malloc(n); + if (!new) return 0; + memcpy(new, p, n < old_size ? n : old_size); + free(p); + return new; +} From 3853388d29bee80f5633b39cd6d2f0dbea8cda91 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 30 Jun 2020 15:29:58 -0400 Subject: [PATCH 059/189] add malloc implementation selection to configure the intent here is to keep oldmalloc as an option, at least for the short term, in case any users are negatively impacted in some way by mallocng and need to fallback until their issues are resolved. --- configure | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/configure b/configure index 435988258..0dfd2d170 100755 --- a/configure +++ b/configure @@ -35,6 +35,9 @@ Optional features: --disable-shared inhibit building shared library [enabled] --disable-static inhibit building static library [enabled] +Optional packages: + --with-malloc=... choose malloc implementation [oldmalloc] + Some influential environment variables: CC C compiler command [detected] CFLAGS C compiler flags [-Os -pipe ...] 
@@ -139,6 +142,7 @@ static=yes wrapper=auto gcc_wrapper=no clang_wrapper=no +malloc_dir=oldmalloc for arg ; do case "$arg" in @@ -168,6 +172,7 @@ case "$arg" in --disable-wrapper|--enable-wrapper=no) wrapper=no ;; --enable-gcc-wrapper|--enable-gcc-wrapper=yes) wrapper=yes ; gcc_wrapper=yes ;; --disable-gcc-wrapper|--enable-gcc-wrapper=no) wrapper=no ;; +--with-malloc=*) malloc_dir=${arg#*=} ;; --enable-*|--disable-*|--with-*|--without-*|--*dir=*) ;; --host=*|--target=*) target=${arg#*=} ;; --build=*) build=${arg#*=} ;; @@ -214,6 +219,12 @@ done set +C trap 'rm "$tmpc"' EXIT INT QUIT TERM HUP +# +# Check that the requested malloc implementation exists +# +test -d "$srcdir/src/malloc/$malloc_dir" \ +|| fail "$0: error: chosen malloc implementation '$malloc_dir' does not exist" + # # Check whether we are cross-compiling, and set a default # CROSS_COMPILE prefix if none was provided. @@ -779,6 +790,7 @@ OPTIMIZE_GLOBS = $OPTIMIZE_GLOBS ALL_TOOLS = $tools TOOL_LIBS = $tool_libs ADD_CFI = $ADD_CFI +MALLOC_DIR = $malloc_dir EOF test "x$static" = xno && echo "STATIC_LIBS =" test "x$shared" = xno && echo "SHARED_LIBS =" From 8fac75738be08d72c92e7cbb793645dcd5fdfafc Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 30 Jun 2020 15:30:27 -0400 Subject: [PATCH 060/189] make mallocng the default malloc implementation --- Makefile | 2 +- configure | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 3d3e3622b..e8cc44367 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ includedir = $(prefix)/include libdir = $(prefix)/lib syslibdir = /lib -MALLOC_DIR = oldmalloc +MALLOC_DIR = mallocng SRC_DIRS = $(addprefix $(srcdir)/,src/* src/malloc/$(MALLOC_DIR) crt ldso $(COMPAT_SRC_DIRS)) BASE_GLOBS = $(addsuffix /*.c,$(SRC_DIRS)) ARCH_GLOBS = $(addsuffix /$(ARCH)/*.[csS],$(SRC_DIRS)) diff --git a/configure b/configure index 0dfd2d170..18fda9afc 100755 --- a/configure +++ b/configure @@ -36,7 +36,7 @@ Optional features: --disable-static 
inhibit building static library [enabled] Optional packages: - --with-malloc=... choose malloc implementation [oldmalloc] + --with-malloc=... choose malloc implementation [mallocng] Some influential environment variables: CC C compiler command [detected] @@ -142,7 +142,7 @@ static=yes wrapper=auto gcc_wrapper=no clang_wrapper=no -malloc_dir=oldmalloc +malloc_dir=mallocng for arg ; do case "$arg" in From 05fa53ba36785eeb73f0a83e0a64fb010bae232c Mon Sep 17 00:00:00 2001 From: Julien Ramseier Date: Wed, 1 Jul 2020 15:12:14 +0200 Subject: [PATCH 061/189] vfscanf: fix possible invalid free due to uninitialized variable use vfscanf() may use the variable 'alloc' uninitialized when taking the branch introduced by commit b287cd745c2243f8e5114331763a5a9813b5f6ee. Spotted by clang. --- src/stdio/vfscanf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stdio/vfscanf.c b/src/stdio/vfscanf.c index b5ebc16ea..b78a374d3 100644 --- a/src/stdio/vfscanf.c +++ b/src/stdio/vfscanf.c @@ -57,7 +57,7 @@ int vfscanf(FILE *restrict f, const char *restrict fmt, va_list ap) { int width; int size; - int alloc; + int alloc = 0; int base; const unsigned char *p; int c, t; From 07ff8810fae19f1666f8fb68992a414340fa8ce8 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sun, 5 Jul 2020 13:49:55 -0400 Subject: [PATCH 062/189] fix C implementation of a_clz_32 this broke mallocng size_to_class on archs without a native implementation of a_clz_32. the incorrect logic seems to have been something i derived from a related but distinct log2-type operation. with the change made here, it passes an exhaustive test. as this function is new and presently only used by mallocng, no other functionality was affected. 
--- src/internal/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/internal/atomic.h b/src/internal/atomic.h index 99539cc03..96c1552d6 100644 --- a/src/internal/atomic.h +++ b/src/internal/atomic.h @@ -319,7 +319,7 @@ static inline int a_clz_64(uint64_t x) #define a_clz_32 a_clz_32 static inline int a_clz_32(uint32_t x) { - x--; + x >>= 1; x |= x >> 1; x |= x >> 2; x |= x >> 4; From 9537dae0bae79d72302701155eaf4afa99d5502f Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 6 Jul 2020 17:56:19 -0400 Subject: [PATCH 063/189] make thread killlock async-signal-safe for pthread_kill pthread_kill is required to be AS-safe. that requirement can't be met if the target thread's killlock can be taken in contexts where application-installed signal handlers can run. block signals around use of this lock in all pthread_* functions which target a tid, and reorder blocking/unblocking of signals in pthread_exit so that they're blocked whenever the killlock is held. --- src/thread/pthread_create.c | 11 ++++++----- src/thread/pthread_getschedparam.c | 3 +++ src/thread/pthread_kill.c | 3 +++ src/thread/pthread_setschedparam.c | 3 +++ src/thread/pthread_setschedprio.c | 3 +++ 5 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c index 6bdfb44f9..10f1b7d8c 100644 --- a/src/thread/pthread_create.c +++ b/src/thread/pthread_create.c @@ -72,12 +72,13 @@ _Noreturn void __pthread_exit(void *result) /* Access to target the exiting thread with syscalls that use * its kernel tid is controlled by killlock. For detached threads, * any use past this point would have undefined behavior, but for - * joinable threads it's a valid usage that must be handled. */ + * joinable threads it's a valid usage that must be handled. + * Signals must be blocked since pthread_kill must be AS-safe. 
*/ + __block_app_sigs(&set); LOCK(self->killlock); - /* The thread list lock must be AS-safe, and thus requires - * application signals to be blocked before it can be taken. */ - __block_app_sigs(&set); + /* The thread list lock must be AS-safe, and thus depends on + * application signals being blocked above. */ __tl_lock(); /* If this is the only thread in the list, don't proceed with @@ -85,8 +86,8 @@ _Noreturn void __pthread_exit(void *result) * signal state to prepare for exit to call atexit handlers. */ if (self->next == self) { __tl_unlock(); - __restore_sigs(&set); UNLOCK(self->killlock); + __restore_sigs(&set); exit(0); } diff --git a/src/thread/pthread_getschedparam.c b/src/thread/pthread_getschedparam.c index 1cba073d0..c098befb1 100644 --- a/src/thread/pthread_getschedparam.c +++ b/src/thread/pthread_getschedparam.c @@ -4,6 +4,8 @@ int pthread_getschedparam(pthread_t t, int *restrict policy, struct sched_param *restrict param) { int r; + sigset_t set; + __block_app_sigs(&set); LOCK(t->killlock); if (!t->tid) { r = ESRCH; @@ -14,5 +16,6 @@ int pthread_getschedparam(pthread_t t, int *restrict policy, struct sched_param } } UNLOCK(t->killlock); + __restore_sigs(&set); return r; } diff --git a/src/thread/pthread_kill.c b/src/thread/pthread_kill.c index 3d9395cb7..446254b68 100644 --- a/src/thread/pthread_kill.c +++ b/src/thread/pthread_kill.c @@ -4,9 +4,12 @@ int pthread_kill(pthread_t t, int sig) { int r; + sigset_t set; + __block_app_sigs(&set); LOCK(t->killlock); r = t->tid ? -__syscall(SYS_tkill, t->tid, sig) : (sig+0U >= _NSIG ? 
EINVAL : 0); UNLOCK(t->killlock); + __restore_sigs(&set); return r; } diff --git a/src/thread/pthread_setschedparam.c b/src/thread/pthread_setschedparam.c index 038d13d8a..76d4d45a3 100644 --- a/src/thread/pthread_setschedparam.c +++ b/src/thread/pthread_setschedparam.c @@ -4,8 +4,11 @@ int pthread_setschedparam(pthread_t t, int policy, const struct sched_param *param) { int r; + sigset_t set; + __block_app_sigs(&set); LOCK(t->killlock); r = !t->tid ? ESRCH : -__syscall(SYS_sched_setscheduler, t->tid, policy, param); UNLOCK(t->killlock); + __restore_sigs(&set); return r; } diff --git a/src/thread/pthread_setschedprio.c b/src/thread/pthread_setschedprio.c index 5bf4a0197..fc2e13ddb 100644 --- a/src/thread/pthread_setschedprio.c +++ b/src/thread/pthread_setschedprio.c @@ -4,8 +4,11 @@ int pthread_setschedprio(pthread_t t, int prio) { int r; + sigset_t set; + __block_app_sigs(&set); LOCK(t->killlock); r = !t->tid ? ESRCH : -__syscall(SYS_sched_setparam, t->tid, &prio); UNLOCK(t->killlock); + __restore_sigs(&set); return r; } From a3a65f341666d9a7cfd02df1799939c6385d2416 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 6 Jul 2020 18:46:57 -0400 Subject: [PATCH 064/189] fix async-cancel-safety of pthread_cancel the previous commit addressing async-signal-safety issues around pthread_kill did not fully fix pthread_cancel, which is also required (albeit rather irrationally) to be async-cancel-safe. without blocking implementation-internal signals, it's possible that, when async cancellation is enabled, a cancel signal sent by another thread interrupts pthread_kill while the killlock for a targeted thread is held. as a result, the calling thread will terminate due to cancellation without ever unlocking the targeted thread's killlock, and thus the targeted thread will be unable to exit. 
--- src/thread/pthread_kill.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/thread/pthread_kill.c b/src/thread/pthread_kill.c index 446254b68..79ddb2097 100644 --- a/src/thread/pthread_kill.c +++ b/src/thread/pthread_kill.c @@ -5,7 +5,10 @@ int pthread_kill(pthread_t t, int sig) { int r; sigset_t set; - __block_app_sigs(&set); + /* Block not just app signals, but internal ones too, since + * pthread_kill is used to implement pthread_cancel, which + * must be async-cancel-safe. */ + __block_all_sigs(&set); LOCK(t->killlock); r = t->tid ? -__syscall(SYS_tkill, t->tid, sig) : (sig+0U >= _NSIG ? EINVAL : 0); From 2114a3f4f4d96278410e319c7a1c8937ff251e07 Mon Sep 17 00:00:00 2001 From: Bartosz Brachaczek Date: Fri, 17 Jul 2020 01:16:28 +0200 Subject: [PATCH 065/189] getentropy: fix UB if len==0 if len==0, an uninitialized variable would be returned --- src/misc/getentropy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/misc/getentropy.c b/src/misc/getentropy.c index d2f282ce8..651ea95f1 100644 --- a/src/misc/getentropy.c +++ b/src/misc/getentropy.c @@ -6,7 +6,7 @@ int getentropy(void *buffer, size_t len) { - int cs, ret; + int cs, ret = 0; char *pos = buffer; if (len > 256) { From 648805a415506fbc67b07099900b7496ebab7502 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sun, 2 Aug 2020 23:31:51 -0400 Subject: [PATCH 066/189] add m68k sqrtl using native instruction this is actually a functional fix at present, since the C sqrtl does not support ld80 and just wraps double sqrt. once that's fixed it will just be an optimization. 
--- src/math/m68k/sqrtl.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 src/math/m68k/sqrtl.c diff --git a/src/math/m68k/sqrtl.c b/src/math/m68k/sqrtl.c new file mode 100644 index 000000000..b1c303c7e --- /dev/null +++ b/src/math/m68k/sqrtl.c @@ -0,0 +1,15 @@ +#include + +#if __HAVE_68881__ + +long double sqrtl(long double x) +{ + __asm__ ("fsqrt.x %1,%0" : "=f"(x) : "fm"(x)); + return x; +} + +#else + +#include "../sqrtl.c" + +#endif From dde3277b1b6594afd250d36f92cbfb1bfdf5d96f Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 4 Aug 2020 00:21:09 -0400 Subject: [PATCH 067/189] release 1.2.1 --- VERSION | 2 +- WHATSNEW | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 26aaba0e8..6085e9465 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.2.0 +1.2.1 diff --git a/WHATSNEW b/WHATSNEW index ecf0cebab..d9826fc0e 100644 --- a/WHATSNEW +++ b/WHATSNEW @@ -2200,3 +2200,39 @@ arch-specific bugs fixed: - arm dynamic linker chose wrong tls/atomic variants since 1.1.21 - some math library functions returned excess precision on i386 - unconfirmed regression in fchmodat AT_SYMLINK_NOFOLLOW on mips* + + + +1.2.1 release notes + +major changes: +- new malloc implementation (mallocng & overhauled bump allocator) + +new features: +- DNS queries via res_* now set AD flag, report zone signedness (DNSSEC) +- PTHREAD_NULL macro (POSIX-future) + +performance: +- optimized memcpy and memset for aarch64 +- optimized memcpy for arm now supports big endian +- optimized x86_64 remquol +- improved strerror without linear search + +bugs fixed: +- lock-skipping for processes that returned to single-threaded was wrong +- AF_UNSPEC dns lookups mishandled single failure in paired A+AAAA +- res_send and res_query returned wrong value on errors from nameserver +- corrupted sysvipc timestamps on 32-bit archs with old kernels +- incorrect parsing of timezone offsets after overly-long zone name 
+- clock_adjtime was broken on 32-bit archs (time64) +- pthread_kill was not async-signal-safe +- pthread_cancel was not async-cancel-safe +- large-ulp errors in various math functions in non-default rounding modes + +arch-specific bugs fixed: +- arm clock_gettime was broken on some hw due to bad time64 vdso +- m68k sqrtl lacked long double precision +- mips* syscall mechanism regressions on older kernels +- mips* had negated error codes for some syscalls (kernel bug) +- mips* SIGEMT was wrongly called SIGSTKFLT +- sh fesetround didn't work correctly on sh From a1f5cf3eea0205cb43e122d2b6fc85e5e5597579 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 4 Aug 2020 14:20:40 -0400 Subject: [PATCH 068/189] in hosts file lookups, use only first match for canonical name the existing code clobbered the canonical name already discovered every time another matching line was found, which will necessarily be the case when a hostname has both IPv4 and v6 definitions. patch by Wolf. --- src/network/lookup_name.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/network/lookup_name.c b/src/network/lookup_name.c index aae0d95a0..046248456 100644 --- a/src/network/lookup_name.c +++ b/src/network/lookup_name.c @@ -50,7 +50,7 @@ static int name_from_hosts(struct address buf[static MAXADDRS], char canon[stati { char line[512]; size_t l = strlen(name); - int cnt = 0, badfam = 0; + int cnt = 0, badfam = 0, have_canon = 0; unsigned char _buf[1032]; FILE _f, *f = __fopen_rb_ca("/etc/hosts", &_f, _buf, sizeof _buf); if (!f) switch (errno) { @@ -83,11 +83,16 @@ static int name_from_hosts(struct address buf[static MAXADDRS], char canon[stati continue; } + if (have_canon) continue; + /* Extract first name as canonical name */ for (; *p && isspace(*p); p++); for (z=p; *z && !isspace(*z); z++); *z = 0; - if (is_valid_hostname(p)) memcpy(canon, p, z-p+1); + if (is_valid_hostname(p)) { + have_canon = 1; + memcpy(canon, p, z-p+1); + } } __fclose_ca(f); return cnt ? 
cnt : badfam; From 155e1fa0a0f014d9e3321cac2246d0eec2b0d15e Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 5 Aug 2020 21:35:00 -0400 Subject: [PATCH 069/189] in hosts file lookups, honor first canonical name regardless of family prior to this change, the canonical name came from the first hosts file line matching the requested family, so the canonical name for a given hostname could differ depending on whether it was requested with AF_UNSPEC or a particular family (AF_INET or AF_INET6). now, the canonical name is deterministically the first one to appear with the requested name as an alias. --- src/network/lookup_name.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/network/lookup_name.c b/src/network/lookup_name.c index 046248456..aa558c197 100644 --- a/src/network/lookup_name.c +++ b/src/network/lookup_name.c @@ -80,7 +80,7 @@ static int name_from_hosts(struct address buf[static MAXADDRS], char canon[stati continue; default: badfam = EAI_NONAME; - continue; + break; } if (have_canon) continue; From c20833694960b12adb082c7191362ecb34db31dd Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Sat, 13 Jun 2020 22:03:13 +0000 Subject: [PATCH 070/189] math: new software sqrt approximate 1/sqrt(x) and sqrt(x) with goldschmidt iterations. this is known to be a fast method for computing sqrt, but it is tricky to get right, so added detailed comments. use a lookup table for the initial estimate, this adds 256bytes rodata but it can be shared between sqrt, sqrtf and sqrtl. this saves one iteration compared to a linear estimate. this is for soft float targets, but it supports fenv by using a floating-point operation to get the final result. the result is correctly rounded in all rounding modes. if fenv support is turned off then the nearest rounded result is computed and inexact exception is not signaled. assumes fast 32bit integer arithmetics and 32 to 64bit mul. 
--- src/math/sqrt.c | 320 ++++++++++++++++++++----------------------- src/math/sqrt_data.c | 19 +++ src/math/sqrt_data.h | 13 ++ 3 files changed, 179 insertions(+), 173 deletions(-) create mode 100644 src/math/sqrt_data.c create mode 100644 src/math/sqrt_data.h diff --git a/src/math/sqrt.c b/src/math/sqrt.c index f1f6d76c7..5ba265596 100644 --- a/src/math/sqrt.c +++ b/src/math/sqrt.c @@ -1,184 +1,158 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* sqrt(x) - * Return correctly rounded sqrt. - * ------------------------------------------ - * | Use the hardware sqrt if you have one | - * ------------------------------------------ - * Method: - * Bit by bit method using integer arithmetic. (Slow, but portable) - * 1. Normalization - * Scale x to y in [1,4) with even powers of 2: - * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then - * sqrt(x) = 2^k * sqrt(y) - * 2. Bit by bit computation - * Let q = sqrt(y) truncated to i bit after binary point (q = 1), - * i 0 - * i+1 2 - * s = 2*q , and y = 2 * ( y - q ). (1) - * i i i i - * - * To compute q from q , one checks whether - * i+1 i - * - * -(i+1) 2 - * (q + 2 ) <= y. (2) - * i - * -(i+1) - * If (2) is false, then q = q ; otherwise q = q + 2 . 
- * i+1 i i+1 i - * - * With some algebric manipulation, it is not difficult to see - * that (2) is equivalent to - * -(i+1) - * s + 2 <= y (3) - * i i - * - * The advantage of (3) is that s and y can be computed by - * i i - * the following recurrence formula: - * if (3) is false - * - * s = s , y = y ; (4) - * i+1 i i+1 i - * - * otherwise, - * -i -(i+1) - * s = s + 2 , y = y - s - 2 (5) - * i+1 i i+1 i i - * - * One may easily use induction to prove (4) and (5). - * Note. Since the left hand side of (3) contain only i+2 bits, - * it does not necessary to do a full (53-bit) comparison - * in (3). - * 3. Final rounding - * After generating the 53 bits result, we compute one more bit. - * Together with the remainder, we can decide whether the - * result is exact, bigger than 1/2ulp, or less than 1/2ulp - * (it will never equal to 1/2ulp). - * The rounding mode can be detected by checking whether - * huge + tiny is equal to huge, and whether huge - tiny is - * equal to huge for some floating point number "huge" and "tiny". - * - * Special cases: - * sqrt(+-0) = +-0 ... exact - * sqrt(inf) = inf - * sqrt(-ve) = NaN ... with invalid signal - * sqrt(NaN) = NaN ... with invalid signal for signaling NaN - */ - +#include +#include #include "libm.h" +#include "sqrt_data.h" -static const double tiny = 1.0e-300; +#define FENV_SUPPORT 1 -double sqrt(double x) +/* returns a*b*2^-32 - e, with error 0 <= e < 1. */ +static inline uint32_t mul32(uint32_t a, uint32_t b) { - double z; - int32_t sign = (int)0x80000000; - int32_t ix0,s0,q,m,t,i; - uint32_t r,t1,s1,ix1,q1; + return (uint64_t)a*b >> 32; +} - EXTRACT_WORDS(ix0, ix1, x); +/* returns a*b*2^-64 - e, with error 0 <= e < 3. 
*/ +static inline uint64_t mul64(uint64_t a, uint64_t b) +{ + uint64_t ahi = a>>32; + uint64_t alo = a&0xffffffff; + uint64_t bhi = b>>32; + uint64_t blo = b&0xffffffff; + return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32); +} - /* take care of Inf and NaN */ - if ((ix0&0x7ff00000) == 0x7ff00000) { - return x*x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ - } - /* take care of zero */ - if (ix0 <= 0) { - if (((ix0&~sign)|ix1) == 0) - return x; /* sqrt(+-0) = +-0 */ - if (ix0 < 0) - return (x-x)/(x-x); /* sqrt(-ve) = sNaN */ - } - /* normalize x */ - m = ix0>>20; - if (m == 0) { /* subnormal x */ - while (ix0 == 0) { - m -= 21; - ix0 |= (ix1>>11); - ix1 <<= 21; - } - for (i=0; (ix0&0x00100000) == 0; i++) - ix0<<=1; - m -= i - 1; - ix0 |= ix1>>(32-i); - ix1 <<= i; - } - m -= 1023; /* unbias exponent */ - ix0 = (ix0&0x000fffff)|0x00100000; - if (m & 1) { /* odd m, double x to make it even */ - ix0 += ix0 + ((ix1&sign)>>31); - ix1 += ix1; - } - m >>= 1; /* m = [m/2] */ - - /* generate sqrt(x) bit by bit */ - ix0 += ix0 + ((ix1&sign)>>31); - ix1 += ix1; - q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */ - r = 0x00200000; /* r = moving bit from right to left */ - - while (r != 0) { - t = s0 + r; - if (t <= ix0) { - s0 = t + r; - ix0 -= t; - q += r; - } - ix0 += ix0 + ((ix1&sign)>>31); - ix1 += ix1; - r >>= 1; - } +double sqrt(double x) +{ + uint64_t ix, top, m; - r = sign; - while (r != 0) { - t1 = s1 + r; - t = s0; - if (t < ix0 || (t == ix0 && t1 <= ix1)) { - s1 = t1 + r; - if ((t1&sign) == sign && (s1&sign) == 0) - s0++; - ix0 -= t; - if (ix1 < t1) - ix0--; - ix1 -= t1; - q1 += r; - } - ix0 += ix0 + ((ix1&sign)>>31); - ix1 += ix1; - r >>= 1; + /* special case handling. */ + ix = asuint64(x); + top = ix >> 52; + if (predict_false(top - 0x001 >= 0x7ff - 0x001)) { + /* x < 0x1p-1022 or inf or nan. 
*/ + if (ix * 2 == 0) + return x; + if (ix == 0x7ff0000000000000) + return x; + if (ix > 0x7ff0000000000000) + return __math_invalid(x); + /* x is subnormal, normalize it. */ + ix = asuint64(x * 0x1p52); + top = ix >> 52; + top -= 52; } - /* use floating add to find out rounding direction */ - if ((ix0|ix1) != 0) { - z = 1.0 - tiny; /* raise inexact flag */ - if (z >= 1.0) { - z = 1.0 + tiny; - if (q1 == (uint32_t)0xffffffff) { - q1 = 0; - q++; - } else if (z > 1.0) { - if (q1 == (uint32_t)0xfffffffe) - q++; - q1 += 2; - } else - q1 += q1 & 1; - } + /* argument reduction: + x = 4^e m; with integer e, and m in [1, 4) + m: fixed point representation [2.62] + 2^e is the exponent part of the result. */ + int even = top & 1; + m = (ix << 11) | 0x8000000000000000; + if (even) m >>= 1; + top = (top + 0x3ff) >> 1; + + /* approximate r ~ 1/sqrt(m) and s ~ sqrt(m) when m in [1,4) + + initial estimate: + 7bit table lookup (1bit exponent and 6bit significand). + + iterative approximation: + using 2 goldschmidt iterations with 32bit int arithmetics + and a final iteration with 64bit int arithmetics. + + details: + + the relative error (e = r0 sqrt(m)-1) of a linear estimate + (r0 = a m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best, + a table lookup is faster and needs one less iteration + 6 bit lookup table (128b) gives |e| < 0x1.f9p-8 + 7 bit lookup table (256b) gives |e| < 0x1.fdp-9 + for single and double prec 6bit is enough but for quad + prec 7bit is needed (or modified iterations). to avoid + one more iteration >=13bit table would be needed (16k). + + a newton-raphson iteration for r is + w = r*r + u = 3 - m*w + r = r*u/2 + can use a goldschmidt iteration for s at the end or + s = m*r + + first goldschmidt iteration is + s = m*r + u = 3 - s*r + r = r*u/2 + s = s*u/2 + next goldschmidt iteration is + u = 3 - s*r + r = r*u/2 + s = s*u/2 + and at the end r is not computed only s. + + they use the same amount of operations and converge at the + same quadratic rate, i.e. 
if + r1 sqrt(m) - 1 = e, then + r2 sqrt(m) - 1 = -3/2 e^2 - 1/2 e^3 + the advantage of goldschmidt is that the mul for s and r + are independent (computed in parallel), however it is not + "self synchronizing": it only uses the input m in the + first iteration so rounding errors accumulate. at the end + or when switching to larger precision arithmetics rounding + errors dominate so the first iteration should be used. + + the fixed point representations are + m: 2.30 r: 0.32, s: 2.30, d: 2.30, u: 2.30, three: 2.30 + and after switching to 64 bit + m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62 */ + + static const uint64_t three = 0xc0000000; + uint64_t r, s, d, u, i; + + i = (ix >> 46) % 128; + r = (uint32_t)__rsqrt_tab[i] << 16; + /* |r sqrt(m) - 1| < 0x1.fdp-9 */ + s = mul32(m>>32, r); + /* |s/sqrt(m) - 1| < 0x1.fdp-9 */ + d = mul32(s, r); + u = three - d; + r = mul32(r, u) << 1; + /* |r sqrt(m) - 1| < 0x1.7bp-16 */ + s = mul32(s, u) << 1; + /* |s/sqrt(m) - 1| < 0x1.7bp-16 */ + d = mul32(s, r); + u = three - d; + r = mul32(r, u) << 1; + /* |r sqrt(m) - 1| < 0x1.3704p-29 (measured worst-case) */ + r = r << 32; + s = mul64(m, r); + d = mul64(s, r); + u = (three<<32) - d; + s = mul64(s, u); /* repr: 3.61 */ + /* -0x1p-57 < s - sqrt(m) < 0x1.8001p-61 */ + s = (s - 2) >> 9; /* repr: 12.52 */ + /* -0x1.09p-52 < s - sqrt(m) < -0x1.fffcp-63 */ + + /* s < sqrt(m) < s + 0x1.09p-52, + compute nearest rounded result: + the nearest result to 52 bits is either s or s+0x1p-52, + we can decide by comparing (2^52 s + 0.5)^2 to 2^104 m. */ + uint64_t d0, d1, d2; + double y, t; + d0 = (m << 42) - s*s; + d1 = s - d0; + d2 = d1 + s + 1; + s += d1 >> 63; + s &= 0x000fffffffffffff; + s |= top << 52; + y = asdouble(s); + if (FENV_SUPPORT) { + /* handle rounding modes and inexact exception: + only (s+1)^2 == 2^42 m case is exact otherwise + add a tiny value to cause the fenv effects. */ + uint64_t tiny = predict_false(d2==0) ? 
0 : 0x0010000000000000; + tiny |= (d1^d2) & 0x8000000000000000; + t = asdouble(tiny); + y = eval_as_double(y + t); } - ix0 = (q>>1) + 0x3fe00000; - ix1 = q1>>1; - if (q&1) - ix1 |= sign; - INSERT_WORDS(z, ix0 + ((uint32_t)m << 20), ix1); - return z; + return y; } diff --git a/src/math/sqrt_data.c b/src/math/sqrt_data.c new file mode 100644 index 000000000..61bc22f43 --- /dev/null +++ b/src/math/sqrt_data.c @@ -0,0 +1,19 @@ +#include "sqrt_data.h" +const uint16_t __rsqrt_tab[128] = { +0xb451,0xb2f0,0xb196,0xb044,0xaef9,0xadb6,0xac79,0xab43, +0xaa14,0xa8eb,0xa7c8,0xa6aa,0xa592,0xa480,0xa373,0xa26b, +0xa168,0xa06a,0x9f70,0x9e7b,0x9d8a,0x9c9d,0x9bb5,0x9ad1, +0x99f0,0x9913,0x983a,0x9765,0x9693,0x95c4,0x94f8,0x9430, +0x936b,0x92a9,0x91ea,0x912e,0x9075,0x8fbe,0x8f0a,0x8e59, +0x8daa,0x8cfe,0x8c54,0x8bac,0x8b07,0x8a64,0x89c4,0x8925, +0x8889,0x87ee,0x8756,0x86c0,0x862b,0x8599,0x8508,0x8479, +0x83ec,0x8361,0x82d8,0x8250,0x81c9,0x8145,0x80c2,0x8040, +0xff02,0xfd0e,0xfb25,0xf947,0xf773,0xf5aa,0xf3ea,0xf234, +0xf087,0xeee3,0xed47,0xebb3,0xea27,0xe8a3,0xe727,0xe5b2, +0xe443,0xe2dc,0xe17a,0xe020,0xdecb,0xdd7d,0xdc34,0xdaf1, +0xd9b3,0xd87b,0xd748,0xd61a,0xd4f1,0xd3cd,0xd2ad,0xd192, +0xd07b,0xcf69,0xce5b,0xcd51,0xcc4a,0xcb48,0xca4a,0xc94f, +0xc858,0xc764,0xc674,0xc587,0xc49d,0xc3b7,0xc2d4,0xc1f4, +0xc116,0xc03c,0xbf65,0xbe90,0xbdbe,0xbcef,0xbc23,0xbb59, +0xba91,0xb9cc,0xb90a,0xb84a,0xb78c,0xb6d0,0xb617,0xb560, +}; diff --git a/src/math/sqrt_data.h b/src/math/sqrt_data.h new file mode 100644 index 000000000..260c7f9c2 --- /dev/null +++ b/src/math/sqrt_data.h @@ -0,0 +1,13 @@ +#ifndef _SQRT_DATA_H +#define _SQRT_DATA_H + +#include +#include + +/* if x in [1,2): i = (int)(64*x); + if x in [2,4): i = (int)(32*x-64); + __rsqrt_tab[i]*2^-16 is estimating 1/sqrt(x) with small relative error: + |__rsqrt_tab[i]*0x1p-16*sqrt(x) - 1| < -0x1.fdp-9 < 2^-8 */ +extern hidden const uint16_t __rsqrt_tab[128]; + +#endif From bfd77d268c72b4205343077a2aac9ece557c413c Mon Sep 17 00:00:00 2001 From: 
Szabolcs Nagy Date: Fri, 12 Jun 2020 17:34:28 +0000 Subject: [PATCH 071/189] math: new software sqrtf same method as in sqrt, this was tested on all inputs against an sqrtf instruction. (the only difference found was that x86 sqrtf does not signal the x86 specific input-denormal exception on negative subnormal inputs while the software sqrtf does, this is fine as it was designed for ieee754 exceptions only.) there is known faster method: "Computing Floating-Point Square Roots via Bivariate Polynomial Evaluation" that computes sqrtf directly via pipelined polynomial evaluation which allows more parallelism, but the design does not generalize easily to higher precisions. --- src/math/sqrtf.c | 140 +++++++++++++++++++++++------------------------ 1 file changed, 70 insertions(+), 70 deletions(-) diff --git a/src/math/sqrtf.c b/src/math/sqrtf.c index d6ace38aa..740d81cba 100644 --- a/src/math/sqrtf.c +++ b/src/math/sqrtf.c @@ -1,83 +1,83 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - +#include +#include #include "libm.h" +#include "sqrt_data.h" -static const float tiny = 1.0e-30; +#define FENV_SUPPORT 1 -float sqrtf(float x) +static inline uint32_t mul32(uint32_t a, uint32_t b) { - float z; - int32_t sign = (int)0x80000000; - int32_t ix,s,q,m,t,i; - uint32_t r; + return (uint64_t)a*b >> 32; +} - GET_FLOAT_WORD(ix, x); +/* see sqrt.c for more detailed comments. 
*/ - /* take care of Inf and NaN */ - if ((ix&0x7f800000) == 0x7f800000) - return x*x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ +float sqrtf(float x) +{ + uint32_t ix, m, m1, m0, even, ey; - /* take care of zero */ - if (ix <= 0) { - if ((ix&~sign) == 0) - return x; /* sqrt(+-0) = +-0 */ - if (ix < 0) - return (x-x)/(x-x); /* sqrt(-ve) = sNaN */ - } - /* normalize x */ - m = ix>>23; - if (m == 0) { /* subnormal x */ - for (i = 0; (ix&0x00800000) == 0; i++) - ix<<=1; - m -= i - 1; + ix = asuint(x); + if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) { + /* x < 0x1p-126 or inf or nan. */ + if (ix * 2 == 0) + return x; + if (ix == 0x7f800000) + return x; + if (ix > 0x7f800000) + return __math_invalidf(x); + /* x is subnormal, normalize it. */ + ix = asuint(x * 0x1p23f); + ix -= 23 << 23; } - m -= 127; /* unbias exponent */ - ix = (ix&0x007fffff)|0x00800000; - if (m&1) /* odd m, double x to make it even */ - ix += ix; - m >>= 1; /* m = [m/2] */ - /* generate sqrt(x) bit by bit */ - ix += ix; - q = s = 0; /* q = sqrt(x) */ - r = 0x01000000; /* r = moving bit from right to left */ + /* x = 4^e m; with int e and m in [1, 4). */ + even = ix & 0x00800000; + m1 = (ix << 8) | 0x80000000; + m0 = (ix << 7) & 0x7fffffff; + m = even ? m0 : m1; - while (r != 0) { - t = s + r; - if (t <= ix) { - s = t+r; - ix -= t; - q += r; - } - ix += ix; - r >>= 1; - } + /* 2^e is the exponent part of the return value. */ + ey = ix >> 1; + ey += 0x3f800000 >> 1; + ey &= 0x7f800000; + + /* compute r ~ 1/sqrt(m), s ~ sqrt(m) with 2 goldschmidt iterations. 
*/ + static const uint32_t three = 0xc0000000; + uint32_t r, s, d, u, i; + i = (ix >> 17) % 128; + r = (uint32_t)__rsqrt_tab[i] << 16; + /* |r*sqrt(m) - 1| < 0x1p-8 */ + s = mul32(m, r); + /* |s/sqrt(m) - 1| < 0x1p-8 */ + d = mul32(s, r); + u = three - d; + r = mul32(r, u) << 1; + /* |r*sqrt(m) - 1| < 0x1.7bp-16 */ + s = mul32(s, u) << 1; + /* |s/sqrt(m) - 1| < 0x1.7bp-16 */ + d = mul32(s, r); + u = three - d; + s = mul32(s, u); + /* -0x1.03p-28 < s/sqrt(m) - 1 < 0x1.fp-31 */ + s = (s - 1)>>6; + /* s < sqrt(m) < s + 0x1.08p-23 */ - /* use floating add to find out rounding direction */ - if (ix != 0) { - z = 1.0f - tiny; /* raise inexact flag */ - if (z >= 1.0f) { - z = 1.0f + tiny; - if (z > 1.0f) - q += 2; - else - q += q & 1; - } + /* compute nearest rounded result. */ + uint32_t d0, d1, d2; + float y, t; + d0 = (m << 16) - s*s; + d1 = s - d0; + d2 = d1 + s + 1; + s += d1 >> 31; + s &= 0x007fffff; + s |= ey; + y = asfloat(s); + if (FENV_SUPPORT) { + /* handle rounding and inexact exception. */ + uint32_t tiny = predict_false(d2==0) ? 0 : 0x01000000; + tiny |= (d1^d2) & 0x80000000; + t = asfloat(tiny); + y = eval_as_float(y + t); } - ix = (q>>1) + 0x3f000000; - SET_FLOAT_WORD(z, ix + ((uint32_t)m << 23)); - return z; + return y; } From dc455e93c70b8e6f988fddb3b73def2b209a2a8a Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Mon, 29 Jun 2020 17:14:42 +0000 Subject: [PATCH 072/189] math: add __math_invalidl for targets where long double is different from double. 
--- src/internal/libm.h | 3 +++ src/math/__math_invalidl.c | 9 +++++++++ 2 files changed, 12 insertions(+) create mode 100644 src/math/__math_invalidl.c diff --git a/src/internal/libm.h b/src/internal/libm.h index 7533f6bae..72ad17d8e 100644 --- a/src/internal/libm.h +++ b/src/internal/libm.h @@ -267,5 +267,8 @@ hidden double __math_uflow(uint32_t); hidden double __math_oflow(uint32_t); hidden double __math_divzero(uint32_t); hidden double __math_invalid(double); +#if LDBL_MANT_DIG != DBL_MANT_DIG +hidden long double __math_invalidl(long double); +#endif #endif diff --git a/src/math/__math_invalidl.c b/src/math/__math_invalidl.c new file mode 100644 index 000000000..1fca99de4 --- /dev/null +++ b/src/math/__math_invalidl.c @@ -0,0 +1,9 @@ +#include +#include "libm.h" + +#if LDBL_MANT_DIG != DBL_MANT_DIG +long double __math_invalidl(long double x) +{ + return (x - x) / (x - x); +} +#endif From b01ea2af01ffab0a5e3a7a01206f71ab084ad4da Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Sun, 14 Jun 2020 13:41:21 +0000 Subject: [PATCH 073/189] math: new software sqrtl same approach as in sqrt. sqrtl was broken on aarch64, riscv64 and s390x targets because of missing quad precision support and on m68k-sf because of missing ld80 sqrtl. this implementation is written for quad precision and then edited to make it work for both m68k and x86 style ld80 formats too, but it is not expected to be optimal for them. note: using fp instructions for the initial estimate when such instructions are available (e.g. double prec sqrt or rsqrt) is avoided because of fenv correctness. 
--- src/math/sqrtl.c | 254 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 253 insertions(+), 1 deletion(-) diff --git a/src/math/sqrtl.c b/src/math/sqrtl.c index 83a8f80c9..1b9f19c7d 100644 --- a/src/math/sqrtl.c +++ b/src/math/sqrtl.c @@ -1,7 +1,259 @@ +#include #include +#include +#include "libm.h" +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 long double sqrtl(long double x) { - /* FIXME: implement in C, this is for LDBL_MANT_DIG == 64 only */ return sqrt(x); } +#elif (LDBL_MANT_DIG == 113 || LDBL_MANT_DIG == 64) && LDBL_MAX_EXP == 16384 +#include "sqrt_data.h" + +#define FENV_SUPPORT 1 + +typedef struct { + uint64_t hi; + uint64_t lo; +} u128; + +/* top: 16 bit sign+exponent, x: significand. */ +static inline long double mkldbl(uint64_t top, u128 x) +{ + union ldshape u; +#if LDBL_MANT_DIG == 113 + u.i2.hi = x.hi; + u.i2.lo = x.lo; + u.i2.hi &= 0x0000ffffffffffff; + u.i2.hi |= top << 48; +#elif LDBL_MANT_DIG == 64 + u.i.se = top; + u.i.m = x.lo; + /* force the top bit on non-zero (and non-subnormal) results. */ + if (top & 0x7fff) + u.i.m |= 0x8000000000000000; +#endif + return u.f; +} + +/* return: top 16 bit is sign+exp and following bits are the significand. */ +static inline u128 asu128(long double x) +{ + union ldshape u = {.f=x}; + u128 r; +#if LDBL_MANT_DIG == 113 + r.hi = u.i2.hi; + r.lo = u.i2.lo; +#elif LDBL_MANT_DIG == 64 + r.lo = u.i.m<<49; + /* ignore the top bit: pseudo numbers are not handled. */ + r.hi = u.i.m>>15; + r.hi &= 0x0000ffffffffffff; + r.hi |= (uint64_t)u.i.se << 48; +#endif + return r; +} + +/* returns a*b*2^-32 - e, with error 0 <= e < 1. */ +static inline uint32_t mul32(uint32_t a, uint32_t b) +{ + return (uint64_t)a*b >> 32; +} + +/* returns a*b*2^-64 - e, with error 0 <= e < 3. 
*/ +static inline uint64_t mul64(uint64_t a, uint64_t b) +{ + uint64_t ahi = a>>32; + uint64_t alo = a&0xffffffff; + uint64_t bhi = b>>32; + uint64_t blo = b&0xffffffff; + return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32); +} + +static inline u128 add64(u128 a, uint64_t b) +{ + u128 r; + r.lo = a.lo + b; + r.hi = a.hi; + if (r.lo < a.lo) + r.hi++; + return r; +} + +static inline u128 add128(u128 a, u128 b) +{ + u128 r; + r.lo = a.lo + b.lo; + r.hi = a.hi + b.hi; + if (r.lo < a.lo) + r.hi++; + return r; +} + +static inline u128 sub64(u128 a, uint64_t b) +{ + u128 r; + r.lo = a.lo - b; + r.hi = a.hi; + if (a.lo < b) + r.hi--; + return r; +} + +static inline u128 sub128(u128 a, u128 b) +{ + u128 r; + r.lo = a.lo - b.lo; + r.hi = a.hi - b.hi; + if (a.lo < b.lo) + r.hi--; + return r; +} + +/* a<= 64) { + a.hi = a.lo<<(n-64); + a.lo = 0; + } else { + a.hi = (a.hi<>(64-n)); + a.lo = a.lo<>n, 0 <= n <= 127 */ +static inline u128 rsh(u128 a, int n) +{ + if (n == 0) + return a; + if (n >= 64) { + a.lo = a.hi>>(n-64); + a.hi = 0; + } else { + a.lo = (a.lo>>n) | (a.hi<<(64-n)); + a.hi = a.hi>>n; + } + return a; +} + +/* returns a*b exactly. */ +static inline u128 mul64_128(uint64_t a, uint64_t b) +{ + u128 r; + uint64_t ahi = a>>32; + uint64_t alo = a&0xffffffff; + uint64_t bhi = b>>32; + uint64_t blo = b&0xffffffff; + uint64_t lo1 = ((ahi*blo)&0xffffffff) + ((alo*bhi)&0xffffffff) + (alo*blo>>32); + uint64_t lo2 = (alo*blo)&0xffffffff; + r.hi = ahi*bhi + (ahi*blo>>32) + (alo*bhi>>32) + (lo1>>32); + r.lo = (lo1<<32) + lo2; + return r; +} + +/* returns a*b*2^-128 - e, with error 0 <= e < 7. */ +static inline u128 mul128(u128 a, u128 b) +{ + u128 hi = mul64_128(a.hi, b.hi); + uint64_t m1 = mul64(a.hi, b.lo); + uint64_t m2 = mul64(a.lo, b.hi); + return add64(add64(hi, m1), m2); +} + +/* returns a*b % 2^128. 
*/ +static inline u128 mul128_tail(u128 a, u128 b) +{ + u128 lo = mul64_128(a.lo, b.lo); + lo.hi += a.hi*b.lo + a.lo*b.hi; + return lo; +} + + +/* see sqrt.c for detailed comments. */ + +long double sqrtl(long double x) +{ + u128 ix, ml; + uint64_t top; + + ix = asu128(x); + top = ix.hi >> 48; + if (predict_false(top - 0x0001 >= 0x7fff - 0x0001)) { + /* x < 0x1p-16382 or inf or nan. */ + if (2*ix.hi == 0 && ix.lo == 0) + return x; + if (ix.hi == 0x7fff000000000000 && ix.lo == 0) + return x; + if (top >= 0x7fff) + return __math_invalidl(x); + /* x is subnormal, normalize it. */ + ix = asu128(x * 0x1p112); + top = ix.hi >> 48; + top -= 112; + } + + /* x = 4^e m; with int e and m in [1, 4) */ + int even = top & 1; + ml = lsh(ix, 15); + ml.hi |= 0x8000000000000000; + if (even) ml = rsh(ml, 1); + top = (top + 0x3fff) >> 1; + + /* r ~ 1/sqrt(m) */ + static const uint64_t three = 0xc0000000; + uint64_t r, s, d, u, i; + i = (ix.hi >> 42) % 128; + r = (uint32_t)__rsqrt_tab[i] << 16; + /* |r sqrt(m) - 1| < 0x1p-8 */ + s = mul32(ml.hi>>32, r); + d = mul32(s, r); + u = three - d; + r = mul32(u, r) << 1; + /* |r sqrt(m) - 1| < 0x1.7bp-16, switch to 64bit */ + r = r<<32; + s = mul64(ml.hi, r); + d = mul64(s, r); + u = (three<<32) - d; + r = mul64(u, r) << 1; + /* |r sqrt(m) - 1| < 0x1.a5p-31 */ + s = mul64(u, s) << 1; + d = mul64(s, r); + u = (three<<32) - d; + r = mul64(u, r) << 1; + /* |r sqrt(m) - 1| < 0x1.c001p-59, switch to 128bit */ + + static const u128 threel = {.hi=three<<32, .lo=0}; + u128 rl, sl, dl, ul; + rl.hi = r; + rl.lo = 0; + sl = mul128(ml, rl); + dl = mul128(sl, rl); + ul = sub128(threel, dl); + sl = mul128(ul, sl); /* repr: 3.125 */ + /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */ + sl = rsh(sub64(sl, 4), 125-(LDBL_MANT_DIG-1)); + /* s < sqrt(m) < s + 1 ULP + tiny */ + + long double y; + u128 d2, d1, d0; + d0 = sub128(lsh(ml, 2*(LDBL_MANT_DIG-1)-126), mul128_tail(sl,sl)); + d1 = sub128(sl, d0); + d2 = add128(add64(sl, 1), d1); + sl = add64(sl, d1.hi >> 63); + 
y = mkldbl(top, sl); + if (FENV_SUPPORT) { + /* handle rounding modes and inexact exception. */ + top = predict_false((d2.hi|d2.lo)==0) ? 0 : 1; + top |= ((d1.hi^d2.hi)&0x8000000000000000) >> 48; + y += mkldbl(top, (u128){0}); + } + return y; +} +#else +#error unsupported long double format +#endif From 1574627bd3bf573e86273d107338d8e674cba496 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sat, 8 Aug 2020 20:59:26 -0400 Subject: [PATCH 074/189] prefer new socket syscalls, fallback to SYS_socketcall only if needed a number of users performing seccomp filtering have requested use of the new individual syscall numbers for socket syscalls, rather than the legacy multiplexed socketcall, since the latter has the arguments all in memory where they can't participate in filter decisions. previously, some archs used the multiplexed socketcall if it was historically all that was available, while other archs used the separate syscalls. the intent was that the latter set only include archs that have "always" had separate socket syscalls, at least going back to linux 2.6.0. however, at least powerpc, powerpc64, and sh were wrongly included in this set, and thus socket operations completely failed on old kernels for these archs. with the changes made here, the separate syscalls are always preferred, but fallback code is compiled for archs that also define SYS_socketcall. two such archs, mips (plain o32) and microblaze, define SYS_socketcall despite never having needed it, so it's now undefined by their versions of syscall_arch.h to prevent inclusion of useless fallback code. some archs, where the separate syscalls were only added after the addition of SYS_accept4, lack SYS_accept. because socket calls are always made with zeros in the unused argument positions, it suffices to just use SYS_accept4 to provide a definition of SYS_accept, and this is done to make happy the macro machinery that concatenates the socket call name onto __SC_ and SYS_. 
--- arch/i386/syscall_arch.h | 2 -- arch/m68k/syscall_arch.h | 1 - arch/microblaze/syscall_arch.h | 2 ++ arch/mips/syscall_arch.h | 2 ++ arch/s390x/syscall_arch.h | 2 -- src/internal/syscall.h | 32 +++++++++++++++++++++++--------- 6 files changed, 27 insertions(+), 14 deletions(-) diff --git a/arch/i386/syscall_arch.h b/arch/i386/syscall_arch.h index 69642e578..f92b7aa9f 100644 --- a/arch/i386/syscall_arch.h +++ b/arch/i386/syscall_arch.h @@ -87,5 +87,3 @@ static inline long __syscall6(long n, long a1, long a2, long a3, long a4, long a #define VDSO_CGT32_VER "LINUX_2.6" #define VDSO_CGT_SYM "__vdso_clock_gettime64" #define VDSO_CGT_VER "LINUX_2.6" - -#define SYSCALL_USE_SOCKETCALL diff --git a/arch/m68k/syscall_arch.h b/arch/m68k/syscall_arch.h index af79c3069..6a9d0ae8e 100644 --- a/arch/m68k/syscall_arch.h +++ b/arch/m68k/syscall_arch.h @@ -87,5 +87,4 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo return d0; } -#define SYSCALL_USE_SOCKETCALL #define SYSCALL_IPC_BROKEN_MODE diff --git a/arch/microblaze/syscall_arch.h b/arch/microblaze/syscall_arch.h index 169013f80..61d8248e8 100644 --- a/arch/microblaze/syscall_arch.h +++ b/arch/microblaze/syscall_arch.h @@ -95,3 +95,5 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo } #define SYSCALL_IPC_BROKEN_MODE + +#undef SYS_socketcall diff --git a/arch/mips/syscall_arch.h b/arch/mips/syscall_arch.h index 380a94b36..5b7c38de2 100644 --- a/arch/mips/syscall_arch.h +++ b/arch/mips/syscall_arch.h @@ -149,3 +149,5 @@ static inline long __syscall7(long n, long a, long b, long c, long d, long e, lo #define SO_SNDTIMEO_OLD 0x1005 #define SO_RCVTIMEO_OLD 0x1006 + +#undef SYS_socketcall diff --git a/arch/s390x/syscall_arch.h b/arch/s390x/syscall_arch.h index afb99852e..83cc9a27c 100644 --- a/arch/s390x/syscall_arch.h +++ b/arch/s390x/syscall_arch.h @@ -72,5 +72,3 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, lo register long r7 
__asm__("r7") = f; __asm_syscall("+r"(r2), "r"(r1), "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7)); } - -#define SYSCALL_USE_SOCKETCALL diff --git a/src/internal/syscall.h b/src/internal/syscall.h index 975a0031d..d5f294d43 100644 --- a/src/internal/syscall.h +++ b/src/internal/syscall.h @@ -2,6 +2,7 @@ #define _INTERNAL_SYSCALL_H #include +#include #include #include "syscall_arch.h" @@ -57,15 +58,22 @@ hidden long __syscall_ret(unsigned long), #define __syscall_cp(...) __SYSCALL_DISP(__syscall_cp,__VA_ARGS__) #define syscall_cp(...) __syscall_ret(__syscall_cp(__VA_ARGS__)) -#ifndef SYSCALL_USE_SOCKETCALL -#define __socketcall(nm,a,b,c,d,e,f) __syscall(SYS_##nm, a, b, c, d, e, f) -#define __socketcall_cp(nm,a,b,c,d,e,f) __syscall_cp(SYS_##nm, a, b, c, d, e, f) -#else -#define __socketcall(nm,a,b,c,d,e,f) __syscall(SYS_socketcall, __SC_##nm, \ - ((long [6]){ (long)a, (long)b, (long)c, (long)d, (long)e, (long)f })) -#define __socketcall_cp(nm,a,b,c,d,e,f) __syscall_cp(SYS_socketcall, __SC_##nm, \ - ((long [6]){ (long)a, (long)b, (long)c, (long)d, (long)e, (long)f })) -#endif +static inline long __alt_socketcall(int sys, int sock, int cp, long a, long b, long c, long d, long e, long f) +{ + long r; + if (cp) r = __syscall_cp(sys, a, b, c, d, e, f); + else r = __syscall(sys, a, b, c, d, e, f); + if (r != -ENOSYS) return r; +#ifdef SYS_socketcall + if (cp) r = __syscall_cp(SYS_socketcall, sock, ((long[6]){a, b, c, d, e, f})); + else r = __syscall(SYS_socketcall, sock, ((long[6]){a, b, c, d, e, f})); +#endif + return r; +} +#define __socketcall(nm, a, b, c, d, e, f) __alt_socketcall(SYS_##nm, __SC_##nm, 0, \ + (long)(a), (long)(b), (long)(c), (long)(d), (long)(e), (long)(f)) +#define __socketcall_cp(nm, a, b, c, d, e, f) __alt_socketcall(SYS_##nm, __SC_##nm, 1, \ + (long)(a), (long)(b), (long)(c), (long)(d), (long)(e), (long)(f)) /* fixup legacy 16-bit junk */ @@ -338,6 +346,12 @@ hidden long __syscall_ret(unsigned long), #define __SC_recvmmsg 19 #define __SC_sendmmsg 20 
+/* This is valid only because all socket syscalls are made via + * socketcall, which always fills unused argument slots with zeros. */ +#ifndef SYS_accept +#define SYS_accept SYS_accept4 +#endif + #ifndef SO_RCVTIMEO_OLD #define SO_RCVTIMEO_OLD 20 #endif From d167ed3943901120a4155f44b2494dfe3af2fbef Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Tue, 11 Aug 2020 21:11:14 +0300 Subject: [PATCH 075/189] setjmp: fix x86-64 longjmp argument adjustment longjmp 'val' argument is an int, but the assembly is referencing 64-bit registers as if the argument was a long, or the caller was responsible for extending the argument. Though the psABI is not clear on this, the interpretation in GCC is that high bits may be arbitrary and the callee is responsible for sign/zero-extending the value as needed (likewise for return values: callers must anticipate that high bits may be garbage). Therefore testing %rax is a functional bug: setjmp would wrongly return zero if longjmp was called with val==0, but high bits of %rsi happened to be non-zero. Rewrite the prologue to refer to 32-bit registers. In passing, change 'test' to use %rsi, as there's no advantage to using %rax and the new form is cheaper on processors that do not perform move elimination. 
--- src/setjmp/x32/longjmp.s | 6 +++--- src/setjmp/x86_64/longjmp.s | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/setjmp/x32/longjmp.s b/src/setjmp/x32/longjmp.s index e175a4b96..e709acad9 100644 --- a/src/setjmp/x32/longjmp.s +++ b/src/setjmp/x32/longjmp.s @@ -5,10 +5,10 @@ .type longjmp,@function _longjmp: longjmp: - mov %rsi,%rax /* val will be longjmp return */ - test %rax,%rax + mov %esi,%eax /* val will be longjmp return */ + test %esi,%esi jnz 1f - inc %rax /* if val==0, val=1 per longjmp semantics */ + inc %eax /* if val==0, val=1 per longjmp semantics */ 1: mov (%rdi),%rbx /* rdi is the jmp_buf, restore regs from it */ mov 8(%rdi),%rbp diff --git a/src/setjmp/x86_64/longjmp.s b/src/setjmp/x86_64/longjmp.s index e175a4b96..e709acad9 100644 --- a/src/setjmp/x86_64/longjmp.s +++ b/src/setjmp/x86_64/longjmp.s @@ -5,10 +5,10 @@ .type longjmp,@function _longjmp: longjmp: - mov %rsi,%rax /* val will be longjmp return */ - test %rax,%rax + mov %esi,%eax /* val will be longjmp return */ + test %esi,%esi jnz 1f - inc %rax /* if val==0, val=1 per longjmp semantics */ + inc %eax /* if val==0, val=1 per longjmp semantics */ 1: mov (%rdi),%rbx /* rdi is the jmp_buf, restore regs from it */ mov 8(%rdi),%rbp From 694e3ca481e1922c8dc362c628a200c5b4832507 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Tue, 11 Aug 2020 21:11:15 +0300 Subject: [PATCH 076/189] setjmp: avoid useless REX-prefix on xor %eax, %eax --- src/setjmp/x32/setjmp.s | 2 +- src/setjmp/x86_64/setjmp.s | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/setjmp/x32/setjmp.s b/src/setjmp/x32/setjmp.s index 98f58b8d6..d95e48535 100644 --- a/src/setjmp/x32/setjmp.s +++ b/src/setjmp/x32/setjmp.s @@ -18,5 +18,5 @@ setjmp: mov %rdx,48(%rdi) mov (%rsp),%rdx /* save return addr ptr for new rip */ mov %rdx,56(%rdi) - xor %rax,%rax /* always return 0 */ + xor %eax,%eax /* always return 0 */ ret diff --git a/src/setjmp/x86_64/setjmp.s 
b/src/setjmp/x86_64/setjmp.s index 98f58b8d6..d95e48535 100644 --- a/src/setjmp/x86_64/setjmp.s +++ b/src/setjmp/x86_64/setjmp.s @@ -18,5 +18,5 @@ setjmp: mov %rdx,48(%rdi) mov (%rsp),%rdx /* save return addr ptr for new rip */ mov %rdx,56(%rdi) - xor %rax,%rax /* always return 0 */ + xor %eax,%eax /* always return 0 */ ret From 94875e1c98f715d13932d6eec333c94b3a094423 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Tue, 11 Aug 2020 21:11:16 +0300 Subject: [PATCH 077/189] setjmp: optimize x86 longjmp epilogues --- src/setjmp/i386/longjmp.s | 6 ++---- src/setjmp/x32/longjmp.s | 6 ++---- src/setjmp/x86_64/longjmp.s | 6 ++---- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/setjmp/i386/longjmp.s b/src/setjmp/i386/longjmp.s index 772d28ddb..b429f1356 100644 --- a/src/setjmp/i386/longjmp.s +++ b/src/setjmp/i386/longjmp.s @@ -14,7 +14,5 @@ longjmp: mov 4(%edx),%esi mov 8(%edx),%edi mov 12(%edx),%ebp - mov 16(%edx),%ecx - mov %ecx,%esp - mov 20(%edx),%ecx - jmp *%ecx + mov 16(%edx),%esp + jmp *20(%edx) diff --git a/src/setjmp/x32/longjmp.s b/src/setjmp/x32/longjmp.s index e709acad9..bb88afa16 100644 --- a/src/setjmp/x32/longjmp.s +++ b/src/setjmp/x32/longjmp.s @@ -16,7 +16,5 @@ longjmp: mov 24(%rdi),%r13 mov 32(%rdi),%r14 mov 40(%rdi),%r15 - mov 48(%rdi),%rdx /* this ends up being the stack pointer */ - mov %rdx,%rsp - mov 56(%rdi),%rdx /* this is the instruction pointer */ - jmp *%rdx /* goto saved address without altering rsp */ + mov 48(%rdi),%rsp + jmp *56(%rdi) /* goto saved address without altering rsp */ diff --git a/src/setjmp/x86_64/longjmp.s b/src/setjmp/x86_64/longjmp.s index e709acad9..bb88afa16 100644 --- a/src/setjmp/x86_64/longjmp.s +++ b/src/setjmp/x86_64/longjmp.s @@ -16,7 +16,5 @@ longjmp: mov 24(%rdi),%r13 mov 32(%rdi),%r14 mov 40(%rdi),%r15 - mov 48(%rdi),%rdx /* this ends up being the stack pointer */ - mov %rdx,%rsp - mov 56(%rdi),%rdx /* this is the instruction pointer */ - jmp *%rdx /* goto saved address without altering 
rsp */ + mov 48(%rdi),%rsp + jmp *56(%rdi) /* goto saved address without altering rsp */ From 65725d7e50c2865e33438b14b4480988bc7232d2 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Wed, 12 Aug 2020 14:34:30 +0300 Subject: [PATCH 078/189] setjmp: optimize longjmp prologues Use a branchless sequence that is one byte shorter on 64-bit, same size on 32-bit. Thanks to Pete Cawley for suggesting this variant. --- src/setjmp/i386/longjmp.s | 6 ++---- src/setjmp/x32/longjmp.s | 8 +++----- src/setjmp/x86_64/longjmp.s | 8 +++----- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/src/setjmp/i386/longjmp.s b/src/setjmp/i386/longjmp.s index b429f1356..8188f06bc 100644 --- a/src/setjmp/i386/longjmp.s +++ b/src/setjmp/i386/longjmp.s @@ -6,10 +6,8 @@ _longjmp: longjmp: mov 4(%esp),%edx mov 8(%esp),%eax - test %eax,%eax - jnz 1f - inc %eax -1: + cmp $1,%eax + adc $0, %al mov (%edx),%ebx mov 4(%edx),%esi mov 8(%edx),%edi diff --git a/src/setjmp/x32/longjmp.s b/src/setjmp/x32/longjmp.s index bb88afa16..1b2661c3e 100644 --- a/src/setjmp/x32/longjmp.s +++ b/src/setjmp/x32/longjmp.s @@ -5,11 +5,9 @@ .type longjmp,@function _longjmp: longjmp: - mov %esi,%eax /* val will be longjmp return */ - test %esi,%esi - jnz 1f - inc %eax /* if val==0, val=1 per longjmp semantics */ -1: + xor %eax,%eax + cmp $1,%esi /* CF = val ? 0 : 1 */ + adc %esi,%eax /* eax = val + !val */ mov (%rdi),%rbx /* rdi is the jmp_buf, restore regs from it */ mov 8(%rdi),%rbp mov 16(%rdi),%r12 diff --git a/src/setjmp/x86_64/longjmp.s b/src/setjmp/x86_64/longjmp.s index bb88afa16..1b2661c3e 100644 --- a/src/setjmp/x86_64/longjmp.s +++ b/src/setjmp/x86_64/longjmp.s @@ -5,11 +5,9 @@ .type longjmp,@function _longjmp: longjmp: - mov %esi,%eax /* val will be longjmp return */ - test %esi,%esi - jnz 1f - inc %eax /* if val==0, val=1 per longjmp semantics */ -1: + xor %eax,%eax + cmp $1,%esi /* CF = val ? 
0 : 1 */ + adc %esi,%eax /* eax = val + !val */ mov (%rdi),%rbx /* rdi is the jmp_buf, restore regs from it */ mov 8(%rdi),%rbp mov 16(%rdi),%r12 From c51d3165fead23d5c3820d5840fed3b3cf8a1b3e Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Wed, 12 Aug 2020 21:00:26 +0000 Subject: [PATCH 079/189] aarch64: fix setjmp return value longjmp should set the return value of setjmp, but 64bit registers were used for the 0 check while the type is int. use the code that gcc generates for return val ? val : 1; --- src/setjmp/aarch64/longjmp.s | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/setjmp/aarch64/longjmp.s b/src/setjmp/aarch64/longjmp.s index 7c4655fa9..0af9c50ee 100644 --- a/src/setjmp/aarch64/longjmp.s +++ b/src/setjmp/aarch64/longjmp.s @@ -18,7 +18,6 @@ longjmp: ldp d12, d13, [x0,#144] ldp d14, d15, [x0,#160] - mov x0, x1 - cbnz x1, 1f - mov x0, #1 -1: br x30 + cmp w1, 0 + csinc w0, w1, wzr, ne + br x30 From 70f05c5b1775386c28540b40c9c371ed432585a8 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 17 Aug 2020 20:12:53 -0400 Subject: [PATCH 080/189] add gettid function this is a prerequisite for addition of other interfaces that use kernel tids, including futex and SIGEV_THREAD_ID. there is some ambiguity as to whether the semantic return type should be int or pid_t. either way, futex API imposes a contract that the values fit in int (excluding some upper reserved bits). glibc used pid_t, so in the interest of not having gratuitous mismatch (the underlying types are the same anyway), pid_t is used here as well. while conceptually this is a syscall, the copy stored in the thread structure is always valid in all contexts where it's valid to call libc functions, so it's used to avoid the syscall. 
--- include/unistd.h | 1 + src/linux/gettid.c | 8 ++++++++ 2 files changed, 9 insertions(+) create mode 100644 src/linux/gettid.c diff --git a/include/unistd.h b/include/unistd.h index 7bcbff943..07584a23e 100644 --- a/include/unistd.h +++ b/include/unistd.h @@ -190,6 +190,7 @@ int syncfs(int); int euidaccess(const char *, int); int eaccess(const char *, int); ssize_t copy_file_range(int, off_t *, int, off_t *, size_t, unsigned); +pid_t gettid(void); #endif #if defined(_LARGEFILE64_SOURCE) || defined(_GNU_SOURCE) diff --git a/src/linux/gettid.c b/src/linux/gettid.c new file mode 100644 index 000000000..70767137e --- /dev/null +++ b/src/linux/gettid.c @@ -0,0 +1,8 @@ +#define _GNU_SOURCE +#include +#include "pthread_impl.h" + +pid_t gettid(void) +{ + return __pthread_self()->tid; +} From fefe3aa11a0ea6749b5b8a3b2e0bd9b490b20376 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sat, 22 Aug 2020 13:51:32 -0400 Subject: [PATCH 081/189] fix MUSL_LOCPATH search all path elements but the last had the final byte truncated. --- src/locale/locale_map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/locale/locale_map.c b/src/locale/locale_map.c index 2321bac0e..e7eede629 100644 --- a/src/locale/locale_map.c +++ b/src/locale/locale_map.c @@ -67,7 +67,7 @@ const struct __locale_map *__get_locale(int cat, const char *val) if (path) for (; *path; path=z+!!*z) { z = __strchrnul(path, ':'); - l = z - path - !!*z; + l = z - path; if (l >= sizeof buf - n - 2) continue; memcpy(buf, path, l); buf[l] = '/'; From 24694b3c5e54bc832ac1909e16103e49eba5359e Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 24 Aug 2020 12:29:30 -0400 Subject: [PATCH 082/189] add tcgetwinsize and tcsetwinsize functions, move struct winsize these have been adopted for future issue of POSIX as the outcome of Austin Group issue 1151, and are simply functions performing the roles of the historical ioctls. 
since struct winsize is being standardized along with them, its definition is moved to the appropriate header. there is some chance this will break source files that expect struct winsize to be defined by sys/ioctl.h without including termios.h. if this happens, further changes will be needed to have sys/ioctl.h expose it too. --- include/sys/ioctl.h | 7 ------- include/termios.h | 10 ++++++++++ src/stdio/__fdopen.c | 1 + src/stdio/__stdout_write.c | 1 + src/termios/tcgetwinsize.c | 8 ++++++++ src/termios/tcsetwinsize.c | 8 ++++++++ src/unistd/isatty.c | 1 + 7 files changed, 29 insertions(+), 7 deletions(-) create mode 100644 src/termios/tcgetwinsize.c create mode 100644 src/termios/tcsetwinsize.c diff --git a/include/sys/ioctl.h b/include/sys/ioctl.h index c2ce3b484..d6a7d474e 100644 --- a/include/sys/ioctl.h +++ b/include/sys/ioctl.h @@ -47,13 +47,6 @@ extern "C" { #define TIOCSER_TEMT 1 -struct winsize { - unsigned short ws_row; - unsigned short ws_col; - unsigned short ws_xpixel; - unsigned short ws_ypixel; -}; - #define SIOCADDRT 0x890B #define SIOCDELRT 0x890C #define SIOCRTMSG 0x890D diff --git a/include/termios.h b/include/termios.h index d73c780d4..793cfc947 100644 --- a/include/termios.h +++ b/include/termios.h @@ -15,6 +15,13 @@ typedef unsigned char cc_t; typedef unsigned int speed_t; typedef unsigned int tcflag_t; +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + #define NCCS 32 #include @@ -27,6 +34,9 @@ int cfsetispeed (struct termios *, speed_t); int tcgetattr (int, struct termios *); int tcsetattr (int, int, const struct termios *); +int tcgetwinsize (int, struct winsize *); +int tcsetwinsize (int, const struct winsize *); + int tcsendbreak (int, int); int tcdrain (int); int tcflush (int, int); diff --git a/src/stdio/__fdopen.c b/src/stdio/__fdopen.c index 116e78e56..616f4f998 100644 --- a/src/stdio/__fdopen.c +++ b/src/stdio/__fdopen.c @@ -1,6 +1,7 @@ #include 
"stdio_impl.h" #include #include +#include #include #include #include diff --git a/src/stdio/__stdout_write.c b/src/stdio/__stdout_write.c index dd1ec60ff..5b413c797 100644 --- a/src/stdio/__stdout_write.c +++ b/src/stdio/__stdout_write.c @@ -1,5 +1,6 @@ #include "stdio_impl.h" #include +#include size_t __stdout_write(FILE *f, const unsigned char *buf, size_t len) { diff --git a/src/termios/tcgetwinsize.c b/src/termios/tcgetwinsize.c new file mode 100644 index 000000000..9b3a65a40 --- /dev/null +++ b/src/termios/tcgetwinsize.c @@ -0,0 +1,8 @@ +#include +#include +#include "syscall.h" + +int tcgetwinsize(int fd, struct winsize *wsz) +{ + return syscall(SYS_ioctl, fd, TIOCGWINSZ, wsz); +} diff --git a/src/termios/tcsetwinsize.c b/src/termios/tcsetwinsize.c new file mode 100644 index 000000000..e01d0e254 --- /dev/null +++ b/src/termios/tcsetwinsize.c @@ -0,0 +1,8 @@ +#include +#include +#include "syscall.h" + +int tcsetwinsize(int fd, const struct winsize *wsz) +{ + return syscall(SYS_ioctl, fd, TIOCSWINSZ, wsz); +} diff --git a/src/unistd/isatty.c b/src/unistd/isatty.c index 75a9c186a..bc220c003 100644 --- a/src/unistd/isatty.c +++ b/src/unistd/isatty.c @@ -1,6 +1,7 @@ #include #include #include +#include #include "syscall.h" int isatty(int fd) From 4180c2e77bc0bfbf36dccb1a3d99b7c568e2874a Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 24 Aug 2020 21:38:49 -0400 Subject: [PATCH 083/189] make h_errno thread-local the framework to do this always existed but it was deemed unnecessary because the only [ex-]standard functions using h_errno were not thread-safe anyway. however, some of the nonstandard res_* functions are also supposed to set h_errno to indicate the cause of error, and were unable to do so because it was not thread-safe. this change is a prerequisite for fixing them. 
--- src/internal/pthread_impl.h | 1 + src/network/h_errno.c | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h index 5742dfc55..5749a3362 100644 --- a/src/internal/pthread_impl.h +++ b/src/internal/pthread_impl.h @@ -43,6 +43,7 @@ struct pthread { long off; volatile void *volatile pending; } robust_list; + int h_errno_val; volatile int timer_id; locale_t locale; volatile int killlock[1]; diff --git a/src/network/h_errno.c b/src/network/h_errno.c index 4f700ceaf..8677a92b7 100644 --- a/src/network/h_errno.c +++ b/src/network/h_errno.c @@ -1,9 +1,7 @@ #include - -#undef h_errno -int h_errno; +#include "pthread_impl.h" int *__h_errno_location(void) { - return &h_errno; + return &__pthread_self()->h_errno_val; } From 77c912963deb7f703f8c257b6bddd9ae8d977442 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 24 Aug 2020 21:56:48 -0400 Subject: [PATCH 084/189] report res_query failures, including nxdomain/nodata, via h_errno while it's not clearly documented anywhere, this is the historical behavior which some applications expect. applications which need to see the response packet in these cases, for example to distinguish between nonexistence in a secure vs insecure zone, must already use res_mkquery with res_send in order to be portable, since most if not all other implementations of res_query don't provide it. 
--- src/network/res_query.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/network/res_query.c b/src/network/res_query.c index 2f4da2e2e..506dc2312 100644 --- a/src/network/res_query.c +++ b/src/network/res_query.c @@ -1,3 +1,4 @@ +#define _BSD_SOURCE #include #include @@ -6,7 +7,20 @@ int res_query(const char *name, int class, int type, unsigned char *dest, int le unsigned char q[280]; int ql = __res_mkquery(0, name, class, type, 0, 0, 0, q, sizeof q); if (ql < 0) return ql; - return __res_send(q, ql, dest, len); + int r = __res_send(q, ql, dest, len); + if (r<12) { + h_errno = TRY_AGAIN; + return -1; + } + if ((dest[3] & 15) == 3) { + h_errno = HOST_NOT_FOUND; + return -1; + } + if ((dest[3] & 15) == 0 && !dest[6] && !dest[7]) { + h_errno = NO_DATA; + return -1; + } + return r; } weak_alias(res_query, res_search); From 90f13b86e74c2303a6d1590054973b79f1e3617e Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 24 Aug 2020 22:04:52 -0400 Subject: [PATCH 085/189] deduplicate TP_ADJ logic out of each arch, replace with TP_OFFSET the only part of TP_ADJ that was not uniquely determined by TLS_ABOVE_TP was the 0x7000 adjustment used mainly on mips and powerpc variants. 
--- arch/aarch64/pthread_arch.h | 1 - arch/arm/pthread_arch.h | 1 - arch/i386/pthread_arch.h | 2 -- arch/m68k/pthread_arch.h | 2 +- arch/microblaze/pthread_arch.h | 2 -- arch/mips/pthread_arch.h | 2 +- arch/mips64/pthread_arch.h | 2 +- arch/mipsn32/pthread_arch.h | 2 +- arch/or1k/pthread_arch.h | 1 - arch/powerpc/pthread_arch.h | 2 +- arch/powerpc64/pthread_arch.h | 2 +- arch/riscv64/pthread_arch.h | 1 - arch/s390x/pthread_arch.h | 2 -- arch/sh/pthread_arch.h | 1 - arch/x32/pthread_arch.h | 2 -- arch/x86_64/pthread_arch.h | 2 -- src/internal/pthread_impl.h | 10 ++++++++++ 17 files changed, 16 insertions(+), 21 deletions(-) diff --git a/arch/aarch64/pthread_arch.h b/arch/aarch64/pthread_arch.h index e64b126d2..f3c005c7d 100644 --- a/arch/aarch64/pthread_arch.h +++ b/arch/aarch64/pthread_arch.h @@ -7,6 +7,5 @@ static inline struct pthread *__pthread_self() #define TLS_ABOVE_TP #define GAP_ABOVE_TP 16 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread)) #define MC_PC pc diff --git a/arch/arm/pthread_arch.h b/arch/arm/pthread_arch.h index e689ea212..486409854 100644 --- a/arch/arm/pthread_arch.h +++ b/arch/arm/pthread_arch.h @@ -28,6 +28,5 @@ static inline pthread_t __pthread_self() #define TLS_ABOVE_TP #define GAP_ABOVE_TP 8 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread)) #define MC_PC arm_pc diff --git a/arch/i386/pthread_arch.h b/arch/i386/pthread_arch.h index 6f600b9e0..32570a171 100644 --- a/arch/i386/pthread_arch.h +++ b/arch/i386/pthread_arch.h @@ -5,6 +5,4 @@ static inline struct pthread *__pthread_self() return self; } -#define TP_ADJ(p) (p) - #define MC_PC gregs[REG_EIP] diff --git a/arch/m68k/pthread_arch.h b/arch/m68k/pthread_arch.h index 02d5b8a08..7c9990c28 100644 --- a/arch/m68k/pthread_arch.h +++ b/arch/m68k/pthread_arch.h @@ -6,8 +6,8 @@ static inline struct pthread *__pthread_self() #define TLS_ABOVE_TP #define GAP_ABOVE_TP 0 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000) +#define TP_OFFSET 0x7000 #define DTP_OFFSET 
0x8000 #define MC_PC gregs[R_PC] diff --git a/arch/microblaze/pthread_arch.h b/arch/microblaze/pthread_arch.h index f6ba8de98..c327f4eb5 100644 --- a/arch/microblaze/pthread_arch.h +++ b/arch/microblaze/pthread_arch.h @@ -5,6 +5,4 @@ static inline struct pthread *__pthread_self() return self; } -#define TP_ADJ(p) (p) - #define MC_PC regs.pc diff --git a/arch/mips/pthread_arch.h b/arch/mips/pthread_arch.h index 1e7839ea6..c22eb34dc 100644 --- a/arch/mips/pthread_arch.h +++ b/arch/mips/pthread_arch.h @@ -12,8 +12,8 @@ static inline struct pthread *__pthread_self() #define TLS_ABOVE_TP #define GAP_ABOVE_TP 0 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000) +#define TP_OFFSET 0x7000 #define DTP_OFFSET 0x8000 #define MC_PC pc diff --git a/arch/mips64/pthread_arch.h b/arch/mips64/pthread_arch.h index 1e7839ea6..c22eb34dc 100644 --- a/arch/mips64/pthread_arch.h +++ b/arch/mips64/pthread_arch.h @@ -12,8 +12,8 @@ static inline struct pthread *__pthread_self() #define TLS_ABOVE_TP #define GAP_ABOVE_TP 0 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000) +#define TP_OFFSET 0x7000 #define DTP_OFFSET 0x8000 #define MC_PC pc diff --git a/arch/mipsn32/pthread_arch.h b/arch/mipsn32/pthread_arch.h index 1e7839ea6..c22eb34dc 100644 --- a/arch/mipsn32/pthread_arch.h +++ b/arch/mipsn32/pthread_arch.h @@ -12,8 +12,8 @@ static inline struct pthread *__pthread_self() #define TLS_ABOVE_TP #define GAP_ABOVE_TP 0 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000) +#define TP_OFFSET 0x7000 #define DTP_OFFSET 0x8000 #define MC_PC pc diff --git a/arch/or1k/pthread_arch.h b/arch/or1k/pthread_arch.h index 1b806f891..76d0a8bc1 100644 --- a/arch/or1k/pthread_arch.h +++ b/arch/or1k/pthread_arch.h @@ -13,6 +13,5 @@ static inline struct pthread *__pthread_self() #define TLS_ABOVE_TP #define GAP_ABOVE_TP 0 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread)) #define MC_PC regs.pc diff --git a/arch/powerpc/pthread_arch.h 
b/arch/powerpc/pthread_arch.h index ae0f28d6d..9697046bb 100644 --- a/arch/powerpc/pthread_arch.h +++ b/arch/powerpc/pthread_arch.h @@ -7,8 +7,8 @@ static inline struct pthread *__pthread_self() #define TLS_ABOVE_TP #define GAP_ABOVE_TP 0 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000) +#define TP_OFFSET 0x7000 #define DTP_OFFSET 0x8000 // the kernel calls the ip "nip", it's the first saved value after the 32 diff --git a/arch/powerpc64/pthread_arch.h b/arch/powerpc64/pthread_arch.h index 79c3ecd8a..e9dba43f5 100644 --- a/arch/powerpc64/pthread_arch.h +++ b/arch/powerpc64/pthread_arch.h @@ -7,8 +7,8 @@ static inline struct pthread *__pthread_self() #define TLS_ABOVE_TP #define GAP_ABOVE_TP 0 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000) +#define TP_OFFSET 0x7000 #define DTP_OFFSET 0x8000 // the kernel calls the ip "nip", it's the first saved value after the 32 diff --git a/arch/riscv64/pthread_arch.h b/arch/riscv64/pthread_arch.h index db414b170..50f0868d8 100644 --- a/arch/riscv64/pthread_arch.h +++ b/arch/riscv64/pthread_arch.h @@ -7,7 +7,6 @@ static inline struct pthread *__pthread_self() #define TLS_ABOVE_TP #define GAP_ABOVE_TP 0 -#define TP_ADJ(p) ((char *)p + sizeof(struct pthread)) #define DTP_OFFSET 0x800 diff --git a/arch/s390x/pthread_arch.h b/arch/s390x/pthread_arch.h index e2251f1fa..5d22546bb 100644 --- a/arch/s390x/pthread_arch.h +++ b/arch/s390x/pthread_arch.h @@ -9,6 +9,4 @@ static inline struct pthread *__pthread_self() return self; } -#define TP_ADJ(p) (p) - #define MC_PC psw.addr diff --git a/arch/sh/pthread_arch.h b/arch/sh/pthread_arch.h index 3ee9c1a93..c22529082 100644 --- a/arch/sh/pthread_arch.h +++ b/arch/sh/pthread_arch.h @@ -7,7 +7,6 @@ static inline struct pthread *__pthread_self() #define TLS_ABOVE_TP #define GAP_ABOVE_TP 8 -#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread)) #define MC_PC sc_pc diff --git a/arch/x32/pthread_arch.h b/arch/x32/pthread_arch.h index f640a1a15..fa4528391 100644 
--- a/arch/x32/pthread_arch.h +++ b/arch/x32/pthread_arch.h @@ -5,8 +5,6 @@ static inline struct pthread *__pthread_self() return self; } -#define TP_ADJ(p) (p) - #define MC_PC gregs[REG_RIP] #define CANARY canary2 diff --git a/arch/x86_64/pthread_arch.h b/arch/x86_64/pthread_arch.h index 65e880c62..1c64a8408 100644 --- a/arch/x86_64/pthread_arch.h +++ b/arch/x86_64/pthread_arch.h @@ -5,6 +5,4 @@ static inline struct pthread *__pthread_self() return self; } -#define TP_ADJ(p) (p) - #define MC_PC gregs[REG_RIP] diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h index 5749a3362..3c2bd7670 100644 --- a/src/internal/pthread_impl.h +++ b/src/internal/pthread_impl.h @@ -105,10 +105,20 @@ struct __timer { #define CANARY canary #endif +#ifndef TP_OFFSET +#define TP_OFFSET 0 +#endif + #ifndef DTP_OFFSET #define DTP_OFFSET 0 #endif +#ifdef TLS_ABOVE_TP +#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + TP_OFFSET) +#else +#define TP_ADJ(p) (p) +#endif + #ifndef tls_mod_off_t #define tls_mod_off_t size_t #endif From 7284830c8c2da1c6192845257d8cd78bf7e20456 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 24 Aug 2020 22:23:08 -0400 Subject: [PATCH 086/189] deduplicate __pthread_self thread pointer adjustment out of each arch the adjustment made is entirely a function of TLS_ABOVE_TP and TP_OFFSET. aside from avoiding repetition of the TP_OFFSET value and arithmetic, this change makes pthread_arch.h independent of the definition of struct __pthread from pthread_impl.h. this in turn will allow inclusion of pthread_arch.h to be moved to the top of pthread_impl.h so that it can influence the definition of the structure. previously, arch files were very inconsistent about the type used for the thread pointer. this change unifies the new __get_tp interface to always use uintptr_t, which is the most correct when performing arithmetic that may involve addresses outside the actual pointed-to object (due to TP_OFFSET). 
--- arch/aarch64/pthread_arch.h | 8 ++++---- arch/arm/pthread_arch.h | 16 ++++++++-------- arch/i386/pthread_arch.h | 8 ++++---- arch/m68k/pthread_arch.h | 5 ++--- arch/microblaze/pthread_arch.h | 8 ++++---- arch/mips/pthread_arch.h | 8 ++++---- arch/mips64/pthread_arch.h | 8 ++++---- arch/mipsn32/pthread_arch.h | 8 ++++---- arch/or1k/pthread_arch.h | 9 ++++----- arch/powerpc/pthread_arch.h | 6 +++--- arch/powerpc64/pthread_arch.h | 6 +++--- arch/riscv64/pthread_arch.h | 6 +++--- arch/s390x/pthread_arch.h | 8 ++++---- arch/sh/pthread_arch.h | 8 ++++---- arch/x32/pthread_arch.h | 8 ++++---- arch/x86_64/pthread_arch.h | 8 ++++---- src/internal/pthread_impl.h | 2 ++ 17 files changed, 65 insertions(+), 65 deletions(-) diff --git a/arch/aarch64/pthread_arch.h b/arch/aarch64/pthread_arch.h index f3c005c7d..3909616c3 100644 --- a/arch/aarch64/pthread_arch.h +++ b/arch/aarch64/pthread_arch.h @@ -1,8 +1,8 @@ -static inline struct pthread *__pthread_self() +static inline uintptr_t __get_tp() { - char *self; - __asm__ ("mrs %0,tpidr_el0" : "=r"(self)); - return (void*)(self - sizeof(struct pthread)); + uintptr_t tp; + __asm__ ("mrs %0,tpidr_el0" : "=r"(tp)); + return tp; } #define TLS_ABOVE_TP diff --git a/arch/arm/pthread_arch.h b/arch/arm/pthread_arch.h index 486409854..157e2eae6 100644 --- a/arch/arm/pthread_arch.h +++ b/arch/arm/pthread_arch.h @@ -1,11 +1,11 @@ #if ((__ARM_ARCH_6K__ || __ARM_ARCH_6KZ__ || __ARM_ARCH_6ZK__) && !__thumb__) \ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7 -static inline pthread_t __pthread_self() +static inline uintptr_t __get_tp() { - char *p; - __asm__ ( "mrc p15,0,%0,c13,c0,3" : "=r"(p) ); - return (void *)(p-sizeof(struct pthread)); + uintptr_t tp; + __asm__ ( "mrc p15,0,%0,c13,c0,3" : "=r"(tp) ); + return tp; } #else @@ -16,12 +16,12 @@ static inline pthread_t __pthread_self() #define BLX "blx" #endif -static inline pthread_t __pthread_self() +static inline uintptr_t __get_tp() { extern hidden uintptr_t __a_gettp_ptr; - 
register uintptr_t p __asm__("r0"); - __asm__ ( BLX " %1" : "=r"(p) : "r"(__a_gettp_ptr) : "cc", "lr" ); - return (void *)(p-sizeof(struct pthread)); + register uintptr_t tp __asm__("r0"); + __asm__ ( BLX " %1" : "=r"(tp) : "r"(__a_gettp_ptr) : "cc", "lr" ); + return tp; } #endif diff --git a/arch/i386/pthread_arch.h b/arch/i386/pthread_arch.h index 32570a171..a639c382a 100644 --- a/arch/i386/pthread_arch.h +++ b/arch/i386/pthread_arch.h @@ -1,8 +1,8 @@ -static inline struct pthread *__pthread_self() +static inline uintptr_t __get_tp() { - struct pthread *self; - __asm__ ("movl %%gs:0,%0" : "=r" (self) ); - return self; + uintptr_t tp; + __asm__ ("movl %%gs:0,%0" : "=r" (tp) ); + return tp; } #define MC_PC gregs[REG_EIP] diff --git a/arch/m68k/pthread_arch.h b/arch/m68k/pthread_arch.h index 7c9990c28..5bea4e1ad 100644 --- a/arch/m68k/pthread_arch.h +++ b/arch/m68k/pthread_arch.h @@ -1,7 +1,6 @@ -static inline struct pthread *__pthread_self() +static inline uintptr_t __get_tp() { - uintptr_t tp = __syscall(SYS_get_thread_area); - return (pthread_t)(tp - 0x7000 - sizeof(struct pthread)); + return __syscall(SYS_get_thread_area); } #define TLS_ABOVE_TP diff --git a/arch/microblaze/pthread_arch.h b/arch/microblaze/pthread_arch.h index c327f4eb5..ff26624ef 100644 --- a/arch/microblaze/pthread_arch.h +++ b/arch/microblaze/pthread_arch.h @@ -1,8 +1,8 @@ -static inline struct pthread *__pthread_self() +static inline uintptr_t __get_tp() { - struct pthread *self; - __asm__ ("ori %0, r21, 0" : "=r" (self) ); - return self; + uintptr_t tp; + __asm__ ("ori %0, r21, 0" : "=r" (tp) ); + return tp; } #define MC_PC regs.pc diff --git a/arch/mips/pthread_arch.h b/arch/mips/pthread_arch.h index c22eb34dc..c45347ab9 100644 --- a/arch/mips/pthread_arch.h +++ b/arch/mips/pthread_arch.h @@ -1,13 +1,13 @@ -static inline struct pthread *__pthread_self() +static inline uintptr_t __get_tp() { #if __mips_isa_rev < 2 - register char *tp __asm__("$3"); + register uintptr_t tp __asm__("$3"); 
__asm__ (".word 0x7c03e83b" : "=r" (tp) ); #else - char *tp; + uintptr_t tp; __asm__ ("rdhwr %0, $29" : "=r" (tp) ); #endif - return (pthread_t)(tp - 0x7000 - sizeof(struct pthread)); + return tp; } #define TLS_ABOVE_TP diff --git a/arch/mips64/pthread_arch.h b/arch/mips64/pthread_arch.h index c22eb34dc..c45347ab9 100644 --- a/arch/mips64/pthread_arch.h +++ b/arch/mips64/pthread_arch.h @@ -1,13 +1,13 @@ -static inline struct pthread *__pthread_self() +static inline uintptr_t __get_tp() { #if __mips_isa_rev < 2 - register char *tp __asm__("$3"); + register uintptr_t tp __asm__("$3"); __asm__ (".word 0x7c03e83b" : "=r" (tp) ); #else - char *tp; + uintptr_t tp; __asm__ ("rdhwr %0, $29" : "=r" (tp) ); #endif - return (pthread_t)(tp - 0x7000 - sizeof(struct pthread)); + return tp; } #define TLS_ABOVE_TP diff --git a/arch/mipsn32/pthread_arch.h b/arch/mipsn32/pthread_arch.h index c22eb34dc..c45347ab9 100644 --- a/arch/mipsn32/pthread_arch.h +++ b/arch/mipsn32/pthread_arch.h @@ -1,13 +1,13 @@ -static inline struct pthread *__pthread_self() +static inline uintptr_t __get_tp() { #if __mips_isa_rev < 2 - register char *tp __asm__("$3"); + register uintptr_t tp __asm__("$3"); __asm__ (".word 0x7c03e83b" : "=r" (tp) ); #else - char *tp; + uintptr_t tp; __asm__ ("rdhwr %0, $29" : "=r" (tp) ); #endif - return (pthread_t)(tp - 0x7000 - sizeof(struct pthread)); + return tp; } #define TLS_ABOVE_TP diff --git a/arch/or1k/pthread_arch.h b/arch/or1k/pthread_arch.h index 76d0a8bc1..f75ea7e4e 100644 --- a/arch/or1k/pthread_arch.h +++ b/arch/or1k/pthread_arch.h @@ -1,14 +1,13 @@ -/* or1k use variant I, but with the twist that tp points to the end of TCB */ -static inline struct pthread *__pthread_self() +static inline uintptr_t __get_tp() { #ifdef __clang__ - char *tp; + uintptr_t tp; __asm__ ("l.ori %0, r10, 0" : "=r" (tp) ); #else - register char *tp __asm__("r10"); + register uintptr_t tp __asm__("r10"); __asm__ ("" : "=r" (tp) ); #endif - return (struct pthread *) (tp - sizeof(struct 
pthread)); + return tp; } #define TLS_ABOVE_TP diff --git a/arch/powerpc/pthread_arch.h b/arch/powerpc/pthread_arch.h index 9697046bb..a0947763b 100644 --- a/arch/powerpc/pthread_arch.h +++ b/arch/powerpc/pthread_arch.h @@ -1,8 +1,8 @@ -static inline struct pthread *__pthread_self() +static inline uintptr_t __get_tp() { - register char *tp __asm__("r2"); + register uintptr_t tp __asm__("r2"); __asm__ ("" : "=r" (tp) ); - return (pthread_t)(tp - 0x7000 - sizeof(struct pthread)); + return tp; } #define TLS_ABOVE_TP diff --git a/arch/powerpc64/pthread_arch.h b/arch/powerpc64/pthread_arch.h index e9dba43f5..08a557d20 100644 --- a/arch/powerpc64/pthread_arch.h +++ b/arch/powerpc64/pthread_arch.h @@ -1,8 +1,8 @@ -static inline struct pthread *__pthread_self() +static inline uintptr_t __get_tp() { - register char *tp __asm__("r13"); + register uintptr_t tp __asm__("r13"); __asm__ ("" : "=r" (tp) ); - return (pthread_t)(tp - 0x7000 - sizeof(struct pthread)); + return tp; } #define TLS_ABOVE_TP diff --git a/arch/riscv64/pthread_arch.h b/arch/riscv64/pthread_arch.h index 50f0868d8..a20d7fba0 100644 --- a/arch/riscv64/pthread_arch.h +++ b/arch/riscv64/pthread_arch.h @@ -1,8 +1,8 @@ -static inline struct pthread *__pthread_self() +static inline uintptr_t __get_tp() { - char *tp; + uintptr_t tp; __asm__ __volatile__("mv %0, tp" : "=r"(tp)); - return (void *)(tp - sizeof(struct pthread)); + return tp; } #define TLS_ABOVE_TP diff --git a/arch/s390x/pthread_arch.h b/arch/s390x/pthread_arch.h index 5d22546bb..e54fec3fe 100644 --- a/arch/s390x/pthread_arch.h +++ b/arch/s390x/pthread_arch.h @@ -1,12 +1,12 @@ -static inline struct pthread *__pthread_self() +static inline uintptr_t __get_tp() { - struct pthread *self; + uintptr_t tp; __asm__ ( "ear %0, %%a0\n" "sllg %0, %0, 32\n" "ear %0, %%a1\n" - : "=r"(self)); - return self; + : "=r"(tp)); + return tp; } #define MC_PC psw.addr diff --git a/arch/sh/pthread_arch.h b/arch/sh/pthread_arch.h index c22529082..0fcf70d27 100644 --- 
a/arch/sh/pthread_arch.h +++ b/arch/sh/pthread_arch.h @@ -1,8 +1,8 @@ -static inline struct pthread *__pthread_self() +static inline uintptr_t __get_tp() { - char *self; - __asm__ ("stc gbr,%0" : "=r" (self) ); - return (struct pthread *) (self - sizeof(struct pthread)); + uintptr_t tp; + __asm__ ("stc gbr,%0" : "=r" (tp) ); + return tp; } #define TLS_ABOVE_TP diff --git a/arch/x32/pthread_arch.h b/arch/x32/pthread_arch.h index fa4528391..6e2495da4 100644 --- a/arch/x32/pthread_arch.h +++ b/arch/x32/pthread_arch.h @@ -1,8 +1,8 @@ -static inline struct pthread *__pthread_self() +static inline uintptr_t __get_tp() { - struct pthread *self; - __asm__ ("mov %%fs:0,%0" : "=r" (self) ); - return self; + uintptr_t tp; + __asm__ ("mov %%fs:0,%0" : "=r" (tp) ); + return tp; } #define MC_PC gregs[REG_RIP] diff --git a/arch/x86_64/pthread_arch.h b/arch/x86_64/pthread_arch.h index 1c64a8408..c8c63f2e7 100644 --- a/arch/x86_64/pthread_arch.h +++ b/arch/x86_64/pthread_arch.h @@ -1,8 +1,8 @@ -static inline struct pthread *__pthread_self() +static inline uintptr_t __get_tp() { - struct pthread *self; - __asm__ ("mov %%fs:0,%0" : "=r" (self) ); - return self; + uintptr_t tp; + __asm__ ("mov %%fs:0,%0" : "=r" (tp) ); + return tp; } #define MC_PC gregs[REG_RIP] diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h index 3c2bd7670..58e061364 100644 --- a/src/internal/pthread_impl.h +++ b/src/internal/pthread_impl.h @@ -115,8 +115,10 @@ struct __timer { #ifdef TLS_ABOVE_TP #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + TP_OFFSET) +#define __pthread_self() ((pthread_t)(__get_tp() - sizeof(struct __pthread) - TP_OFFSET)) #else #define TP_ADJ(p) (p) +#define __pthread_self() ((pthread_t)__get_tp()) #endif #ifndef tls_mod_off_t From 47f84dd6fc62dfb8c7977b4ceaa4cc09eac8f69d Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 24 Aug 2020 22:45:51 -0400 Subject: [PATCH 087/189] remove redundant pthread struct members repeated for layout purposes dtv_copy, 
canary2, and canary_at_end existed solely to match multiple ABI and asm-accessed layouts simultaneously. now that pthread_arch.h can be included before struct __pthread is defined, the struct layout can depend on macros defined by pthread_arch.h. --- arch/powerpc/pthread_arch.h | 2 -- arch/powerpc64/pthread_arch.h | 2 -- arch/x32/pthread_arch.h | 2 +- ldso/dynlink.c | 2 +- src/env/__init_tls.c | 2 +- src/env/__stack_chk_fail.c | 2 +- src/internal/pthread_impl.h | 23 ++++++++++++++--------- src/thread/pthread_create.c | 2 +- 8 files changed, 19 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/pthread_arch.h b/arch/powerpc/pthread_arch.h index a0947763b..42e88b07e 100644 --- a/arch/powerpc/pthread_arch.h +++ b/arch/powerpc/pthread_arch.h @@ -14,5 +14,3 @@ static inline uintptr_t __get_tp() // the kernel calls the ip "nip", it's the first saved value after the 32 // GPRs. #define MC_PC gregs[32] - -#define CANARY canary_at_end diff --git a/arch/powerpc64/pthread_arch.h b/arch/powerpc64/pthread_arch.h index 08a557d20..1b7b90797 100644 --- a/arch/powerpc64/pthread_arch.h +++ b/arch/powerpc64/pthread_arch.h @@ -14,5 +14,3 @@ static inline uintptr_t __get_tp() // the kernel calls the ip "nip", it's the first saved value after the 32 // GPRs. #define MC_PC gp_regs[32] - -#define CANARY canary_at_end diff --git a/arch/x32/pthread_arch.h b/arch/x32/pthread_arch.h index 6e2495da4..c1e7716d5 100644 --- a/arch/x32/pthread_arch.h +++ b/arch/x32/pthread_arch.h @@ -7,6 +7,6 @@ static inline uintptr_t __get_tp() #define MC_PC gregs[REG_RIP] -#define CANARY canary2 +#define CANARY_PAD #define tls_mod_off_t unsigned long long diff --git a/ldso/dynlink.c b/ldso/dynlink.c index d3d4ddd28..f74747433 100644 --- a/ldso/dynlink.c +++ b/ldso/dynlink.c @@ -1579,7 +1579,7 @@ static void install_new_tls(void) /* Install new dtv for each thread. 
*/ for (j=0, td=self; !j || td!=self; j++, td=td->next) { - td->dtv = td->dtv_copy = newdtv[j]; + td->dtv = newdtv[j]; } __tl_unlock(); diff --git a/src/env/__init_tls.c b/src/env/__init_tls.c index 772baba32..a93141ed3 100644 --- a/src/env/__init_tls.c +++ b/src/env/__init_tls.c @@ -67,7 +67,7 @@ void *__copy_tls(unsigned char *mem) } #endif dtv[0] = libc.tls_cnt; - td->dtv = td->dtv_copy = dtv; + td->dtv = dtv; return td; } diff --git a/src/env/__stack_chk_fail.c b/src/env/__stack_chk_fail.c index e32596d10..bf5a280ad 100644 --- a/src/env/__stack_chk_fail.c +++ b/src/env/__stack_chk_fail.c @@ -9,7 +9,7 @@ void __init_ssp(void *entropy) if (entropy) memcpy(&__stack_chk_guard, entropy, sizeof(uintptr_t)); else __stack_chk_guard = (uintptr_t)&__stack_chk_guard * 1103515245; - __pthread_self()->CANARY = __stack_chk_guard; + __pthread_self()->canary = __stack_chk_guard; } void __stack_chk_fail(void) diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h index 58e061364..4d709bbc9 100644 --- a/src/internal/pthread_impl.h +++ b/src/internal/pthread_impl.h @@ -11,16 +11,25 @@ #include "atomic.h" #include "futex.h" +#include "pthread_arch.h" + #define pthread __pthread struct pthread { /* Part 1 -- these fields may be external or * internal (accessed via asm) ABI. Do not change. */ struct pthread *self; +#ifndef TLS_ABOVE_TP uintptr_t *dtv; +#endif struct pthread *prev, *next; /* non-ABI */ uintptr_t sysinfo; - uintptr_t canary, canary2; +#ifndef TLS_ABOVE_TP +#ifdef CANARY_PAD + uintptr_t canary_pad; +#endif + uintptr_t canary; +#endif /* Part 2 -- implementation details, non-ABI. */ int tid; @@ -52,8 +61,10 @@ struct pthread { /* Part 3 -- the positions of these fields relative to * the end of the structure is external and internal ABI. 
*/ - uintptr_t canary_at_end; - uintptr_t *dtv_copy; +#ifdef TLS_ABOVE_TP + uintptr_t canary; + uintptr_t *dtv; +#endif }; enum { @@ -99,12 +110,6 @@ struct __timer { #define _b_waiters2 __u.__vi[4] #define _b_inst __u.__p[3] -#include "pthread_arch.h" - -#ifndef CANARY -#define CANARY canary -#endif - #ifndef TP_OFFSET #define TP_OFFSET 0 #endif diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c index 10f1b7d8c..557441559 100644 --- a/src/thread/pthread_create.c +++ b/src/thread/pthread_create.c @@ -314,7 +314,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att new->detach_state = DT_JOINABLE; } new->robust_list.head = &new->robust_list.head; - new->CANARY = self->CANARY; + new->canary = self->canary; new->sysinfo = self->sysinfo; /* Setup argument structure for the new thread on its stack. From 78aa8083a8b1b9dde8e5bcf9fca3b4eeb803a19d Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 27 Aug 2020 18:50:02 -0400 Subject: [PATCH 088/189] configure: add further -Werror=... options to detected CFLAGS these four warning options were overlooked previously, likely because they're not part of GCC's -Wall. they all detect constraint violations (invalid C at the source level) and should always be on in -Werror form. 
--- configure | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/configure b/configure index 18fda9afc..5b63e9b1c 100755 --- a/configure +++ b/configure @@ -522,6 +522,10 @@ tryflag CFLAGS_AUTO -Werror=implicit-function-declaration tryflag CFLAGS_AUTO -Werror=implicit-int tryflag CFLAGS_AUTO -Werror=pointer-sign tryflag CFLAGS_AUTO -Werror=pointer-arith +tryflag CFLAGS_AUTO -Werror=int-conversion +tryflag CFLAGS_AUTO -Werror=incompatible-pointer-types +tryflag CFLAGS_AUTO -Werror=discarded-qualifiers +tryflag CFLAGS_AUTO -Werror=discarded-array-qualifiers # # GCC ignores unused arguements by default, but Clang needs this extra From 81937823b4193261bf4c8adc03584f3619f6e095 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 27 Aug 2020 20:39:41 -0400 Subject: [PATCH 089/189] configure: use additive warnings instead of subtracting from -Wall -Wall varies too much by compiler and version. rather than trying to track all the unwanted style warnings that need to be subtracted, just enable wanted warnings. also, move -Wno-pointer-to-int-cast outside --enable-warnings conditional so that it always applies, since it's turning off a nuisance warning that's on-by-default with most compilers. --- configure | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/configure b/configure index 5b63e9b1c..9fc74a9b2 100755 --- a/configure +++ b/configure @@ -508,10 +508,13 @@ fi # # GCC defines -w as overriding any -W options, regardless of order, but # clang has a bunch of annoying warnings enabled by default and needs -w -# to start from a clean slate. So use -w if building with clang. +# to start from a clean slate. So use -w if building with clang. Also +# turn off a common on-by-default cast warning regardless of compiler. # test "$cc_family" = clang && tryflag CFLAGS_AUTO -w +tryflag CFLAGS_AUTO -Wno-pointer-to-int-cast + # # Even with -std=c99, gcc accepts some constructs which are constraint # violations. 
We want to treat these as errors regardless of whether @@ -535,14 +538,17 @@ tryflag CFLAGS_AUTO -Werror=discarded-array-qualifiers test "$cc_family" = clang && tryflag CFLAGS_AUTO -Qunused-arguments if test "x$warnings" = xyes ; then -tryflag CFLAGS_AUTO -Wall -tryflag CFLAGS_AUTO -Wno-parentheses -tryflag CFLAGS_AUTO -Wno-uninitialized -tryflag CFLAGS_AUTO -Wno-missing-braces -tryflag CFLAGS_AUTO -Wno-unused-value -tryflag CFLAGS_AUTO -Wno-unused-but-set-variable -tryflag CFLAGS_AUTO -Wno-unknown-pragmas -tryflag CFLAGS_AUTO -Wno-pointer-to-int-cast +tryflag CFLAGS_AUTO -Waddress +tryflag CFLAGS_AUTO -Warray-bounds +tryflag CFLAGS_AUTO -Wchar-subscripts +tryflag CFLAGS_AUTO -Wduplicate-decl-specifier +tryflag CFLAGS_AUTO -Winit-self +tryflag CFLAGS_AUTO -Wreturn-type +tryflag CFLAGS_AUTO -Wsequence-point +tryflag CFLAGS_AUTO -Wstrict-aliasing +tryflag CFLAGS_AUTO -Wunused-function +tryflag CFLAGS_AUTO -Wunused-label +tryflag CFLAGS_AUTO -Wunused-variable fi # Determine if the compiler produces position-independent code (PIC) From 063dde6476996e050a2a873d3e12d33f55b8ea70 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 27 Aug 2020 20:43:47 -0400 Subject: [PATCH 090/189] configure: enable warnings by default now that -Wall is not used and we control which warnings are enabled, it makes sense to have the wanted ones on by default. hopefully this will also discourage manually adding -Wall to CFLAGS and making incorrect changes or bug reports based on the compiler's output. --- configure | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure b/configure index 9fc74a9b2..947adf419 100755 --- a/configure +++ b/configure @@ -30,7 +30,7 @@ System types: Optional features: --enable-optimize=... 
optimize listed components for speed over size [auto] --enable-debug build with debugging information [disabled] - --enable-warnings build with recommended warnings flags [disabled] + --disable-warnings build with recommended warnings flags [enabled] --enable-wrapper=... build given musl toolchain wrapper [auto] --disable-shared inhibit building shared library [enabled] --disable-static inhibit building static library [enabled] @@ -136,7 +136,7 @@ build= target= optimize=auto debug=no -warnings=no +warnings=yes shared=auto static=yes wrapper=auto From cdaca1e830b72df9941d25ed1cb4b8875509608b Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sun, 30 Aug 2020 16:47:40 -0400 Subject: [PATCH 091/189] fix regression with applications that expect struct winsize in ioctl.h putting the (simple) definition in alltypes.h seems like the best solution here. making sys/ioctl.h implicitly include termios.h is probably excess namespace pollution. --- include/alltypes.h.in | 2 ++ include/sys/ioctl.h | 2 ++ include/termios.h | 8 +------- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/include/alltypes.h.in b/include/alltypes.h.in index d9ff462e1..d47aeea9a 100644 --- a/include/alltypes.h.in +++ b/include/alltypes.h.in @@ -77,6 +77,8 @@ TYPEDEF struct __sigset_t { unsigned long __bits[128/sizeof(long)]; } sigset_t; STRUCT iovec { void *iov_base; size_t iov_len; }; +STRUCT winsize { unsigned short ws_row, ws_col, ws_xpixel, ws_ypixel; }; + TYPEDEF unsigned socklen_t; TYPEDEF unsigned short sa_family_t; diff --git a/include/sys/ioctl.h b/include/sys/ioctl.h index d6a7d474e..a9a2346ee 100644 --- a/include/sys/ioctl.h +++ b/include/sys/ioctl.h @@ -4,6 +4,8 @@ extern "C" { #endif +#define __NEED_struct_winsize + #include #include diff --git a/include/termios.h b/include/termios.h index 793cfc947..cbb533010 100644 --- a/include/termios.h +++ b/include/termios.h @@ -8,6 +8,7 @@ extern "C" { #include #define __NEED_pid_t +#define __NEED_struct_winsize #include @@ -15,13 +16,6 
@@ typedef unsigned char cc_t; typedef unsigned int speed_t; typedef unsigned int tcflag_t; -struct winsize { - unsigned short ws_row; - unsigned short ws_col; - unsigned short ws_xpixel; - unsigned short ws_ypixel; -}; - #define NCCS 32 #include From cbafd788fb57bf8e7610736fd24ab9102d8d5809 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sun, 30 Aug 2020 16:49:48 -0400 Subject: [PATCH 092/189] clean up overinclusion in files using TIOCGWINSZ now that struct winsize is available via sys/ioctl.h once again, including termios.h is not needed. --- src/stdio/__fdopen.c | 1 - src/stdio/__stdout_write.c | 1 - src/unistd/isatty.c | 1 - 3 files changed, 3 deletions(-) diff --git a/src/stdio/__fdopen.c b/src/stdio/__fdopen.c index 616f4f998..116e78e56 100644 --- a/src/stdio/__fdopen.c +++ b/src/stdio/__fdopen.c @@ -1,7 +1,6 @@ #include "stdio_impl.h" #include #include -#include #include #include #include diff --git a/src/stdio/__stdout_write.c b/src/stdio/__stdout_write.c index 5b413c797..dd1ec60ff 100644 --- a/src/stdio/__stdout_write.c +++ b/src/stdio/__stdout_write.c @@ -1,6 +1,5 @@ #include "stdio_impl.h" #include -#include size_t __stdout_write(FILE *f, const unsigned char *buf, size_t len) { diff --git a/src/unistd/isatty.c b/src/unistd/isatty.c index bc220c003..75a9c186a 100644 --- a/src/unistd/isatty.c +++ b/src/unistd/isatty.c @@ -1,7 +1,6 @@ #include #include #include -#include #include "syscall.h" int isatty(int fd) From b54bcf9f9ac8267f151ae406f4d689a750ad0be7 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sun, 30 Aug 2020 21:30:37 -0400 Subject: [PATCH 093/189] restore h_errno ABI compatibility with ancient binaries prior to commit e68c51ac46a9f273927aef8dcebc89912ab19ece, h_errno was actually an external data object not a macro. bring back the symbol, and use it as the storage for the main thread's h_errno. 
technically this still doesn't provide full compatibility if the application was multithreaded, but at the time there were no res_* functions (and they did not set h_errno anyway), so any use of h_errno would have been via thread-unsafe functions. thus a solution that just fixes single-threaded applications seems acceptable. --- src/network/h_errno.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/network/h_errno.c b/src/network/h_errno.c index 8677a92b7..638f77180 100644 --- a/src/network/h_errno.c +++ b/src/network/h_errno.c @@ -1,7 +1,11 @@ #include #include "pthread_impl.h" +#undef h_errno +int h_errno; + int *__h_errno_location(void) { + if (!__pthread_self()->stack) return &h_errno; return &__pthread_self()->h_errno_val; } From 4f2465451271a6770bcb12e975ed33c3b86a6e63 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sun, 30 Aug 2020 21:37:12 -0400 Subject: [PATCH 094/189] fix i386 __set_thread_area fallback this code is only needed for pre-2.6 kernels, which are not actually supported anyway, and was never tested. the fallback path using SYS_modify_ldt failed to clear the upper bits of %eax (all ones due to SYS_set_thread_area's return value being an error) before modifying %al to attempt a new syscall. 
--- src/thread/i386/__set_thread_area.s | 1 + 1 file changed, 1 insertion(+) diff --git a/src/thread/i386/__set_thread_area.s b/src/thread/i386/__set_thread_area.s index c2c21dd5d..aa6852beb 100644 --- a/src/thread/i386/__set_thread_area.s +++ b/src/thread/i386/__set_thread_area.s @@ -28,6 +28,7 @@ __set_thread_area: ret 2: mov %ebx,%ecx + xor %eax,%eax xor %ebx,%ebx xor %edx,%edx mov %ebx,(%esp) From 0e81c82d528e3ea66c786f17f0a971e7ae6a693f Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 3 Sep 2020 17:30:22 -0400 Subject: [PATCH 095/189] fix missing newline in herror output --- src/network/herror.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/network/herror.c b/src/network/herror.c index 65f25ff3f..87f8cff4f 100644 --- a/src/network/herror.c +++ b/src/network/herror.c @@ -4,5 +4,5 @@ void herror(const char *msg) { - fprintf(stderr, "%s%s%s", msg?msg:"", msg?": ":"", hstrerror(h_errno)); + fprintf(stderr, "%s%s%s\n", msg?msg:"", msg?": ":"", hstrerror(h_errno)); } From 503c808a52d24c2621fd6add44fc4715ea032c3b Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 3 Sep 2020 17:37:11 -0400 Subject: [PATCH 096/189] fix missing O_LARGEFILE values on x86_64, x32, and mips64 prior to commit 685e40bb09f5f24a2af54ea09c97328808f76990, x86_64 was correctly passing O_LARGEFILE to SYS_open; it was removed (defined to 0 in the public header, and changed to use the public definition) as part of that change, probably out of a mistaken belief that it's not needed. however, on a mixed system with 32-bit and 64-bit binaries, it's important that all files be opened with O_LARGEFILE, even if the opening process is 64-bit, in case a descriptor is passed to a 32-bit process. otherwise, attempts to access past 2GB in the 32-bit process could produce EOVERFLOW. most 64-bit archs added later got this right already, except for mips64. x32 was also affected. these are now fixed.
--- arch/mips64/bits/fcntl.h | 2 +- arch/x32/bits/fcntl.h | 2 +- arch/x86_64/bits/fcntl.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips64/bits/fcntl.h b/arch/mips64/bits/fcntl.h index 3bcec15e0..5da1eef80 100644 --- a/arch/mips64/bits/fcntl.h +++ b/arch/mips64/bits/fcntl.h @@ -13,7 +13,7 @@ #define O_ASYNC 010000 #define O_DIRECT 0100000 -#define O_LARGEFILE 0 +#define O_LARGEFILE 020000 #define O_NOATIME 01000000 #define O_PATH 010000000 #define O_TMPFILE 020200000 diff --git a/arch/x32/bits/fcntl.h b/arch/x32/bits/fcntl.h index 1b88ad391..08627f812 100644 --- a/arch/x32/bits/fcntl.h +++ b/arch/x32/bits/fcntl.h @@ -13,7 +13,7 @@ #define O_ASYNC 020000 #define O_DIRECT 040000 -#define O_LARGEFILE 0 +#define O_LARGEFILE 0100000 #define O_NOATIME 01000000 #define O_PATH 010000000 #define O_TMPFILE 020200000 diff --git a/arch/x86_64/bits/fcntl.h b/arch/x86_64/bits/fcntl.h index 1b88ad391..08627f812 100644 --- a/arch/x86_64/bits/fcntl.h +++ b/arch/x86_64/bits/fcntl.h @@ -13,7 +13,7 @@ #define O_ASYNC 020000 #define O_DIRECT 040000 -#define O_LARGEFILE 0 +#define O_LARGEFILE 0100000 #define O_NOATIME 01000000 #define O_PATH 010000000 #define O_TMPFILE 020200000 From 1ff05e67b22aeb7c79b839299ee9d918b8e28fb0 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 3 Sep 2020 17:58:17 -0400 Subject: [PATCH 097/189] make generic bits/fcntl.h shareable with 64-bit archs the fcntl file locking command macro values in the existing generic bits/fcntl.h were the "64" variants, requiring 64-bit archs that use the "plain" variants to have their own bits/fcntl.h, even if they otherwise use the common definitions for everything. since commit 7cc79d10afd43811a486fd5e9fcdf8e45ac599e0 exposed __LONG_MAX to all bits headers, we can now make the generic one common between 32- and 64-bit archs. 
--- arch/generic/bits/fcntl.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/generic/bits/fcntl.h b/arch/generic/bits/fcntl.h index ae233cc00..730a98cfe 100644 --- a/arch/generic/bits/fcntl.h +++ b/arch/generic/bits/fcntl.h @@ -30,9 +30,15 @@ #define F_SETSIG 10 #define F_GETSIG 11 +#if __LONG_MAX == 0x7fffffffL #define F_GETLK 12 #define F_SETLK 13 #define F_SETLKW 14 +#else +#define F_GETLK 5 +#define F_SETLK 6 +#define F_SETLKW 7 +#endif #define F_SETOWN_EX 15 #define F_GETOWN_EX 16 From 7ec23ce81e3f9f8db8d8d7fc555d36d1c3e6401b Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 3 Sep 2020 18:02:55 -0400 Subject: [PATCH 098/189] use generic bits/fcntl.h for x86_64 and riscv64 these were only using a custom version because they needed the "non-64" variants of the file locking command macros. --- arch/riscv64/bits/fcntl.h | 38 ------------------------------------- arch/x86_64/bits/fcntl.h | 40 --------------------------------------- 2 files changed, 78 deletions(-) delete mode 100644 arch/riscv64/bits/fcntl.h delete mode 100644 arch/x86_64/bits/fcntl.h diff --git a/arch/riscv64/bits/fcntl.h b/arch/riscv64/bits/fcntl.h deleted file mode 100644 index ecb4d18fd..000000000 --- a/arch/riscv64/bits/fcntl.h +++ /dev/null @@ -1,38 +0,0 @@ -#define O_CREAT 0100 -#define O_EXCL 0200 -#define O_NOCTTY 0400 -#define O_TRUNC 01000 -#define O_APPEND 02000 -#define O_NONBLOCK 04000 -#define O_DSYNC 010000 -#define O_SYNC 04010000 -#define O_RSYNC 04010000 -#define O_DIRECTORY 0200000 -#define O_NOFOLLOW 0400000 -#define O_CLOEXEC 02000000 - -#define O_ASYNC 020000 -#define O_DIRECT 040000 -#define O_LARGEFILE 0100000 -#define O_NOATIME 01000000 -#define O_PATH 010000000 -#define O_TMPFILE 020200000 -#define O_NDELAY O_NONBLOCK - -#define F_DUPFD 0 -#define F_GETFD 1 -#define F_SETFD 2 -#define F_GETFL 3 -#define F_SETFL 4 -#define F_GETLK 5 -#define F_SETLK 6 -#define F_SETLKW 7 -#define F_SETOWN 8 -#define F_GETOWN 9 -#define F_SETSIG 10 -#define F_GETSIG 11 - 
-#define F_SETOWN_EX 15 -#define F_GETOWN_EX 16 - -#define F_GETOWNER_UIDS 17 diff --git a/arch/x86_64/bits/fcntl.h b/arch/x86_64/bits/fcntl.h deleted file mode 100644 index 08627f812..000000000 --- a/arch/x86_64/bits/fcntl.h +++ /dev/null @@ -1,40 +0,0 @@ -#define O_CREAT 0100 -#define O_EXCL 0200 -#define O_NOCTTY 0400 -#define O_TRUNC 01000 -#define O_APPEND 02000 -#define O_NONBLOCK 04000 -#define O_DSYNC 010000 -#define O_SYNC 04010000 -#define O_RSYNC 04010000 -#define O_DIRECTORY 0200000 -#define O_NOFOLLOW 0400000 -#define O_CLOEXEC 02000000 - -#define O_ASYNC 020000 -#define O_DIRECT 040000 -#define O_LARGEFILE 0100000 -#define O_NOATIME 01000000 -#define O_PATH 010000000 -#define O_TMPFILE 020200000 -#define O_NDELAY O_NONBLOCK - -#define F_DUPFD 0 -#define F_GETFD 1 -#define F_SETFD 2 -#define F_GETFL 3 -#define F_SETFL 4 - -#define F_SETOWN 8 -#define F_GETOWN 9 -#define F_SETSIG 10 -#define F_GETSIG 11 - -#define F_GETLK 5 -#define F_SETLK 6 -#define F_SETLKW 7 - -#define F_SETOWN_EX 15 -#define F_GETOWN_EX 16 - -#define F_GETOWNER_UIDS 17 From dfd54be83d1d783912b2fe969de42a91598d2ed2 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Sat, 15 Feb 2020 09:42:37 +0000 Subject: [PATCH 099/189] netinet/tcp.h: update tcp_info for linux v5.5 see linux commit 480274787d7e3458bc5a7cfbbbe07033984ad711 tcp: add TCP_INFO status for failed client TFO --- include/netinet/tcp.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/netinet/tcp.h b/include/netinet/tcp.h index 44a007aaf..db3904ae1 100644 --- a/include/netinet/tcp.h +++ b/include/netinet/tcp.h @@ -181,6 +181,13 @@ struct tcphdr { #define TCP_CA_Recovery 3 #define TCP_CA_Loss 4 +enum tcp_fastopen_client_fail { + TFO_STATUS_UNSPEC, + TFO_COOKIE_UNAVAILABLE, + TFO_DATA_NOT_ACKED, + TFO_SYN_RETRANSMITTED, +}; + struct tcp_info { uint8_t tcpi_state; uint8_t tcpi_ca_state; @@ -189,7 +196,7 @@ struct tcp_info { uint8_t tcpi_backoff; uint8_t tcpi_options; uint8_t tcpi_snd_wscale : 4, 
tcpi_rcv_wscale : 4; - uint8_t tcpi_delivery_rate_app_limited : 1; + uint8_t tcpi_delivery_rate_app_limited : 1, tcpi_fastopen_client_fail : 2; uint32_t tcpi_rto; uint32_t tcpi_ato; uint32_t tcpi_snd_mss; From 98d0efa5e740b93a9ee9c1778f7859899bc29dea Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Tue, 31 Mar 2020 18:32:51 +0000 Subject: [PATCH 100/189] add pidfd_getfd and openat2 syscall numbers from linux v5.6 also added clone3 on sh and m68k, on sh it's still missing (not yet wired up), but reserved so safe to add. see linux commit fddb5d430ad9fa91b49b1d34d0202ffe2fa0e179 open: introduce openat2(2) syscall linux commit 9a2cef09c801de54feecd912303ace5c27237f12 arch: wire up pidfd_getfd syscall linux commit 8649c322f75c96e7ced2fec201e123b2b073bf09 pid: Implement pidfd_getfd syscall linux commit e8bb2a2a1d51511e6b3f7e08125d52ec73c11139 m68k: Wire up clone3() syscall --- arch/aarch64/bits/syscall.h.in | 2 ++ arch/arm/bits/syscall.h.in | 2 ++ arch/i386/bits/syscall.h.in | 2 ++ arch/m68k/bits/syscall.h.in | 3 +++ arch/microblaze/bits/syscall.h.in | 2 ++ arch/mips/bits/syscall.h.in | 2 ++ arch/mips64/bits/syscall.h.in | 2 ++ arch/mipsn32/bits/syscall.h.in | 2 ++ arch/or1k/bits/syscall.h.in | 2 ++ arch/powerpc/bits/syscall.h.in | 2 ++ arch/powerpc64/bits/syscall.h.in | 2 ++ arch/riscv64/bits/syscall.h.in | 2 ++ arch/s390x/bits/syscall.h.in | 2 ++ arch/sh/bits/syscall.h.in | 3 +++ arch/x32/bits/syscall.h.in | 2 ++ arch/x86_64/bits/syscall.h.in | 2 ++ 16 files changed, 34 insertions(+) diff --git a/arch/aarch64/bits/syscall.h.in b/arch/aarch64/bits/syscall.h.in index 93648afdf..169510fd3 100644 --- a/arch/aarch64/bits/syscall.h.in +++ b/arch/aarch64/bits/syscall.h.in @@ -289,4 +289,6 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_openat2 437 +#define __NR_pidfd_getfd 438 diff --git a/arch/arm/bits/syscall.h.in b/arch/arm/bits/syscall.h.in index 11d677635..729597c2c 100644 --- a/arch/arm/bits/syscall.h.in +++ 
b/arch/arm/bits/syscall.h.in @@ -389,6 +389,8 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_openat2 437 +#define __NR_pidfd_getfd 438 #define __ARM_NR_breakpoint 0x0f0001 #define __ARM_NR_cacheflush 0x0f0002 diff --git a/arch/i386/bits/syscall.h.in b/arch/i386/bits/syscall.h.in index 1ae4e48a8..40721c6c9 100644 --- a/arch/i386/bits/syscall.h.in +++ b/arch/i386/bits/syscall.h.in @@ -426,4 +426,6 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_openat2 437 +#define __NR_pidfd_getfd 438 diff --git a/arch/m68k/bits/syscall.h.in b/arch/m68k/bits/syscall.h.in index ddfa72e4d..f2f838cc0 100644 --- a/arch/m68k/bits/syscall.h.in +++ b/arch/m68k/bits/syscall.h.in @@ -405,3 +405,6 @@ #define __NR_fsmount 432 #define __NR_fspick 433 #define __NR_pidfd_open 434 +#define __NR_clone3 435 +#define __NR_openat2 437 +#define __NR_pidfd_getfd 438 diff --git a/arch/microblaze/bits/syscall.h.in b/arch/microblaze/bits/syscall.h.in index 963386a83..245729207 100644 --- a/arch/microblaze/bits/syscall.h.in +++ b/arch/microblaze/bits/syscall.h.in @@ -427,4 +427,6 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_openat2 437 +#define __NR_pidfd_getfd 438 diff --git a/arch/mips/bits/syscall.h.in b/arch/mips/bits/syscall.h.in index 86251bf31..567759418 100644 --- a/arch/mips/bits/syscall.h.in +++ b/arch/mips/bits/syscall.h.in @@ -408,4 +408,6 @@ #define __NR_fspick 4433 #define __NR_pidfd_open 4434 #define __NR_clone3 4435 +#define __NR_openat2 4437 +#define __NR_pidfd_getfd 4438 diff --git a/arch/mips64/bits/syscall.h.in b/arch/mips64/bits/syscall.h.in index 9b406e9a4..6f3934669 100644 --- a/arch/mips64/bits/syscall.h.in +++ b/arch/mips64/bits/syscall.h.in @@ -338,4 +338,6 @@ #define __NR_fspick 5433 #define __NR_pidfd_open 5434 #define __NR_clone3 5435 +#define __NR_openat2 5437 +#define __NR_pidfd_getfd 5438 diff --git a/arch/mipsn32/bits/syscall.h.in 
b/arch/mipsn32/bits/syscall.h.in index 2ad48d10d..e24086f67 100644 --- a/arch/mipsn32/bits/syscall.h.in +++ b/arch/mipsn32/bits/syscall.h.in @@ -362,4 +362,6 @@ #define __NR_fspick 6433 #define __NR_pidfd_open 6434 #define __NR_clone3 6435 +#define __NR_openat2 6437 +#define __NR_pidfd_getfd 6438 diff --git a/arch/or1k/bits/syscall.h.in b/arch/or1k/bits/syscall.h.in index e9c925e41..0d54d247b 100644 --- a/arch/or1k/bits/syscall.h.in +++ b/arch/or1k/bits/syscall.h.in @@ -311,4 +311,6 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_openat2 437 +#define __NR_pidfd_getfd 438 diff --git a/arch/powerpc/bits/syscall.h.in b/arch/powerpc/bits/syscall.h.in index 8d4f79b52..70f7a3798 100644 --- a/arch/powerpc/bits/syscall.h.in +++ b/arch/powerpc/bits/syscall.h.in @@ -415,4 +415,6 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_openat2 437 +#define __NR_pidfd_getfd 438 diff --git a/arch/powerpc64/bits/syscall.h.in b/arch/powerpc64/bits/syscall.h.in index b935864c4..3b9f8a446 100644 --- a/arch/powerpc64/bits/syscall.h.in +++ b/arch/powerpc64/bits/syscall.h.in @@ -387,4 +387,6 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_openat2 437 +#define __NR_pidfd_getfd 438 diff --git a/arch/riscv64/bits/syscall.h.in b/arch/riscv64/bits/syscall.h.in index 0043eeba3..da4fd73dd 100644 --- a/arch/riscv64/bits/syscall.h.in +++ b/arch/riscv64/bits/syscall.h.in @@ -289,6 +289,8 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_openat2 437 +#define __NR_pidfd_getfd 438 #define __NR_sysriscv __NR_arch_specific_syscall #define __NR_riscv_flush_icache (__NR_sysriscv + 15) diff --git a/arch/s390x/bits/syscall.h.in b/arch/s390x/bits/syscall.h.in index e89f37829..9406c9377 100644 --- a/arch/s390x/bits/syscall.h.in +++ b/arch/s390x/bits/syscall.h.in @@ -352,4 +352,6 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 
#define __NR_clone3 435 +#define __NR_openat2 437 +#define __NR_pidfd_getfd 438 diff --git a/arch/sh/bits/syscall.h.in b/arch/sh/bits/syscall.h.in index 0102ddaf8..0f09c9035 100644 --- a/arch/sh/bits/syscall.h.in +++ b/arch/sh/bits/syscall.h.in @@ -398,4 +398,7 @@ #define __NR_fsmount 432 #define __NR_fspick 433 #define __NR_pidfd_open 434 +#define __NR_clone3 435 +#define __NR_openat2 437 +#define __NR_pidfd_getfd 438 diff --git a/arch/x32/bits/syscall.h.in b/arch/x32/bits/syscall.h.in index f47bdee5b..17a4011ec 100644 --- a/arch/x32/bits/syscall.h.in +++ b/arch/x32/bits/syscall.h.in @@ -298,6 +298,8 @@ #define __NR_fspick (0x40000000 + 433) #define __NR_pidfd_open (0x40000000 + 434) #define __NR_clone3 (0x40000000 + 435) +#define __NR_openat2 (0x40000000 + 437) +#define __NR_pidfd_getfd (0x40000000 + 438) #define __NR_rt_sigaction (0x40000000 + 512) diff --git a/arch/x86_64/bits/syscall.h.in b/arch/x86_64/bits/syscall.h.in index 6a646ad34..65126e1f7 100644 --- a/arch/x86_64/bits/syscall.h.in +++ b/arch/x86_64/bits/syscall.h.in @@ -345,4 +345,6 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_openat2 437 +#define __NR_pidfd_getfd 438 From 163b734a8b03254966543d8e30366a0571602f2e Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Tue, 31 Mar 2020 19:57:53 +0000 Subject: [PATCH 101/189] netinet/in.h: add IPPROTO_ macros from linux v5.6 add IPPROTO_ETHERNET and IPPROTO_MPTCP, see linux commit 2677625387056136e256c743e3285b4fe3da87bb seg6: fix SRv6 L2 tunnels to use IANA-assigned protocol number linux commit faf391c3826cd29feae02078ca2022d2f912f7cc tcp: Define IPPROTO_MPTCP --- include/netinet/in.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/netinet/in.h b/include/netinet/in.h index 103d2e044..36a2013ad 100644 --- a/include/netinet/in.h +++ b/include/netinet/in.h @@ -101,8 +101,10 @@ uint16_t ntohs(uint16_t); #define IPPROTO_MH 135 #define IPPROTO_UDPLITE 136 #define IPPROTO_MPLS 137 +#define 
IPPROTO_ETHERNET 143 #define IPPROTO_RAW 255 -#define IPPROTO_MAX 256 +#define IPPROTO_MPTCP 262 +#define IPPROTO_MAX 263 #define IN6_IS_ADDR_UNSPECIFIED(a) \ (((uint32_t *) (a))[0] == 0 && ((uint32_t *) (a))[1] == 0 && \ From 93882b343c0ee7d25499f3c0e609739ae536b711 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Tue, 31 Mar 2020 20:07:33 +0000 Subject: [PATCH 102/189] netinet/tcp.h: update for linux v5.6 TCP_NLA_TIMEOUT_REHASH queries timeout-triggered rehash attempts, tcpm_ifindex limits the scope of TCP_MD5SIG* sockopt to a device. see linux commit 32efcc06d2a15fa87585614d12d6c2308cc2d3f3 tcp: export count for rehash attempts linux commit 6b102db50cdde3ba2f78631ed21222edf3a5fb51 net: Add device index to tcp_md5sig --- include/netinet/tcp.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/netinet/tcp.h b/include/netinet/tcp.h index db3904ae1..92550dcca 100644 --- a/include/netinet/tcp.h +++ b/include/netinet/tcp.h @@ -78,6 +78,7 @@ enum { TCP_NLA_DSACK_DUPS, TCP_NLA_REORD_SEEN, TCP_NLA_SRTT, + TCP_NLA_TIMEOUT_REHASH, }; #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) @@ -247,14 +248,15 @@ struct tcp_info { #define TCP_MD5SIG_MAXKEYLEN 80 -#define TCP_MD5SIG_FLAG_PREFIX 1 +#define TCP_MD5SIG_FLAG_PREFIX 0x1 +#define TCP_MD5SIG_FLAG_IFINDEX 0x2 struct tcp_md5sig { struct sockaddr_storage tcpm_addr; uint8_t tcpm_flags; uint8_t tcpm_prefixlen; uint16_t tcpm_keylen; - uint32_t __tcpm_pad; + int tcpm_ifindex; uint8_t tcpm_key[TCP_MD5SIG_MAXKEYLEN]; }; From bcd7c15df79e9e68b1c08a737a8660502173134d Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Tue, 31 Mar 2020 20:19:26 +0000 Subject: [PATCH 103/189] netinet/udp.h: add TCP_ENCAP_ESPINTCP from linux v5.6 The use of TCP_ in udp.h is not known, fortunately udp.h is not specified by posix so there are no strict namespace rules, added in linux commit e27cca96cd68fa2c6814c90f9a1cfd36bb68c593 xfrm: add espintcp (RFC 8229) --- include/netinet/udp.h | 1 + 1 file changed, 1 insertion(+) diff 
--git a/include/netinet/udp.h b/include/netinet/udp.h index ffd890796..40c3f2034 100644 --- a/include/netinet/udp.h +++ b/include/netinet/udp.h @@ -35,6 +35,7 @@ struct udphdr { #define UDP_ENCAP_GTP0 4 #define UDP_ENCAP_GTP1U 5 #define UDP_ENCAP_RXRPC 6 +#define TCP_ENCAP_ESPINTCP 7 #define SOL_UDP 17 From d659ca0203c9598fa9976090e85aa5434ebeb957 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Tue, 31 Mar 2020 20:26:53 +0000 Subject: [PATCH 104/189] sys/prctl.h: add PR_{SET,GET}_IO_FLUSHER from linux v5.6 needed for storage drivers with userspace component that may run in the IO path, see linux commit 8d19f1c8e1937baf74e1962aae9f90fa3aeab463 prctl: PR_{G,S}ET_IO_FLUSHER to support controlling memory reclaim --- include/sys/prctl.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/sys/prctl.h b/include/sys/prctl.h index d9c846e9c..4b9fcc050 100644 --- a/include/sys/prctl.h +++ b/include/sys/prctl.h @@ -158,6 +158,9 @@ struct prctl_mm_map { #define PR_GET_TAGGED_ADDR_CTRL 56 #define PR_TAGGED_ADDR_ENABLE (1UL << 0) +#define PR_SET_IO_FLUSHER 57 +#define PR_GET_IO_FLUSHER 58 + int prctl (int, ...); #ifdef __cplusplus From bc5c24bb3bc41456ff5a33361c0500d9a357ee19 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Tue, 31 Mar 2020 20:37:23 +0000 Subject: [PATCH 105/189] sys/random.h: add GRND_INSECURE from linux v5.6 added in linux commit 75551dbf112c992bc6c99a972990b3f272247e23 random: add GRND_INSECURE to return best-effort non-cryptographic bytes --- include/sys/random.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/sys/random.h b/include/sys/random.h index 4ee7bf2cc..59e40ab89 100644 --- a/include/sys/random.h +++ b/include/sys/random.h @@ -10,6 +10,7 @@ extern "C" { #define GRND_NONBLOCK 0x0001 #define GRND_RANDOM 0x0002 +#define GRND_INSECURE 0x0004 ssize_t getrandom(void *, size_t, unsigned); From 2f91b9578e78ee3021490041bd8946c861817131 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Tue, 31 Mar 2020 20:42:14 +0000 Subject: [PATCH 
106/189] sched.h: add CLONE_NEWTIME from linux v5.6 reuses a bit from CSIGNAL so it can only be used with unshare and clone3, added in linux commit 769071ac9f20b6a447410c7eaa55d1a5233ef40c ns: Introduce Time Namespace --- include/sched.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/sched.h b/include/sched.h index 822f464ef..fda4b4846 100644 --- a/include/sched.h +++ b/include/sched.h @@ -49,6 +49,7 @@ int sched_yield(void); #ifdef _GNU_SOURCE #define CSIGNAL 0x000000ff +#define CLONE_NEWTIME 0x00000080 #define CLONE_VM 0x00000100 #define CLONE_FS 0x00000200 #define CLONE_FILES 0x00000400 From d788c53315cda8165a80eecb3e1b7f05fa49e0c3 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Tue, 31 Mar 2020 20:57:46 +0000 Subject: [PATCH 107/189] aarch64: add HWCAP2_ macros from linux v5.3 these were missed before, added in linux commit 1201937491822b61641c1878ebcd16a93aed4540 arm64: Expose ARMv8.5 CondM capability to userspace linux commit ca9503fc9e9812aa6258e55d44edb03eb30fc46f arm64: Expose FRINT capabilities to userspace --- arch/aarch64/bits/hwcap.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/aarch64/bits/hwcap.h b/arch/aarch64/bits/hwcap.h index a7484028e..cb4429185 100644 --- a/arch/aarch64/bits/hwcap.h +++ b/arch/aarch64/bits/hwcap.h @@ -38,3 +38,5 @@ #define HWCAP2_SVEBITPERM (1 << 4) #define HWCAP2_SVESHA3 (1 << 5) #define HWCAP2_SVESM4 (1 << 6) +#define HWCAP2_FLAGM2 (1 << 7) +#define HWCAP2_FRINT (1 << 8) From 438438ffe1d6782dd9d191888ca2ded98efef9ee Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Tue, 31 Mar 2020 20:57:46 +0000 Subject: [PATCH 108/189] aarch64: add new HWCAP2_ macros from linux v5.6 added in linux commit 1a50ec0b3b2e9a83f1b1245ea37a853aac2f741c arm64: Implement archrandom.h for ARMv8.5-RNG linux commit d4209d8b717311d114b5d47ba7f8249fd44e97c2 arm64: cpufeature: Export matrix and other features to userspace --- arch/aarch64/bits/hwcap.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git 
a/arch/aarch64/bits/hwcap.h b/arch/aarch64/bits/hwcap.h index cb4429185..7ab73f99b 100644 --- a/arch/aarch64/bits/hwcap.h +++ b/arch/aarch64/bits/hwcap.h @@ -40,3 +40,11 @@ #define HWCAP2_SVESM4 (1 << 6) #define HWCAP2_FLAGM2 (1 << 7) #define HWCAP2_FRINT (1 << 8) +#define HWCAP2_SVEI8MM (1 << 9) +#define HWCAP2_SVEF32MM (1 << 10) +#define HWCAP2_SVEF64MM (1 << 11) +#define HWCAP2_SVEBF16 (1 << 12) +#define HWCAP2_I8MM (1 << 13) +#define HWCAP2_BF16 (1 << 14) +#define HWCAP2_DGH (1 << 15) +#define HWCAP2_RNG (1 << 16) From 7858b155d8817a0c28d2e53e54c2ad05d1e26ef5 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Mon, 13 Jul 2020 19:38:27 +0000 Subject: [PATCH 109/189] sys/fanotify.h: update to linux v5.7 see linux commit 9e2ba2c34f1922ca1e0c7d31b30ace5842c2e7d1 fanotify: send FAN_DIR_MODIFY event flavor with dir inode and name linux commit 44d705b0370b1d581f46ff23e5d33e8b5ff8ec58 fanotify: report name info for FAN_DIR_MODIFY event --- include/sys/fanotify.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/sys/fanotify.h b/include/sys/fanotify.h index b637c8f58..757667909 100644 --- a/include/sys/fanotify.h +++ b/include/sys/fanotify.h @@ -55,8 +55,9 @@ struct fanotify_response { #define FAN_OPEN_PERM 0x10000 #define FAN_ACCESS_PERM 0x20000 #define FAN_OPEN_EXEC_PERM 0x40000 -#define FAN_ONDIR 0x40000000 +#define FAN_DIR_MODIFY 0x00080000 #define FAN_EVENT_ON_CHILD 0x08000000 +#define FAN_ONDIR 0x40000000 #define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) #define FAN_MOVE (FAN_MOVED_FROM | FAN_MOVED_TO) #define FAN_CLOEXEC 0x01 @@ -88,6 +89,7 @@ struct fanotify_response { #define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_Q_OVERFLOW) #define FANOTIFY_METADATA_VERSION 3 #define FAN_EVENT_INFO_TYPE_FID 1 +#define FAN_EVENT_INFO_TYPE_DFID_NAME 2 #define FAN_ALLOW 0x01 #define FAN_DENY 0x02 #define FAN_AUDIT 0x10 From ef6d1d327b64be8f0c13016b0a9fa72fb557aecf Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: 
Mon, 13 Jul 2020 19:43:44 +0000 Subject: [PATCH 110/189] sys/mman.h: add MREMAP_DONTUNMAP from linux v5.7 it remaps anon mappings without unmapping the original. chromeos plans to use it with userfaultfd, see: linux commit e346b3813067d4b17383f975f197a9aa28a3b077 mm/mremap: add MREMAP_DONTUNMAP to mremap() --- include/sys/mman.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/sys/mman.h b/include/sys/mman.h index 3bade7272..4d603e910 100644 --- a/include/sys/mman.h +++ b/include/sys/mman.h @@ -101,6 +101,7 @@ extern "C" { #ifdef _GNU_SOURCE #define MREMAP_MAYMOVE 1 #define MREMAP_FIXED 2 +#define MREMAP_DONTUNMAP 4 #define MLOCK_ONFAULT 0x01 From 8edcde6daca4f2dcf337ba807014d29d09c4be36 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Mon, 13 Jul 2020 19:59:41 +0000 Subject: [PATCH 111/189] netinet/tcp.h: update to linux v5.7 add TCP_NLA_BYTES_NOTSENT and new tcp_zerocopy_receive fields, see linux commit c8856c051454909e5059df4e81c77b9c366c5515 tcp-zerocopy: Return inq along with tcp receive zerocopy. linux commit 33946518d493cdf10aedb4a483f1aa41948a3dab tcp-zerocopy: Return sk_err (if set) along with tcp receive zerocopy. 
linux commit e08ab0b377a1489760533424437c5f4be7f484a4 tcp: add bytes not sent to SCM_TIMESTAMPING_OPT_STATS --- include/netinet/tcp.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/netinet/tcp.h b/include/netinet/tcp.h index 92550dcca..b7b997f5f 100644 --- a/include/netinet/tcp.h +++ b/include/netinet/tcp.h @@ -79,6 +79,7 @@ enum { TCP_NLA_REORD_SEEN, TCP_NLA_SRTT, TCP_NLA_TIMEOUT_REHASH, + TCP_NLA_BYTES_NOTSENT, }; #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) @@ -284,6 +285,8 @@ struct tcp_zerocopy_receive { uint64_t address; uint32_t length; uint32_t recv_skip_hint; + uint32_t inq; + int32_t err; }; #endif From 2083c3a3698258f1aa0c732660225f7d9dee745f Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Tue, 28 Jul 2020 21:17:13 +0000 Subject: [PATCH 112/189] bits/syscall.h: add __NR_faccessat2 from linux v5.8 the linux faccessat syscall lacks a flag argument that is necessary to implement the posix api, see linux commit c8ffd8bcdd28296a198f237cc595148a8d4adfbe vfs: add faccessat2 syscall --- arch/aarch64/bits/syscall.h.in | 1 + arch/arm/bits/syscall.h.in | 1 + arch/i386/bits/syscall.h.in | 1 + arch/m68k/bits/syscall.h.in | 1 + arch/microblaze/bits/syscall.h.in | 1 + arch/mips/bits/syscall.h.in | 1 + arch/mips64/bits/syscall.h.in | 1 + arch/mipsn32/bits/syscall.h.in | 1 + arch/or1k/bits/syscall.h.in | 1 + arch/powerpc/bits/syscall.h.in | 1 + arch/powerpc64/bits/syscall.h.in | 1 + arch/riscv64/bits/syscall.h.in | 1 + arch/s390x/bits/syscall.h.in | 1 + arch/sh/bits/syscall.h.in | 1 + arch/x32/bits/syscall.h.in | 1 + arch/x86_64/bits/syscall.h.in | 1 + 16 files changed, 16 insertions(+) diff --git a/arch/aarch64/bits/syscall.h.in b/arch/aarch64/bits/syscall.h.in index 169510fd3..ac3eaf803 100644 --- a/arch/aarch64/bits/syscall.h.in +++ b/arch/aarch64/bits/syscall.h.in @@ -291,4 +291,5 @@ #define __NR_clone3 435 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 diff --git a/arch/arm/bits/syscall.h.in 
b/arch/arm/bits/syscall.h.in index 729597c2c..5b4e67910 100644 --- a/arch/arm/bits/syscall.h.in +++ b/arch/arm/bits/syscall.h.in @@ -391,6 +391,7 @@ #define __NR_clone3 435 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 #define __ARM_NR_breakpoint 0x0f0001 #define __ARM_NR_cacheflush 0x0f0002 diff --git a/arch/i386/bits/syscall.h.in b/arch/i386/bits/syscall.h.in index 40721c6c9..fb562db5e 100644 --- a/arch/i386/bits/syscall.h.in +++ b/arch/i386/bits/syscall.h.in @@ -428,4 +428,5 @@ #define __NR_clone3 435 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 diff --git a/arch/m68k/bits/syscall.h.in b/arch/m68k/bits/syscall.h.in index f2f838cc0..93703b46c 100644 --- a/arch/m68k/bits/syscall.h.in +++ b/arch/m68k/bits/syscall.h.in @@ -408,3 +408,4 @@ #define __NR_clone3 435 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 diff --git a/arch/microblaze/bits/syscall.h.in b/arch/microblaze/bits/syscall.h.in index 245729207..1e78dfde6 100644 --- a/arch/microblaze/bits/syscall.h.in +++ b/arch/microblaze/bits/syscall.h.in @@ -429,4 +429,5 @@ #define __NR_clone3 435 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 diff --git a/arch/mips/bits/syscall.h.in b/arch/mips/bits/syscall.h.in index 567759418..5b2066ef1 100644 --- a/arch/mips/bits/syscall.h.in +++ b/arch/mips/bits/syscall.h.in @@ -410,4 +410,5 @@ #define __NR_clone3 4435 #define __NR_openat2 4437 #define __NR_pidfd_getfd 4438 +#define __NR_faccessat2 4439 diff --git a/arch/mips64/bits/syscall.h.in b/arch/mips64/bits/syscall.h.in index 6f3934669..30cb321f7 100644 --- a/arch/mips64/bits/syscall.h.in +++ b/arch/mips64/bits/syscall.h.in @@ -340,4 +340,5 @@ #define __NR_clone3 5435 #define __NR_openat2 5437 #define __NR_pidfd_getfd 5438 +#define __NR_faccessat2 5439 diff --git a/arch/mipsn32/bits/syscall.h.in b/arch/mipsn32/bits/syscall.h.in index e24086f67..12eae0346 100644 --- 
a/arch/mipsn32/bits/syscall.h.in +++ b/arch/mipsn32/bits/syscall.h.in @@ -364,4 +364,5 @@ #define __NR_clone3 6435 #define __NR_openat2 6437 #define __NR_pidfd_getfd 6438 +#define __NR_faccessat2 6439 diff --git a/arch/or1k/bits/syscall.h.in b/arch/or1k/bits/syscall.h.in index 0d54d247b..bc9def13b 100644 --- a/arch/or1k/bits/syscall.h.in +++ b/arch/or1k/bits/syscall.h.in @@ -313,4 +313,5 @@ #define __NR_clone3 435 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 diff --git a/arch/powerpc/bits/syscall.h.in b/arch/powerpc/bits/syscall.h.in index 70f7a3798..2d4c5dfce 100644 --- a/arch/powerpc/bits/syscall.h.in +++ b/arch/powerpc/bits/syscall.h.in @@ -417,4 +417,5 @@ #define __NR_clone3 435 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 diff --git a/arch/powerpc64/bits/syscall.h.in b/arch/powerpc64/bits/syscall.h.in index 3b9f8a446..2a5c70341 100644 --- a/arch/powerpc64/bits/syscall.h.in +++ b/arch/powerpc64/bits/syscall.h.in @@ -389,4 +389,5 @@ #define __NR_clone3 435 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 diff --git a/arch/riscv64/bits/syscall.h.in b/arch/riscv64/bits/syscall.h.in index da4fd73dd..439712a49 100644 --- a/arch/riscv64/bits/syscall.h.in +++ b/arch/riscv64/bits/syscall.h.in @@ -291,6 +291,7 @@ #define __NR_clone3 435 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 #define __NR_sysriscv __NR_arch_specific_syscall #define __NR_riscv_flush_icache (__NR_sysriscv + 15) diff --git a/arch/s390x/bits/syscall.h.in b/arch/s390x/bits/syscall.h.in index 9406c9377..4c04abc5f 100644 --- a/arch/s390x/bits/syscall.h.in +++ b/arch/s390x/bits/syscall.h.in @@ -354,4 +354,5 @@ #define __NR_clone3 435 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 diff --git a/arch/sh/bits/syscall.h.in b/arch/sh/bits/syscall.h.in index 0f09c9035..3942dea2a 100644 --- a/arch/sh/bits/syscall.h.in +++ 
b/arch/sh/bits/syscall.h.in @@ -401,4 +401,5 @@ #define __NR_clone3 435 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 diff --git a/arch/x32/bits/syscall.h.in b/arch/x32/bits/syscall.h.in index 17a4011ec..e4c4bd06a 100644 --- a/arch/x32/bits/syscall.h.in +++ b/arch/x32/bits/syscall.h.in @@ -300,6 +300,7 @@ #define __NR_clone3 (0x40000000 + 435) #define __NR_openat2 (0x40000000 + 437) #define __NR_pidfd_getfd (0x40000000 + 438) +#define __NR_faccessat2 (0x40000000 + 439) #define __NR_rt_sigaction (0x40000000 + 512) diff --git a/arch/x86_64/bits/syscall.h.in b/arch/x86_64/bits/syscall.h.in index 65126e1f7..12a869801 100644 --- a/arch/x86_64/bits/syscall.h.in +++ b/arch/x86_64/bits/syscall.h.in @@ -347,4 +347,5 @@ #define __NR_clone3 435 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 From 7e627e6c46edf75a0b2f31d097012844754f015e Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Wed, 9 Sep 2020 07:25:52 +0000 Subject: [PATCH 113/189] elf.h: add .note.gnu.property related definitions On x86 and aarch64 GNU properties may be used to mark ELF objects. 
--- include/elf.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/elf.h b/include/elf.h index 549f92c1a..b5e7befb0 100644 --- a/include/elf.h +++ b/include/elf.h @@ -603,6 +603,7 @@ typedef struct { #define PT_GNU_EH_FRAME 0x6474e550 #define PT_GNU_STACK 0x6474e551 #define PT_GNU_RELRO 0x6474e552 +#define PT_GNU_PROPERTY 0x6474e553 #define PT_LOSUNW 0x6ffffffa #define PT_SUNWBSS 0x6ffffffa #define PT_SUNWSTACK 0x6ffffffb @@ -1085,6 +1086,7 @@ typedef struct { #define NT_GNU_BUILD_ID 3 #define NT_GNU_GOLD_VERSION 4 +#define NT_GNU_PROPERTY_TYPE_0 5 From 5744cd57bbae753bf2653c4b7fc549b1065a439a Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Wed, 9 Sep 2020 07:33:47 +0000 Subject: [PATCH 114/189] netinet/if_ether.h: add ETH_P_MRP from linux v5.8 Ethernet protocol number for media redundancy protocol, see linux commit 4714d13791f831d253852c8b5d657270becb8b2a bridge: uapi: mrp: Add mrp attributes. --- include/netinet/if_ether.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/netinet/if_ether.h b/include/netinet/if_ether.h index a08485e7f..55a2ff1b1 100644 --- a/include/netinet/if_ether.h +++ b/include/netinet/if_ether.h @@ -59,6 +59,7 @@ #define ETH_P_PREAUTH 0x88C7 #define ETH_P_TIPC 0x88CA #define ETH_P_LLDP 0x88CC +#define ETH_P_MRP 0x88E3 #define ETH_P_MACSEC 0x88E5 #define ETH_P_8021AH 0x88E7 #define ETH_P_MVRP 0x88F5 From d50d1e2d5e854bc33ffd90dfe1e4f83c75320723 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 9 Sep 2020 21:55:11 -0400 Subject: [PATCH 115/189] use new SYS_faccessat2 syscall to implement faccessat with flags commit 0a05eace163cee9b08571d2ff9d90f5e82d9c228 implemented AT_EACCESS for faccessat with a horrible hack, creating a child process to change switch uid/gid and perform the access probe without making potentially irreversible changes to the caller's credentials. this was due to the syscall lacking a flags argument. linux 5.8 introduced a new syscall, SYS_faccessat2, fixing this deficiency. 
use it if any flags are passed, and fallback to the old strategy on ENOSYS. continue using the old syscall when there are no flags. --- src/unistd/faccessat.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/unistd/faccessat.c b/src/unistd/faccessat.c index 76bbd4c72..557503eb6 100644 --- a/src/unistd/faccessat.c +++ b/src/unistd/faccessat.c @@ -25,12 +25,17 @@ static int checker(void *p) int faccessat(int fd, const char *filename, int amode, int flag) { - if (!flag || (flag==AT_EACCESS && getuid()==geteuid() && getgid()==getegid())) - return syscall(SYS_faccessat, fd, filename, amode, flag); + if (flag) { + int ret = __syscall(SYS_faccessat2, fd, filename, amode, flag); + if (ret != -ENOSYS) return __syscall_ret(ret); + } - if (flag != AT_EACCESS) + if (flag & ~AT_EACCESS) return __syscall_ret(-EINVAL); + if (!flag || (getuid()==geteuid() && getgid()==getegid())) + return syscall(SYS_faccessat, fd, filename, amode); + char stack[1024]; sigset_t set; pid_t pid; From 78b2a1b327cbedad3853e5f63d7170ed883c76b9 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 17 Sep 2020 15:09:46 -0400 Subject: [PATCH 116/189] avoid set*id/setrlimit misbehavior and hang in vforked/cloned child taking the deprecated/dropped vfork spec strictly, doing pretty much anything but execve in the child is wrong and undefined. however, these are commonly needed operations to setup the child state before exec, and historical implementations tolerated them. for single-threaded parents, these operations already worked as expected in the vforked child. however, due to the need for __synccall to synchronize id/resource limit changes among all threads, calling these functions in the vforked child of a multithreaded parent caused a misdirected broadcast signaling of all threads in the parent. 
these signals could kill the parent entirely if the synccall signal handler had never been installed in the parent, or could be ignored if it had, or could signal/kill one or more utterly wrong processes if the parent already terminated (due to vfork semantics, only possible via fatal signal) and the parent tids were recycled. in any case, the expected number of semaphore posts would never happen, so the child would permanently hang (with all signals blocked) waiting for them. to mitigate this, and also make the normal usage case work as intended, treat the condition where the caller's actual tid does not match the tid in its thread structure as single-threaded, and bypass the entire synccall broadcast operation. --- src/thread/synccall.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/thread/synccall.c b/src/thread/synccall.c index 648a6ad45..d58c851fc 100644 --- a/src/thread/synccall.c +++ b/src/thread/synccall.c @@ -63,7 +63,8 @@ void __synccall(void (*func)(void *), void *ctx) sem_init(&target_sem, 0, 0); sem_init(&caller_sem, 0, 0); - if (!libc.threads_minus_1) goto single_threaded; + if (!libc.threads_minus_1 || __syscall(SYS_gettid) != self->tid) + goto single_threaded; callback = func; context = ctx; From 83930eb799095fb43d4303bb5f41940e51f1e318 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 28 Sep 2020 18:38:27 -0400 Subject: [PATCH 117/189] fix fork of processes with active async io contexts previously, if a file descriptor had aio operations pending in the parent before fork, attempting to close it in the child would attempt to cancel a thread belonging to the parent. this could deadlock, fail, or crash the whole process if the cancellation signal handler was not yet installed in the parent. in addition, further use of aio from the child could malfunction or deadlock.
POSIX specifies that async io operations are not inherited by the child on fork, so clear the entire aio fd map in the child, and take the aio map lock (with signals blocked) across the fork so that the lock is kept in a consistent state. --- src/aio/aio.c | 14 ++++++++++++++ src/internal/pthread_impl.h | 2 ++ src/process/fork.c | 3 +++ 3 files changed, 19 insertions(+) diff --git a/src/aio/aio.c b/src/aio/aio.c index 6d34fa869..f59679c3d 100644 --- a/src/aio/aio.c +++ b/src/aio/aio.c @@ -392,6 +392,20 @@ int __aio_close(int fd) return fd; } +void __aio_atfork(int who) +{ + if (who<0) { + pthread_rwlock_rdlock(&maplock); + return; + } + if (who>0 && map) for (int a=0; a<(-1U/2+1)>>24; a++) + if (map[a]) for (int b=0; b<256; b++) + if (map[a][b]) for (int c=0; c<256; c++) + if (map[a][b][c]) for (int d=0; d<256; d++) + map[a][b][c][d] = 0; + pthread_rwlock_unlock(&maplock); +} + weak_alias(aio_cancel, aio_cancel64); weak_alias(aio_error, aio_error64); weak_alias(aio_fsync, aio_fsync64); diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h index 4d709bbc9..358ad1ce4 100644 --- a/src/internal/pthread_impl.h +++ b/src/internal/pthread_impl.h @@ -162,6 +162,8 @@ extern hidden void *__pthread_tsd_main[]; extern hidden volatile int __aio_fut; extern hidden volatile int __eintr_valid_flag; +extern hidden void __aio_atfork(int); + hidden int __clone(int (*)(void *), void *, int, void *, ...); hidden int __set_thread_area(void *); hidden int __libc_sigaction(int, const struct sigaction *, struct sigaction *); diff --git a/src/process/fork.c b/src/process/fork.c index 7e984ff8c..dbaa94025 100644 --- a/src/process/fork.c +++ b/src/process/fork.c @@ -10,6 +10,7 @@ static void dummy(int x) } weak_alias(dummy, __fork_handler); +weak_alias(dummy, __aio_atfork); pid_t fork(void) { @@ -17,6 +18,7 @@ pid_t fork(void) sigset_t set; __fork_handler(-1); __block_all_sigs(&set); + __aio_atfork(-1); #ifdef SYS_fork ret = __syscall(SYS_fork); #else @@ -32,6 +34,7 @@ pid_t 
fork(void) libc.threads_minus_1 = 0; if (libc.need_locks) libc.need_locks = -1; } + __aio_atfork(!ret); __restore_sigs(&set); __fork_handler(!ret); return __syscall_ret(ret); From 36a73770cbfc4d46cfc3559e46bbbfeb05bb828b Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 28 Sep 2020 19:30:19 -0400 Subject: [PATCH 118/189] move __abort_lock to its own file and drop pointless weak_alias trick the dummy definition of __abort_lock in sigaction.c was performing exactly the same role that putting the lock in its own source file could and should have been used to achieve. while we're moving it, give it a proper declaration. --- src/exit/abort.c | 2 -- src/exit/abort_lock.c | 3 +++ src/internal/pthread_impl.h | 2 ++ src/signal/sigaction.c | 6 ------ 4 files changed, 5 insertions(+), 8 deletions(-) create mode 100644 src/exit/abort_lock.c diff --git a/src/exit/abort.c b/src/exit/abort.c index e1980f10a..f21f458ec 100644 --- a/src/exit/abort.c +++ b/src/exit/abort.c @@ -6,8 +6,6 @@ #include "lock.h" #include "ksigaction.h" -hidden volatile int __abort_lock[1]; - _Noreturn void abort(void) { raise(SIGABRT); diff --git a/src/exit/abort_lock.c b/src/exit/abort_lock.c new file mode 100644 index 000000000..3af72c7b6 --- /dev/null +++ b/src/exit/abort_lock.c @@ -0,0 +1,3 @@ +#include "pthread_impl.h" + +volatile int __abort_lock[1]; diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h index 358ad1ce4..10e298815 100644 --- a/src/internal/pthread_impl.h +++ b/src/internal/pthread_impl.h @@ -196,6 +196,8 @@ hidden void __tl_sync(pthread_t); extern hidden volatile int __thread_list_lock; +extern hidden volatile int __abort_lock[1]; + extern hidden unsigned __default_stacksize; extern hidden unsigned __default_guardsize; diff --git a/src/signal/sigaction.c b/src/signal/sigaction.c index c109bea0c..a4737404c 100644 --- a/src/signal/sigaction.c +++ b/src/signal/sigaction.c @@ -7,12 +7,6 @@ #include "lock.h" #include "ksigaction.h" -static volatile int dummy_lock[1] = 
{ 0 }; - -extern hidden volatile int __abort_lock[1]; - -weak_alias(dummy_lock, __abort_lock); - static int unmask_done; static unsigned long handler_set[_NSIG/(8*sizeof(long))]; From 5c6d05dd3eda35a4f1102008588be74a248cc90e Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 28 Sep 2020 19:32:34 -0400 Subject: [PATCH 119/189] fix missing synchronization of fork with abort if the multithreaded parent forked while another thread was calling sigaction for SIGABRT or calling abort, the child could inherit a lock state in which future calls to abort will deadlock, or in which the disposition for SIGABRT has already been reset to SIG_DFL. this is nonconforming since abort is AS-safe and permitted to be called concurrently with fork or in the MT-forked child. --- src/process/fork.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/process/fork.c b/src/process/fork.c index dbaa94025..17fb87ada 100644 --- a/src/process/fork.c +++ b/src/process/fork.c @@ -3,6 +3,7 @@ #include #include "syscall.h" #include "libc.h" +#include "lock.h" #include "pthread_impl.h" static void dummy(int x) @@ -19,6 +20,7 @@ pid_t fork(void) __fork_handler(-1); __block_all_sigs(&set); __aio_atfork(-1); + LOCK(__abort_lock); #ifdef SYS_fork ret = __syscall(SYS_fork); #else @@ -34,6 +36,7 @@ pid_t fork(void) libc.threads_minus_1 = 0; if (libc.need_locks) libc.need_locks = -1; } + UNLOCK(__abort_lock); __aio_atfork(!ret); __restore_sigs(&set); __fork_handler(!ret); From 6e8b5bef186364d1ebeb52897c624e9b4810866c Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 29 Sep 2020 14:02:06 -0400 Subject: [PATCH 120/189] drop use of pthread_once in mutexattr kernel support tests this makes the code slightly smaller and eliminates these functions from relevance to possible future changes to multithreaded fork. the barrier of a_store isn't technically needed here, but a_store is used anyway for internal consistency of the memory model. 
--- src/thread/pthread_mutexattr_setprotocol.c | 19 +++++++++---------- src/thread/pthread_mutexattr_setrobust.c | 20 +++++++++----------- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/src/thread/pthread_mutexattr_setprotocol.c b/src/thread/pthread_mutexattr_setprotocol.c index 511cc32d8..8b80c1ce9 100644 --- a/src/thread/pthread_mutexattr_setprotocol.c +++ b/src/thread/pthread_mutexattr_setprotocol.c @@ -1,24 +1,23 @@ #include "pthread_impl.h" #include "syscall.h" -static pthread_once_t check_pi_once; -static int check_pi_result; - -static void check_pi() -{ - volatile int lk = 0; - check_pi_result = -__syscall(SYS_futex, &lk, FUTEX_LOCK_PI, 0, 0); -} +static volatile int check_pi_result = -1; int pthread_mutexattr_setprotocol(pthread_mutexattr_t *a, int protocol) { + int r; switch (protocol) { case PTHREAD_PRIO_NONE: a->__attr &= ~8; return 0; case PTHREAD_PRIO_INHERIT: - pthread_once(&check_pi_once, check_pi); - if (check_pi_result) return check_pi_result; + r = check_pi_result; + if (r < 0) { + volatile int lk = 0; + r = -__syscall(SYS_futex, &lk, FUTEX_LOCK_PI, 0, 0); + a_store(&check_pi_result, r); + } + if (r) return r; a->__attr |= 8; return 0; case PTHREAD_PRIO_PROTECT: diff --git a/src/thread/pthread_mutexattr_setrobust.c b/src/thread/pthread_mutexattr_setrobust.c index 04db92a62..30a9ac3be 100644 --- a/src/thread/pthread_mutexattr_setrobust.c +++ b/src/thread/pthread_mutexattr_setrobust.c @@ -1,22 +1,20 @@ #include "pthread_impl.h" #include "syscall.h" -static pthread_once_t check_robust_once; -static int check_robust_result; - -static void check_robust() -{ - void *p; - size_t l; - check_robust_result = -__syscall(SYS_get_robust_list, 0, &p, &l); -} +static volatile int check_robust_result = -1; int pthread_mutexattr_setrobust(pthread_mutexattr_t *a, int robust) { if (robust > 1U) return EINVAL; if (robust) { - pthread_once(&check_robust_once, check_robust); - if (check_robust_result) return check_robust_result; + int r = 
check_robust_result; + if (r < 0) { + void *p; + size_t l; + r = -__syscall(SYS_get_robust_list, 0, &p, &l); + a_store(&check_robust_result, r); + } + if (r) return r; a->__attr |= 4; return 0; } From 38f8db0f98e334458f620b7e9591fc23fb52e982 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 29 Sep 2020 18:42:05 -0400 Subject: [PATCH 121/189] fix stale lock when allocation of ctor queue fails during dlopen queue_ctors should not be called with the init_fini_lock held, since it may longjmp out on allocation failure. this introduces a minor TOCTOU race with p->constructed, but one already exists further down anyway, and by design it's okay to run through the queue more than once anyway. the only reason we bother to check p->constructed at all is to avoid spurious failure of dlopen when the library is already fully loaded and constructed. --- ldso/dynlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ldso/dynlink.c b/ldso/dynlink.c index f74747433..15e9e4f94 100644 --- a/ldso/dynlink.c +++ b/ldso/dynlink.c @@ -2055,8 +2055,9 @@ void *dlopen(const char *file, int mode) load_deps(p); extend_bfs_deps(p); pthread_mutex_lock(&init_fini_lock); - if (!p->constructed) ctor_queue = queue_ctors(p); + int constructed = p->constructed; pthread_mutex_unlock(&init_fini_lock); + if (!constructed) ctor_queue = queue_ctors(p); if (!p->relocated && (mode & RTLD_LAZY)) { prepare_lazy(p); for (i=0; p->deps[i]; i++) From d5b3af7d8c0d22210e13ef5aeaf6880dd137227c Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 29 Sep 2020 19:16:19 -0400 Subject: [PATCH 122/189] ldso: use pthread_t rather than kernel tid to track ctor visitor commit 188759bbee057aa94db2bbb7cf7f5855f3b9ab53 documented the intent to allow recursive dlopen based on tracking ctor_visitor, but used a kernel tid rather than the pthread_t to identify the caller. as a result, it would not behave as intended under fork by a ctor, where the child tid would not match. 
--- ldso/dynlink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ldso/dynlink.c b/ldso/dynlink.c index 15e9e4f94..af983692b 100644 --- a/ldso/dynlink.c +++ b/ldso/dynlink.c @@ -78,7 +78,7 @@ struct dso { struct dso **deps, *needed_by; size_t ndeps_direct; size_t next_dep; - int ctor_visitor; + pthread_t ctor_visitor; char *rpath_orig, *rpath; struct tls_module tls; size_t tls_id; @@ -1378,7 +1378,7 @@ void __libc_exit_fini() { struct dso *p; size_t dyn[DYN_CNT]; - int self = __pthread_self()->tid; + pthread_t self = __pthread_self(); /* Take both locks before setting shutting_down, so that * either lock is sufficient to read its value. The lock @@ -1470,7 +1470,7 @@ static void do_init_fini(struct dso **queue) { struct dso *p; size_t dyn[DYN_CNT], i; - int self = __pthread_self()->tid; + pthread_t self = __pthread_self(); pthread_mutex_lock(&init_fini_lock); for (i=0; (p=queue[i]); i++) { From 4e2e251f7ff9e07372abee4061bed0472e8191b8 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 30 Sep 2020 13:24:28 -0400 Subject: [PATCH 123/189] rename fork source file this is in preparation for implementing _Fork from POSIX-future, factored as a separate commit to improve readability of history. --- src/process/{fork.c => _Fork.c} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/process/{fork.c => _Fork.c} (100%) diff --git a/src/process/fork.c b/src/process/_Fork.c similarity index 100% rename from src/process/fork.c rename to src/process/_Fork.c From b97094b6e153a02044fffbaf086182bef3adabc1 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 30 Sep 2020 13:32:59 -0400 Subject: [PATCH 124/189] implement _Fork and refactor fork using it the _Fork interface is defined for future issue of POSIX as the outcome of Austin Group issue 62, which drops the AS-safety requirement for fork, and provides an AS-safe replacement that does not run the registered atfork handlers. 
--- include/unistd.h | 1 + src/process/_Fork.c | 11 ++--------- src/process/fork.c | 13 +++++++++++++ 3 files changed, 16 insertions(+), 9 deletions(-) create mode 100644 src/process/fork.c diff --git a/include/unistd.h b/include/unistd.h index 07584a23e..130640260 100644 --- a/include/unistd.h +++ b/include/unistd.h @@ -82,6 +82,7 @@ unsigned sleep(unsigned); int pause(void); pid_t fork(void); +pid_t _Fork(void); int execve(const char *, char *const [], char *const []); int execv(const char *, char *const []); int execle(const char *, const char *, ...); diff --git a/src/process/_Fork.c b/src/process/_Fork.c index 17fb87ada..1f41197c1 100644 --- a/src/process/_Fork.c +++ b/src/process/_Fork.c @@ -1,23 +1,17 @@ #include -#include #include #include "syscall.h" #include "libc.h" #include "lock.h" #include "pthread_impl.h" -static void dummy(int x) -{ -} - -weak_alias(dummy, __fork_handler); +static void dummy(int x) { } weak_alias(dummy, __aio_atfork); -pid_t fork(void) +pid_t _Fork(void) { pid_t ret; sigset_t set; - __fork_handler(-1); __block_all_sigs(&set); __aio_atfork(-1); LOCK(__abort_lock); @@ -39,6 +33,5 @@ pid_t fork(void) UNLOCK(__abort_lock); __aio_atfork(!ret); __restore_sigs(&set); - __fork_handler(!ret); return __syscall_ret(ret); } diff --git a/src/process/fork.c b/src/process/fork.c new file mode 100644 index 000000000..a12da01ab --- /dev/null +++ b/src/process/fork.c @@ -0,0 +1,13 @@ +#include +#include "libc.h" + +static void dummy(int x) { } +weak_alias(dummy, __fork_handler); + +pid_t fork(void) +{ + __fork_handler(-1); + pid_t ret = _Fork(); + __fork_handler(!ret); + return ret; +} From 847bfbf982aba5ef2286e5e18dee5536fb8624c7 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 1 Oct 2020 12:18:18 -0400 Subject: [PATCH 125/189] fix unintended observability of SIGABRT disposition reset via sigaction the existing abort locking logic in sigaction only accounted for attempts to change the disposition, not attempts to observe the change made by 
abort. unfortunately the change is still observable in at least one other place: inheritance of signal dispositions across exec and posix_spawn. fixing these is a separate task and it's not even clear whether a complete fix is possible. --- src/signal/sigaction.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/signal/sigaction.c b/src/signal/sigaction.c index a4737404c..7232d4b90 100644 --- a/src/signal/sigaction.c +++ b/src/signal/sigaction.c @@ -21,6 +21,13 @@ int __libc_sigaction(int sig, const struct sigaction *restrict sa, struct sigact { struct k_sigaction ksa, ksa_old; unsigned long set[_NSIG/(8*sizeof(long))]; + /* Doing anything with the disposition of SIGABRT requires a lock, + * so that it cannot be changed while abort is terminating the + * process and so any change made by abort can't be observed. */ + if (sig == SIGABRT) { + __block_all_sigs(&set); + LOCK(__abort_lock); + } if (sa) { if ((uintptr_t)sa->sa_handler > 1UL) { a_or_l(handler_set+(sig-1)/(8*sizeof(long)), @@ -44,21 +51,13 @@ int __libc_sigaction(int sig, const struct sigaction *restrict sa, struct sigact a_store(&__eintr_valid_flag, 1); } } - /* Changing the disposition of SIGABRT to anything but - * SIG_DFL requires a lock, so that it cannot be changed - * while abort is terminating the process after simply - * calling raise(SIGABRT) failed to do so. */ - if (sa->sa_handler != SIG_DFL && sig == SIGABRT) { - __block_all_sigs(&set); - LOCK(__abort_lock); - } ksa.handler = sa->sa_handler; ksa.flags = sa->sa_flags | SA_RESTORER; ksa.restorer = (sa->sa_flags & SA_SIGINFO) ? 
__restore_rt : __restore; memcpy(&ksa.mask, &sa->sa_mask, _NSIG/8); } int r = __syscall(SYS_rt_sigaction, sig, sa?&ksa:0, old?&ksa_old:0, _NSIG/8); - if (sig == SIGABRT && sa && sa->sa_handler != SIG_DFL) { + if (sig == SIGABRT) { UNLOCK(__abort_lock); __restore_sigs(&set); } From 15f705384496ad02d34f60088fbdb37e6ffbf3bf Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 1 Oct 2020 14:53:12 -0400 Subject: [PATCH 126/189] fix posix_spawn interaction with fork and abort by taking lock this change prevents the child created concurrently with abort from seeing the SIGABRT disposition change from SIG_IGN to SIG_DFL (other changes are not visible anyway) and prevents leaking the write end of the child pipe to children created by fork in another thread, which may block return of posix_spawn indefinitely if the forked child does not exit or exec. along with other changes, this suggests that __abort_lock should perhaps eventually be renamed to reflect that it's becoming a broader lock on related "process lifetime" state. --- src/process/posix_spawn.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/process/posix_spawn.c b/src/process/posix_spawn.c index 29652197c..728551b36 100644 --- a/src/process/posix_spawn.c +++ b/src/process/posix_spawn.c @@ -6,6 +6,7 @@ #include #include #include "syscall.h" +#include "lock.h" #include "pthread_impl.h" #include "fdop.h" @@ -170,9 +171,6 @@ int posix_spawn(pid_t *restrict res, const char *restrict path, int ec=0, cs; struct args args; - if (pipe2(args.p, O_CLOEXEC)) - return errno; - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs); args.path = path; @@ -182,9 +180,20 @@ int posix_spawn(pid_t *restrict res, const char *restrict path, args.envp = envp; pthread_sigmask(SIG_BLOCK, SIGALL_SET, &args.oldmask); + /* The lock guards both against seeing a SIGABRT disposition change + * by abort and against leaking the pipe fd to fork-without-exec. 
*/ + LOCK(__abort_lock); + + if (pipe2(args.p, O_CLOEXEC)) { + UNLOCK(__abort_lock); + ec = errno; + goto fail; + } + pid = __clone(child, stack+sizeof stack, CLONE_VM|CLONE_VFORK|SIGCHLD, &args); close(args.p[1]); + UNLOCK(__abort_lock); if (pid > 0) { if (read(args.p[0], &ec, sizeof ec) != sizeof ec) ec = 0; @@ -197,6 +206,7 @@ int posix_spawn(pid_t *restrict res, const char *restrict path, if (!ec && res) *res = pid; +fail: pthread_sigmask(SIG_SETMASK, &args.oldmask, 0); pthread_setcancelstate(cs, 0); From a206620d8879ee9cf4a7c0e7a652cc70181f27c1 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sat, 10 Oct 2020 20:04:18 -0400 Subject: [PATCH 127/189] fix getgrouplist when nscd reports an empty list commit 500c6886c654fd45e4926990fee2c61d816be197 broke this by fixing the behavior of fread to conform to the C standard; getgroupslist was assuming the old behavior, that a request to read 1 member of length 0 would return 1, not 0. --- src/passwd/getgrouplist.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/passwd/getgrouplist.c b/src/passwd/getgrouplist.c index 43e518245..301824cec 100644 --- a/src/passwd/getgrouplist.c +++ b/src/passwd/getgrouplist.c @@ -31,7 +31,8 @@ int getgrouplist(const char *user, gid_t gid, gid_t *groups, int *ngroups) if (resp[INITGRFOUND]) { nscdbuf = calloc(resp[INITGRNGRPS], sizeof(uint32_t)); if (!nscdbuf) goto cleanup; - if (!fread(nscdbuf, sizeof(*nscdbuf)*resp[INITGRNGRPS], 1, f)) { + size_t nbytes = sizeof(*nscdbuf)*resp[INITGRNGRPS]; + if (nbytes && !fread(nscdbuf, nbytes, 1, f)) { if (!ferror(f)) errno = EIO; goto cleanup; } From 0c34754c65944c566e2ac1ddb2a1d47359ab565d Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 14 Oct 2020 10:55:55 -0400 Subject: [PATCH 128/189] remove incorrect fflush from assert failure handler assert is not specified to flush open stdio streams, and doing so can block indefinitely waiting for a lock already held or an output operation to a file that can't accept more output 
until an unsatisfiable condition is met. --- src/exit/assert.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/exit/assert.c b/src/exit/assert.c index 49b0dc3ec..94edd8272 100644 --- a/src/exit/assert.c +++ b/src/exit/assert.c @@ -4,6 +4,5 @@ _Noreturn void __assert_fail(const char *expr, const char *file, int line, const char *func) { fprintf(stderr, "Assertion failed: %s (%s: %s: %d)\n", expr, file, func, line); - fflush(NULL); abort(); } From 8412e858aa121b009ffa8d0fe76bac3d68fd6e8a Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 14 Oct 2020 19:04:19 -0400 Subject: [PATCH 129/189] remove unused SIGTIMER handler in timer_create this was leftover from when the actual SIGEV_THREAD timer logic was in the signal handler. commit 5b74eed3b301e2227385f3bf26d3bb7c2d822cf8 replaced that with use of sigwaitinfo, with the actual signal left blocked, so the no-op signal handler was no longer serving any purpose. the signal disposition reset to SIG_DFL is still needed, however, in case we inherited SIG_IGN from a foreign-libc process. --- src/time/timer_create.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/time/timer_create.c b/src/time/timer_create.c index 455d49fc5..2b1ea170e 100644 --- a/src/time/timer_create.c +++ b/src/time/timer_create.c @@ -32,15 +32,10 @@ static void cleanup_fromsig(void *p) longjmp(p, 1); } -static void timer_handler(int sig, siginfo_t *si, void *ctx) -{ -} - static void install_handler() { struct sigaction sa = { - .sa_sigaction = timer_handler, - .sa_flags = SA_SIGINFO | SA_RESTART + .sa_handler = SIG_DFL, }; __libc_sigaction(SIGTIMER, &sa, 0); } From 9cca937eb99622b76127b7aaa9e74ec50837e146 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 14 Oct 2020 19:07:27 -0400 Subject: [PATCH 130/189] drop use of pthread_once in timer_create this makes the code slightly smaller and eliminates timer_create from relevance to possible future changes to multithreaded fork. 
the barrier of a_store isn't technically needed here, but a_store is used anyway for internal consistency of the memory model. --- src/time/timer_create.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/time/timer_create.c b/src/time/timer_create.c index 2b1ea170e..5ddfda278 100644 --- a/src/time/timer_create.c +++ b/src/time/timer_create.c @@ -2,6 +2,7 @@ #include #include #include "pthread_impl.h" +#include "atomic.h" struct ksigevent { union sigval sigev_value; @@ -32,14 +33,6 @@ static void cleanup_fromsig(void *p) longjmp(p, 1); } -static void install_handler() -{ - struct sigaction sa = { - .sa_handler = SIG_DFL, - }; - __libc_sigaction(SIGTIMER, &sa, 0); -} - static void *start(void *arg) { pthread_t self = __pthread_self(); @@ -66,7 +59,7 @@ static void *start(void *arg) int timer_create(clockid_t clk, struct sigevent *restrict evp, timer_t *restrict res) { - static pthread_once_t once = PTHREAD_ONCE_INIT; + volatile static int init = 0; pthread_t td; pthread_attr_t attr; int r; @@ -90,7 +83,11 @@ int timer_create(clockid_t clk, struct sigevent *restrict evp, timer_t *restrict *res = (void *)(intptr_t)timerid; break; case SIGEV_THREAD: - pthread_once(&once, install_handler); + if (!init) { + struct sigaction sa = { .sa_handler = SIG_DFL }; + __libc_sigaction(SIGTIMER, &sa, 0); + a_store(&init, 1); + } if (evp->sigev_notify_attributes) attr = *evp->sigev_notify_attributes; else From 58e554fdd9a1a1a303f08221000836963e95c035 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 14 Oct 2020 19:36:03 -0400 Subject: [PATCH 131/189] remove long-unused struct __timer from pthread_impl.h commit 3990c5c6a40440cdb14746ac080d0ecf8d5d6733 removed the last reference. 
--- src/internal/pthread_impl.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h index 10e298815..394dcb19a 100644 --- a/src/internal/pthread_impl.h +++ b/src/internal/pthread_impl.h @@ -73,11 +73,6 @@ enum { DT_DETACHED, }; -struct __timer { - int timerid; - pthread_t thread; -}; - #define __SU (sizeof(size_t)/sizeof(int)) #define _a_stacksize __u.__s[0] From c4e0dcc97555d3ca44edd1698954802364044a9a Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 28 Sep 2020 18:47:13 -0400 Subject: [PATCH 132/189] move aio implementation details to a proper internal header also fix the lack of declaration (and thus hidden visibility) in __stdio_close's use of __aio_close. --- src/aio/aio.c | 1 + src/aio/aio_suspend.c | 1 + src/include/unistd.h | 1 - src/internal/aio_impl.h | 9 +++++++++ src/internal/pthread_impl.h | 3 --- src/process/_Fork.c | 1 + src/stdio/__stdio_close.c | 1 + src/unistd/close.c | 1 + 8 files changed, 14 insertions(+), 4 deletions(-) create mode 100644 src/internal/aio_impl.h diff --git a/src/aio/aio.c b/src/aio/aio.c index f59679c3d..b488e3d64 100644 --- a/src/aio/aio.c +++ b/src/aio/aio.c @@ -9,6 +9,7 @@ #include "syscall.h" #include "atomic.h" #include "pthread_impl.h" +#include "aio_impl.h" /* The following is a threads-based implementation of AIO with minimal * dependence on implementation details. 
Most synchronization is diff --git a/src/aio/aio_suspend.c b/src/aio/aio_suspend.c index 34b66f878..1c1060e34 100644 --- a/src/aio/aio_suspend.c +++ b/src/aio/aio_suspend.c @@ -3,6 +3,7 @@ #include #include "atomic.h" #include "pthread_impl.h" +#include "aio_impl.h" int aio_suspend(const struct aiocb *const cbs[], int cnt, const struct timespec *ts) { diff --git a/src/include/unistd.h b/src/include/unistd.h index 1b4605c7c..7b52a9249 100644 --- a/src/include/unistd.h +++ b/src/include/unistd.h @@ -8,7 +8,6 @@ extern char **__environ; hidden int __dup3(int, int, int); hidden int __mkostemps(char *, int, int); hidden int __execvpe(const char *, char *const *, char *const *); -hidden int __aio_close(int); hidden off_t __lseek(int, off_t, int); #endif diff --git a/src/internal/aio_impl.h b/src/internal/aio_impl.h new file mode 100644 index 000000000..a86576654 --- /dev/null +++ b/src/internal/aio_impl.h @@ -0,0 +1,9 @@ +#ifndef AIO_IMPL_H +#define AIO_IMPL_H + +extern hidden volatile int __aio_fut; + +extern hidden int __aio_close(int); +extern hidden void __aio_atfork(int); + +#endif diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h index 394dcb19a..1322a6a81 100644 --- a/src/internal/pthread_impl.h +++ b/src/internal/pthread_impl.h @@ -154,11 +154,8 @@ hidden int __pthread_key_delete_impl(pthread_key_t); extern hidden volatile size_t __pthread_tsd_size; extern hidden void *__pthread_tsd_main[]; -extern hidden volatile int __aio_fut; extern hidden volatile int __eintr_valid_flag; -extern hidden void __aio_atfork(int); - hidden int __clone(int (*)(void *), void *, int, void *, ...); hidden int __set_thread_area(void *); hidden int __libc_sigaction(int, const struct sigaction *, struct sigaction *); diff --git a/src/process/_Fork.c b/src/process/_Fork.c index 1f41197c1..da0638681 100644 --- a/src/process/_Fork.c +++ b/src/process/_Fork.c @@ -4,6 +4,7 @@ #include "libc.h" #include "lock.h" #include "pthread_impl.h" +#include "aio_impl.h" static void 
dummy(int x) { } weak_alias(dummy, __aio_atfork); diff --git a/src/stdio/__stdio_close.c b/src/stdio/__stdio_close.c index 79452bdb6..302913285 100644 --- a/src/stdio/__stdio_close.c +++ b/src/stdio/__stdio_close.c @@ -1,4 +1,5 @@ #include "stdio_impl.h" +#include "aio_impl.h" static int dummy(int fd) { diff --git a/src/unistd/close.c b/src/unistd/close.c index 5b38e0194..a2105f506 100644 --- a/src/unistd/close.c +++ b/src/unistd/close.c @@ -1,5 +1,6 @@ #include #include +#include "aio_impl.h" #include "syscall.h" static int dummy(int fd) From 6803c9523473c9d60d7ee26fc956cabf72d88bb6 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 14 Oct 2020 20:31:23 -0400 Subject: [PATCH 133/189] remove unused weak definition of __tl_sync in membarrier.c --- src/linux/membarrier.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/linux/membarrier.c b/src/linux/membarrier.c index 9ebe906ed..343f7360e 100644 --- a/src/linux/membarrier.c +++ b/src/linux/membarrier.c @@ -9,13 +9,8 @@ static void dummy_0(void) { } -static void dummy_1(pthread_t t) -{ -} - weak_alias(dummy_0, __tl_lock); weak_alias(dummy_0, __tl_unlock); -weak_alias(dummy_1, __tl_sync); static sem_t barrier_sem; From ab5b83287aa8330331ffc15be3046772a438d163 Mon Sep 17 00:00:00 2001 From: Julien Ramseier Date: Sun, 18 Oct 2020 12:15:06 -0400 Subject: [PATCH 134/189] update crypt_blowfish to support $2b$ prefix Merge changes from Solar Designer's crypt_blowfish v1.3. This makes crypt_blowfish fully compatible with OpenBSD's bcrypt by adding support for the $2b$ prefix (which behaves the same as crypt_blowfish's $2y$). --- src/crypt/crypt_blowfish.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/src/crypt/crypt_blowfish.c b/src/crypt/crypt_blowfish.c index d3f798517..d722607b0 100644 --- a/src/crypt/crypt_blowfish.c +++ b/src/crypt/crypt_blowfish.c @@ -15,7 +15,7 @@ * No copyright is claimed, and the software is hereby placed in the public * domain. 
In case this attempt to disclaim copyright and place the software * in the public domain is deemed null and void, then the software is - * Copyright (c) 1998-2012 Solar Designer and it is hereby released to the + * Copyright (c) 1998-2014 Solar Designer and it is hereby released to the * general public under the following terms: * * Redistribution and use in source and binary forms, with or without @@ -31,12 +31,12 @@ * you place this code and any modifications you make under a license * of your choice. * - * This implementation is mostly compatible with OpenBSD's bcrypt.c (prefix - * "$2a$") by Niels Provos , and uses some of his - * ideas. The password hashing algorithm was designed by David Mazieres - * . For more information on the level of compatibility, - * please refer to the comments in BF_set_key() below and to the included - * crypt(3) man page. + * This implementation is fully compatible with OpenBSD's bcrypt.c for prefix + * "$2b$", originally by Niels Provos , and it uses + * some of his ideas. The password hashing algorithm was designed by David + * Mazieres . For information on the level of + * compatibility for bcrypt hash prefixes other than "$2b$", please refer to + * the comments in BF_set_key() below and to the included crypt(3) man page. 
* * There's a paper on the algorithm that explains its design decisions: * @@ -533,6 +533,7 @@ static void BF_set_key(const char *key, BF_key expanded, BF_key initial, * Valid combinations of settings are: * * Prefix "$2a$": bug = 0, safety = 0x10000 + * Prefix "$2b$": bug = 0, safety = 0 * Prefix "$2x$": bug = 1, safety = 0 * Prefix "$2y$": bug = 0, safety = 0 */ @@ -596,12 +597,14 @@ static void BF_set_key(const char *key, BF_key expanded, BF_key initial, initial[0] ^= sign; } +static const unsigned char flags_by_subtype[26] = { + 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 0 +}; + static char *BF_crypt(const char *key, const char *setting, char *output, BF_word min) { - static const unsigned char flags_by_subtype[26] = - {2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 0}; struct { BF_ctx ctx; BF_key expanded_key; @@ -746,9 +749,11 @@ char *__crypt_blowfish(const char *key, const char *setting, char *output) { const char *test_key = "8b \xd0\xc1\xd2\xcf\xcc\xd8"; const char *test_setting = "$2a$00$abcdefghijklmnopqrstuu"; - static const char test_hash[2][34] = - {"VUrPmXD6q/nVSSp7pNDhCR9071IfIRe\0\x55", /* $2x$ */ - "i1D709vfamulimlGcq0qq3UvuUasvEa\0\x55"}; /* $2a$, $2y$ */ + static const char test_hashes[2][34] = { + "i1D709vfamulimlGcq0qq3UvuUasvEa\0\x55", /* 'a', 'b', 'y' */ + "VUrPmXD6q/nVSSp7pNDhCR9071IfIRe\0\x55", /* 'x' */ + }; + const char *test_hash = test_hashes[0]; char *retval; const char *p; int ok; @@ -768,8 +773,11 @@ char *__crypt_blowfish(const char *key, const char *setting, char *output) * detected by the self-test. 
*/ memcpy(buf.s, test_setting, sizeof(buf.s)); - if (retval) + if (retval) { + unsigned int flags = flags_by_subtype[setting[2] - 'a']; + test_hash = test_hashes[flags & 1]; buf.s[2] = setting[2]; + } memset(buf.o, 0x55, sizeof(buf.o)); buf.o[sizeof(buf.o) - 1] = 0; p = BF_crypt(test_key, buf.s, buf.o, 1); @@ -777,7 +785,7 @@ char *__crypt_blowfish(const char *key, const char *setting, char *output) ok = (p == buf.o && !memcmp(p, buf.s, 7 + 22) && !memcmp(p + (7 + 22), - test_hash[buf.s[2] & 1], + test_hash, 31 + 1 + 1 + 1)); { From 0f3da189722b30d0737a90db08342a4ea9e0c1db Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sat, 24 Oct 2020 13:34:29 -0400 Subject: [PATCH 135/189] lift sigaction abort locking to fix posix_spawn child deadlock commit 25ea9f712c30c32957de493d4711ee39d0bbb024 introduced a deadlock to the posix_spawn child whereby, if abort was called in the parent and ended up taking the abort lock to terminate the process, the __libc_sigaction calls in the child would wait forever to obtain a lock that would not be released. this could be fixed by having abort set the abort lock as the exit futex address, but it's cleaner to just remove the SIGABRT special handling from the internal __libc_sigaction and lift it to the public sigaction function. nothing but the posix_spawn child calls __libc_sigaction on SIGABRT, and since commit b7bc966522d73e1dc420b5ee6fc7a2e78099a08c the abort lock is held at the time of __clone, which precludes the child inheriting a kernel-level signal disposition inconsistent with the disposition on the abstract machine. this means it's fine to inspect and modify the disposition in the child without a lock. 
--- src/signal/sigaction.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/signal/sigaction.c b/src/signal/sigaction.c index 7232d4b90..2203471b2 100644 --- a/src/signal/sigaction.c +++ b/src/signal/sigaction.c @@ -20,14 +20,6 @@ volatile int __eintr_valid_flag; int __libc_sigaction(int sig, const struct sigaction *restrict sa, struct sigaction *restrict old) { struct k_sigaction ksa, ksa_old; - unsigned long set[_NSIG/(8*sizeof(long))]; - /* Doing anything with the disposition of SIGABRT requires a lock, - * so that it cannot be changed while abort is terminating the - * process and so any change made by abort can't be observed. */ - if (sig == SIGABRT) { - __block_all_sigs(&set); - LOCK(__abort_lock); - } if (sa) { if ((uintptr_t)sa->sa_handler > 1UL) { a_or_l(handler_set+(sig-1)/(8*sizeof(long)), @@ -57,10 +49,6 @@ int __libc_sigaction(int sig, const struct sigaction *restrict sa, struct sigact memcpy(&ksa.mask, &sa->sa_mask, _NSIG/8); } int r = __syscall(SYS_rt_sigaction, sig, sa?&ksa:0, old?&ksa_old:0, _NSIG/8); - if (sig == SIGABRT) { - UNLOCK(__abort_lock); - __restore_sigs(&set); - } if (old && !r) { old->sa_handler = ksa_old.handler; old->sa_flags = ksa_old.flags; @@ -71,11 +59,26 @@ int __libc_sigaction(int sig, const struct sigaction *restrict sa, struct sigact int __sigaction(int sig, const struct sigaction *restrict sa, struct sigaction *restrict old) { + unsigned long set[_NSIG/(8*sizeof(long))]; + if (sig-32U < 3 || sig-1U >= _NSIG-1) { errno = EINVAL; return -1; } - return __libc_sigaction(sig, sa, old); + + /* Doing anything with the disposition of SIGABRT requires a lock, + * so that it cannot be changed while abort is terminating the + * process and so any change made by abort can't be observed. 
*/ + if (sig == SIGABRT) { + __block_all_sigs(&set); + LOCK(__abort_lock); + } + int r = __libc_sigaction(sig, sa, old); + if (sig == SIGABRT) { + UNLOCK(__abort_lock); + __restore_sigs(&set); + } + return r; } weak_alias(__sigaction, sigaction); From fdf8adce110a23007e683f3d43a09b9ce1311bc7 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 26 Oct 2020 15:56:25 -0400 Subject: [PATCH 136/189] fix pthread_cond_wait paired with priority-inheritance mutex pthread_cond_wait arranged for requeued waiters to wake when the mutex is unlocked by temporarily adjusting the mutex's waiter count. commit 54ca677983d47529bab8752315ac1a2b49888870 broke this when introducing PI mutexes by repurposing the waiter count field of the mutex structure. since then, for PI mutexes, the waiter count adjustment was misinterpreted by the mutex locking code as indicating that the mutex is in a non-recoverable state. it would be possible to special-case PI mutexes here, but instead just drop all adjustment of the waiters count, and instead use the lock word waiters bit for all mutex types. since the mutex is either held by the caller or in unrecoverable state at the time the bit is set, it will necessarily still be set at the time of any subsequent valid unlock operation, and this will produce the desired effect of waking the next waiter. if waiter counts are entirely dropped at some point in the future this code should still work without modification.
--- src/thread/pthread_cond_timedwait.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/thread/pthread_cond_timedwait.c b/src/thread/pthread_cond_timedwait.c index d15012406..f5f37af16 100644 --- a/src/thread/pthread_cond_timedwait.c +++ b/src/thread/pthread_cond_timedwait.c @@ -146,14 +146,13 @@ int __pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restri if (oldstate == WAITING) goto done; - if (!node.next) a_inc(&m->_m_waiters); - /* Unlock the barrier that's holding back the next waiter, and * either wake it or requeue it to the mutex. */ - if (node.prev) - unlock_requeue(&node.prev->barrier, &m->_m_lock, m->_m_type & 128); - else - a_dec(&m->_m_waiters); + if (node.prev) { + int val = m->_m_lock; + if (val>0) a_cas(&m->_m_lock, val, val|0x80000000); + unlock_requeue(&node.prev->barrier, &m->_m_lock, m->_m_type & (8|128)); + } /* Since a signal was consumed, cancellation is not permitted. */ if (e == ECANCELED) e = 0; From f89507b9f6b8aefea36b1bcdfc443c37f2e9592d Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 26 Oct 2020 18:06:18 -0400 Subject: [PATCH 137/189] fix reintroduction of errno clobbering by atfork handlers commit bd153422f28634bb6e53f13f80beb8289d405267 reintroduced the bug fixed in c21051e90cd27a0b26be0ac66950b7396a156ba1 by refactoring the __syscall_ret into _Fork where it once again runs before the atfork handlers are called. since _Fork is a public interface that sets errno, this can't be fixed the way it was fixed last time without making new internal interfaces. instead, just save errno, and restore it only on error to ensure that a value of 0 is never restored. 
--- src/process/fork.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/process/fork.c b/src/process/fork.c index a12da01ab..8d34a9c4b 100644 --- a/src/process/fork.c +++ b/src/process/fork.c @@ -1,4 +1,5 @@ #include +#include #include "libc.h" static void dummy(int x) { } @@ -8,6 +9,8 @@ pid_t fork(void) { __fork_handler(-1); pid_t ret = _Fork(); + int errno_save = errno; __fork_handler(!ret); + if (ret<0) errno = errno_save; return ret; } From 29a6efdd26e002eeb34314b456d4344c3b24a059 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 27 Oct 2020 00:45:59 -0400 Subject: [PATCH 138/189] avoid __synccall for setrlimit on kernels with prlimit syscall resource limits have been process-wide since linux 2.6.10, and the prlimit syscall was added in 2.6.36, so prlimit can be assumed to set the resource limits correctly for the whole process. --- src/misc/setrlimit.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/src/misc/setrlimit.c b/src/misc/setrlimit.c index 7a66ab297..8340aee09 100644 --- a/src/misc/setrlimit.c +++ b/src/misc/setrlimit.c @@ -6,25 +6,8 @@ #define MIN(a, b) ((a)<(b) ? 
(a) : (b)) #define FIX(x) do{ if ((x)>=SYSCALL_RLIM_INFINITY) (x)=RLIM_INFINITY; }while(0) -static int __setrlimit(int resource, const struct rlimit *rlim) -{ - unsigned long k_rlim[2]; - struct rlimit tmp; - if (SYSCALL_RLIM_INFINITY != RLIM_INFINITY) { - tmp = *rlim; - FIX(tmp.rlim_cur); - FIX(tmp.rlim_max); - rlim = &tmp; - } - int ret = __syscall(SYS_prlimit64, 0, resource, rlim, 0); - if (ret != -ENOSYS) return ret; - k_rlim[0] = MIN(rlim->rlim_cur, MIN(-1UL, SYSCALL_RLIM_INFINITY)); - k_rlim[1] = MIN(rlim->rlim_max, MIN(-1UL, SYSCALL_RLIM_INFINITY)); - return __syscall(SYS_setrlimit, resource, k_rlim); -} - struct ctx { - const struct rlimit *rlim; + unsigned long lim[2]; int res; int err; }; @@ -33,12 +16,26 @@ static void do_setrlimit(void *p) { struct ctx *c = p; if (c->err>0) return; - c->err = -__setrlimit(c->res, c->rlim); + c->err = -__syscall(SYS_setrlimit, c->res, c->lim); } int setrlimit(int resource, const struct rlimit *rlim) { - struct ctx c = { .res = resource, .rlim = rlim, .err = -1 }; + struct rlimit tmp; + if (SYSCALL_RLIM_INFINITY != RLIM_INFINITY) { + tmp = *rlim; + FIX(tmp.rlim_cur); + FIX(tmp.rlim_max); + rlim = &tmp; + } + int ret = __syscall(SYS_prlimit64, 0, resource, rlim, 0); + if (ret != -ENOSYS) return __syscall_ret(ret); + + struct ctx c = { + .lim[0] = MIN(rlim->rlim_cur, MIN(-1UL, SYSCALL_RLIM_INFINITY)), + .lim[1] = MIN(rlim->rlim_max, MIN(-1UL, SYSCALL_RLIM_INFINITY)), + .res = resource, .err = -1 + }; __synccall(do_setrlimit, &c); if (c.err) { if (c.err>0) errno = c.err; From 8e886495b78a13a73d2247a7ad1e35dde887cbfa Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 27 Oct 2020 00:59:54 -0400 Subject: [PATCH 139/189] fix setgroups behavior in multithreaded process this function is outside the scope of the standards, but logically should behave like the set*id functions whose effects are process-global. 
--- src/linux/setgroups.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/linux/setgroups.c b/src/linux/setgroups.c index 1248fdbfd..47142f141 100644 --- a/src/linux/setgroups.c +++ b/src/linux/setgroups.c @@ -1,8 +1,36 @@ #define _GNU_SOURCE #include +#include #include "syscall.h" +#include "libc.h" + +struct ctx { + size_t count; + const gid_t *list; + int ret; +}; + +static void do_setgroups(void *p) +{ + struct ctx *c = p; + if (c->ret<0) return; + int ret = __syscall(SYS_setgroups, c->count, c->list); + if (ret && !c->ret) { + /* If one thread fails to set groups after another has already + * succeeded, forcibly killing the process is the only safe + * thing to do. State is inconsistent and dangerous. Use + * SIGKILL because it is uncatchable. */ + __block_all_sigs(0); + __syscall(SYS_kill, __syscall(SYS_getpid), SIGKILL); + } + c->ret = ret; +} int setgroups(size_t count, const gid_t list[]) { - return syscall(SYS_setgroups, count, list); + /* ret is initially nonzero so that failure of the first thread does not + * trigger the safety kill above. */ + struct ctx c = { .count = count, .list = list, .ret = 1 }; + __synccall(do_setgroups, &c); + return __syscall_ret(c.ret); } From 0e9dc98daa7d35f796b5622030b555e2acdf1aae Mon Sep 17 00:00:00 2001 From: rcombs Date: Sat, 24 Oct 2020 14:31:14 -0500 Subject: [PATCH 140/189] ldso: notify the debugger when we're doing a dlopen Otherwise lldb doesn't notice the new library and stack traces containing it get cut off unhelpfully. 
--- ldso/dynlink.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ldso/dynlink.c b/ldso/dynlink.c index af983692b..f9ac0100c 100644 --- a/ldso/dynlink.c +++ b/ldso/dynlink.c @@ -1947,7 +1947,7 @@ void __dls3(size_t *sp, size_t *auxv) debug.bp = dl_debug_state; debug.head = head; debug.base = ldso.base; - debug.state = 0; + debug.state = RT_CONSISTENT; _dl_debug_state(); if (replace_argv0) argv[0] = replace_argv0; @@ -1996,6 +1996,9 @@ void *dlopen(const char *file, int mode) pthread_rwlock_wrlock(&lock); __inhibit_ptc(); + debug.state = RT_ADD; + _dl_debug_state(); + p = 0; if (shutting_down) { error("Cannot dlopen while program is exiting."); @@ -2089,9 +2092,10 @@ void *dlopen(const char *file, int mode) update_tls_size(); if (tls_cnt != orig_tls_cnt) install_new_tls(); - _dl_debug_state(); orig_tail = tail; end: + debug.state = RT_CONSISTENT; + _dl_debug_state(); __release_ptc(); if (p) gencnt++; pthread_rwlock_unlock(&lock); From 3397f8d4b823450619433ac84cb2f7de153b599b Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 27 Oct 2020 01:12:56 -0400 Subject: [PATCH 141/189] refactor setxid return path to use __syscall_ret this avoids some spurious negation and duplicated errno logic, and brings the code in line with the newly-added multithreaded setgroups. 
--- src/unistd/setxid.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/src/unistd/setxid.c b/src/unistd/setxid.c index 0239f8afa..487c1a160 100644 --- a/src/unistd/setxid.c +++ b/src/unistd/setxid.c @@ -1,20 +1,19 @@ #include -#include +#include #include "syscall.h" #include "libc.h" -#include "pthread_impl.h" struct ctx { int id, eid, sid; - int nr, err; + int nr, ret; }; static void do_setxid(void *p) { struct ctx *c = p; - if (c->err>0) return; - int ret = -__syscall(c->nr, c->id, c->eid, c->sid); - if (ret && !c->err) { + if (c->ret<0) return; + int ret = __syscall(c->nr, c->id, c->eid, c->sid); + if (ret && !c->ret) { /* If one thread fails to set ids after another has already * succeeded, forcibly killing the process is the only safe * thing to do. State is inconsistent and dangerous. Use @@ -22,18 +21,14 @@ static void do_setxid(void *p) __block_all_sigs(0); __syscall(SYS_kill, __syscall(SYS_getpid), SIGKILL); } - c->err = ret; + c->ret = ret; } int __setxid(int nr, int id, int eid, int sid) { - /* err is initially nonzero so that failure of the first thread does not + /* ret is initially nonzero so that failure of the first thread does not * trigger the safety kill above. */ - struct ctx c = { .nr = nr, .id = id, .eid = eid, .sid = sid, .err = -1 }; + struct ctx c = { .nr = nr, .id = id, .eid = eid, .sid = sid, .ret = 1 }; __synccall(do_setxid, &c); - if (c.err) { - if (c.err>0) errno = c.err; - return -1; - } - return 0; + return __syscall_ret(c.ret); } From 4f16f151bf2890c43d4909cc4adab5acfc9ab81d Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 28 Oct 2020 16:13:45 -0400 Subject: [PATCH 142/189] fix sem_close unmapping of still-referenced semaphore sem_open is required to return the same sem_t pointer for all references to the same named semaphore when it's opened more than once in the same process. thus we keep a table of all the mapped semaphores and their reference counts. 
the code path for sem_close checked the reference count, but then proceeded to unmap the semaphore regardless of whether the count had reached zero. add an immediate unlock-and-return for the nonzero refcnt case so the property of performing the munmap syscall after releasing the lock can be preserved. --- src/thread/sem_open.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/thread/sem_open.c b/src/thread/sem_open.c index de8555c5a..6fb0c5b25 100644 --- a/src/thread/sem_open.c +++ b/src/thread/sem_open.c @@ -163,10 +163,12 @@ int sem_close(sem_t *sem) int i; LOCK(lock); for (i=0; i Date: Sun, 30 Jun 2019 21:55:20 -0400 Subject: [PATCH 143/189] add support for SIGEV_THREAD_ID timers This is like SIGEV_SIGNAL, but targeted to a particular thread's tid, rather than the process. --- include/signal.h | 16 +++++++++++++--- src/time/timer_create.c | 8 ++++++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/include/signal.h b/include/signal.h index fbdf667b2..9ed929e4f 100644 --- a/include/signal.h +++ b/include/signal.h @@ -180,14 +180,24 @@ struct sigevent { union sigval sigev_value; int sigev_signo; int sigev_notify; - void (*sigev_notify_function)(union sigval); - pthread_attr_t *sigev_notify_attributes; - char __pad[56-3*sizeof(long)]; + union { + char __pad[64 - 2*sizeof(int) - sizeof(union sigval)]; + pid_t sigev_notify_thread_id; + struct { + void (*sigev_notify_function)(union sigval); + pthread_attr_t *sigev_notify_attributes; + } __sev_thread; + } __sev_fields; }; +#define sigev_notify_thread_id __sev_fields.sigev_notify_thread_id +#define sigev_notify_function __sev_fields.__sev_thread.sigev_notify_function +#define sigev_notify_attributes __sev_fields.__sev_thread.sigev_notify_attributes + #define SIGEV_SIGNAL 0 #define SIGEV_NONE 1 #define SIGEV_THREAD 2 +#define SIGEV_THREAD_ID 4 int __libc_current_sigrtmin(void); int __libc_current_sigrtmax(void); diff --git a/src/time/timer_create.c b/src/time/timer_create.c 
index 5ddfda278..4bef23905 100644 --- a/src/time/timer_create.c +++ b/src/time/timer_create.c @@ -71,11 +71,15 @@ int timer_create(clockid_t clk, struct sigevent *restrict evp, timer_t *restrict switch (evp ? evp->sigev_notify : SIGEV_SIGNAL) { case SIGEV_NONE: case SIGEV_SIGNAL: + case SIGEV_THREAD_ID: if (evp) { ksev.sigev_value = evp->sigev_value; ksev.sigev_signo = evp->sigev_signo; ksev.sigev_notify = evp->sigev_notify; - ksev.sigev_tid = 0; + if (evp->sigev_notify == SIGEV_THREAD_ID) + ksev.sigev_tid = evp->sigev_notify_thread_id; + else + ksev.sigev_tid = 0; ksevp = &ksev; } if (syscall(SYS_timer_create, clk, ksevp, &timerid) < 0) @@ -107,7 +111,7 @@ int timer_create(clockid_t clk, struct sigevent *restrict evp, timer_t *restrict ksev.sigev_value.sival_ptr = 0; ksev.sigev_signo = SIGTIMER; - ksev.sigev_notify = 4; /* SIGEV_THREAD_ID */ + ksev.sigev_notify = SIGEV_THREAD_ID; ksev.sigev_tid = td->tid; if (syscall(SYS_timer_create, clk, &ksev, &timerid) < 0) timerid = -1; From bd1b2fe702a853425a178a81bd48f5b5ffaaa2cc Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 30 Oct 2020 11:21:06 -0400 Subject: [PATCH 144/189] fix missing-wake regression in pthread_cond_wait the reasoning in commit 2d0bbe6c788938d1332609c014eeebc1dff966ac was not entirely correct. while it's true that setting the waiters flag ensures that the next unlock will perform a wake, it's possible that the wake is consumed by a mutex waiter that has no relationship with the condvar wait queue being processed, which then takes the mutex. when that thread subsequently unlocks, it sees no waiters, and leaves the rest of the condvar queue stuck. bring back the waiter count adjustment, but skip it for PI mutexes, for which a successful lock-after-waiting always sets the waiters bit. if future changes are made to bring this same waiters-bit contract to all lock types, this can be reverted. 
--- src/thread/pthread_cond_timedwait.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/thread/pthread_cond_timedwait.c b/src/thread/pthread_cond_timedwait.c index f5f37af16..a0cd4904a 100644 --- a/src/thread/pthread_cond_timedwait.c +++ b/src/thread/pthread_cond_timedwait.c @@ -146,12 +146,17 @@ int __pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restri if (oldstate == WAITING) goto done; + if (!node.next && !(m->_m_type & 8)) + a_inc(&m->_m_waiters); + /* Unlock the barrier that's holding back the next waiter, and * either wake it or requeue it to the mutex. */ if (node.prev) { int val = m->_m_lock; if (val>0) a_cas(&m->_m_lock, val, val|0x80000000); unlock_requeue(&node.prev->barrier, &m->_m_lock, m->_m_type & (8|128)); + } else if (!!(m->_m_type & 8)) { + a_dec(&m->_m_waiters); } /* Since a signal was consumed, cancellation is not permitted. */ From c9682c03c50a6bf2bf88dc55bb6b1e76763f2485 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 30 Oct 2020 16:50:08 -0400 Subject: [PATCH 145/189] fix erroneous pthread_cond_wait mutex waiter count logic due to typo introduced in commit 27b2fc9d6db956359727a66c262f1e69995660aa. 
--- src/thread/pthread_cond_timedwait.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/thread/pthread_cond_timedwait.c b/src/thread/pthread_cond_timedwait.c index a0cd4904a..6b761455c 100644 --- a/src/thread/pthread_cond_timedwait.c +++ b/src/thread/pthread_cond_timedwait.c @@ -155,7 +155,7 @@ int __pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restri int val = m->_m_lock; if (val>0) a_cas(&m->_m_lock, val, val|0x80000000); unlock_requeue(&node.prev->barrier, &m->_m_lock, m->_m_type & (8|128)); - } else if (!!(m->_m_type & 8)) { + } else if (!(m->_m_type & 8)) { a_dec(&m->_m_waiters); } From 95694af8541dc390e9b0f5f3e6d460c9bcde6590 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 2 Nov 2020 23:25:12 -0500 Subject: [PATCH 146/189] fix vector types in aarch64 register file structures the ABI type for the vector registers in fpregset_t, struct fpsimd_context, and struct user_fpsimd_struct is __uint128_t, which was presumably originally not used because it's a nonstandard type, but its existence is mandated by the aarch64 psABI. use of the wrong type here broke software using these structures, and encouraged incorrect fixes with casts rather than reinterpretation of representation. 
--- arch/aarch64/bits/signal.h | 4 ++-- arch/aarch64/bits/user.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/aarch64/bits/signal.h b/arch/aarch64/bits/signal.h index b71261f56..5098c7341 100644 --- a/arch/aarch64/bits/signal.h +++ b/arch/aarch64/bits/signal.h @@ -11,7 +11,7 @@ typedef unsigned long greg_t; typedef unsigned long gregset_t[34]; typedef struct { - long double vregs[32]; + __uint128_t vregs[32]; unsigned int fpsr; unsigned int fpcr; } fpregset_t; @@ -34,7 +34,7 @@ struct fpsimd_context { struct _aarch64_ctx head; unsigned int fpsr; unsigned int fpcr; - long double vregs[32]; + __uint128_t vregs[32]; }; struct esr_context { struct _aarch64_ctx head; diff --git a/arch/aarch64/bits/user.h b/arch/aarch64/bits/user.h index d12cdf7fe..8a1002aa6 100644 --- a/arch/aarch64/bits/user.h +++ b/arch/aarch64/bits/user.h @@ -6,7 +6,7 @@ struct user_regs_struct { }; struct user_fpsimd_struct { - long double vregs[32]; + __uint128_t vregs[32]; unsigned int fpsr; unsigned int fpcr; }; From 40977c7b74f4c1fed1d9abae8e79cd592f5f081d Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 10 Nov 2020 14:29:05 -0500 Subject: [PATCH 147/189] dlerror: don't gratuitously hold freebuf_queue lock while freeing thread-local buffers allocated for dlerror need to be queued for free at a later time when the owning thread exits, since malloc may be replaced by application code and the exiting context is not valid to call application code from. the code to process queue of pending frees, introduced in commit aa5a9d15e09851f7b4a1668e9dbde0f6234abada, gratuitously held the lock for the entire duration of queue processing, updating the global queue pointer after each free, despite there being no logical requirement that all frees finish before another thread can access the queue. instead, immediately claim the whole queue for freeing and release the lock, then walk the list and perform frees without the lock held. 
the change is unlikely to make any meaningful difference to performance, but it eliminates one point where the allocator is called under an internal lock. since the allocator may be application-provided, such calls are undesirable because they allow application code to impede forward progress of libc functions in other threads arbitrarily long, and to induce deadlock if it calls a libc function that requires the same lock. the change also eliminates a lock ordering consideration that's an impediment upcoming work with multithreaded fork. --- src/ldso/dlerror.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/ldso/dlerror.c b/src/ldso/dlerror.c index 3fcc77795..d8bbfc03b 100644 --- a/src/ldso/dlerror.c +++ b/src/ldso/dlerror.c @@ -35,13 +35,16 @@ void __dl_thread_cleanup(void) hidden void __dl_vseterr(const char *fmt, va_list ap) { LOCK(freebuf_queue_lock); - while (freebuf_queue) { - void **p = freebuf_queue; - freebuf_queue = *p; - free(p); - } + void **q = freebuf_queue; + freebuf_queue = 0; UNLOCK(freebuf_queue_lock); + while (q) { + void **p = *q; + free(q); + q = p; + } + va_list ap2; va_copy(ap2, ap); pthread_t self = __pthread_self(); From 7d6de0d1fac29cd6539e6d462ffa4a8bcac3e584 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 10 Nov 2020 19:32:09 -0500 Subject: [PATCH 148/189] drop use of getdelim/stdio in dynamic linker the only place stdio was used here was for reading the ldso path file, taking advantage of getdelim to automatically allocate and resize the buffer. the motivation for use here was that, with shared libraries, stdio is already available anyway and free to use. this has long been a nuisance to users because getdelim's use of realloc here triggered a valgrind bug, but removing it doesn't really fix that; on some archs even calling the valgrind-interposed malloc at this point will crash. 
the actual motivation for this change is moving towards getting rid of use of application-provided malloc in parts of libc where it would be called with libc-internal locks held, leading to the possibility of deadlock if the malloc implementation doesn't follow unwritten rules about which libc functions are safe for it to call. since getdelim is required to produce a pointer as if by malloc (i.e. that can be passed to reallor or free), it necessarily must use the public malloc. instead of performing a realloc loop as the path file is read, first query its size with fstat and allocate only once. this produces slightly different truncation behavior when racing with writes to a file, but neither behavior is or could be made safe anyway; on a live system, ldso path files should be replaced by atomic rename only. the change should also reduce memory waste. --- ldso/dynlink.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/ldso/dynlink.c b/ldso/dynlink.c index f9ac0100c..502e52c51 100644 --- a/ldso/dynlink.c +++ b/ldso/dynlink.c @@ -1,6 +1,5 @@ #define _GNU_SOURCE #define SYSCALL_NO_TLS 1 -#include #include #include #include @@ -556,6 +555,20 @@ static void reclaim_gaps(struct dso *dso) } } +static ssize_t read_loop(int fd, void *p, size_t n) +{ + for (size_t i=0; i=0) { + size_t n = 0; + if (!fstat(fd, &st)) n = st.st_size; + if ((sys_path = malloc(n+1))) + sys_path[n] = 0; + if (!sys_path || read_loop(fd, sys_path, n)<0) { free(sys_path); sys_path = ""; } - fclose(f); + close(fd); } else if (errno != ENOENT) { sys_path = ""; } From ac3d6b173679622c91e8da8c282877600f8d02bd Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 11 Nov 2020 00:22:34 -0500 Subject: [PATCH 149/189] give libc access to its own malloc even if public malloc is interposed allowing the application to replace malloc (since commit c9f415d7ea2dace5bf77f6518b6afc36bb7a5732) has brought multiple headaches where it's used from various critical sections in 
libc components. for example: - the thread-local message buffers allocated for dlerror can't be freed at thread exit time because application code would then run in the context of a non-existant thread. this was handled in commit aa5a9d15e09851f7b4a1668e9dbde0f6234abada by queuing them for free later. - the dynamic linker has to be careful not to pass memory allocated at early startup time (necessarily using its own malloc) to realloc or free after redoing relocations with the application and all libraries present. bugs in this area were fixed several times, at least in commits 0c5c8f5da6e36fe4ab704bee0cd981837859e23f and 2f1f51ae7b2d78247568e7fdb8462f3c19e469a4 and possibly others. - by calling the allocator from contexts where libc-internal locks are held, we impose undocumented requirements on alternate malloc implementations not to call into any libc function that might attempt to take these locks; if they do, deadlock results. - work to make fork of a multithreaded parent give the child an unrestricted execution environment is blocked by lock order issues as long as the application-provided allocator can be called with libc-internal locks held. these problems are all fixed by giving libc internals access to the original, non-replaced allocator, for use where needed. it can't be used everywhere, as some interfaces like str[n]dup, open_[w]memstream, getline/getdelim, etc. are required to provide the called memory obtained as if by (the public) malloc. and there are a number of libc interfaces that are "pure library" code, not part of some internal singleton, and where using the application's choice of malloc implementation is preferable -- things like glob, regex, etc. one might expect there to be significant cost to static-linked programs, pulling in two malloc implementations, one of them mostly-unused, if malloc is replaced. however, in almost all of the places where malloc is used internally, care has been taken already not to pull in realloc/free (i.e. 
to link with just the bump allocator). this size optimization carries over automatically. the newly-exposed internal allocator functions are obtained by renaming the actual definitions, then adding new wrappers around them with the public names. technically __libc_realloc and __libc_free could be aliases rather than needing a layer of wrapper, but this would almost surely break certain instrumentation (valgrind) and the size and performance difference is negligible. __libc_calloc needs to be handled specially since calloc is designed to work with either the internal or the replaced malloc. as a bonus, this change also eliminates the longstanding ugly dependency of the static bump allocator on order of object files in libc.a, by making it so there's only one definition of the malloc function and having it in the same source file as the bump allocator. --- src/include/stdlib.h | 6 ++++++ src/malloc/free.c | 6 ++++++ src/malloc/libc_calloc.c | 4 ++++ src/malloc/lite_malloc.c | 14 +++++++++++++- src/malloc/mallocng/glue.h | 4 ++++ src/malloc/oldmalloc/malloc.c | 4 ++++ src/malloc/realloc.c | 6 ++++++ 7 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 src/malloc/free.c create mode 100644 src/malloc/libc_calloc.c create mode 100644 src/malloc/realloc.c diff --git a/src/include/stdlib.h b/src/include/stdlib.h index d38a5417f..e9da20158 100644 --- a/src/include/stdlib.h +++ b/src/include/stdlib.h @@ -9,4 +9,10 @@ hidden int __mkostemps(char *, int, int); hidden int __ptsname_r(int, char *, size_t); hidden char *__randname(char *); +hidden void *__libc_malloc(size_t); +hidden void *__libc_malloc_impl(size_t); +hidden void *__libc_calloc(size_t, size_t); +hidden void *__libc_realloc(void *, size_t); +hidden void __libc_free(void *); + #endif diff --git a/src/malloc/free.c b/src/malloc/free.c new file mode 100644 index 000000000..f17a952cb --- /dev/null +++ b/src/malloc/free.c @@ -0,0 +1,6 @@ +#include + +void free(void *p) +{ + return __libc_free(p); +} diff 
--git a/src/malloc/libc_calloc.c b/src/malloc/libc_calloc.c new file mode 100644 index 000000000..d25eabea4 --- /dev/null +++ b/src/malloc/libc_calloc.c @@ -0,0 +1,4 @@ +#define calloc __libc_calloc +#define malloc __libc_malloc + +#include "calloc.c" diff --git a/src/malloc/lite_malloc.c b/src/malloc/lite_malloc.c index f8931ba59..0f4616173 100644 --- a/src/malloc/lite_malloc.c +++ b/src/malloc/lite_malloc.c @@ -100,4 +100,16 @@ static void *__simple_malloc(size_t n) return p; } -weak_alias(__simple_malloc, malloc); +weak_alias(__simple_malloc, __libc_malloc_impl); + +void *__libc_malloc(size_t n) +{ + return __libc_malloc_impl(n); +} + +static void *default_malloc(size_t n) +{ + return __libc_malloc_impl(n); +} + +weak_alias(default_malloc, malloc); diff --git a/src/malloc/mallocng/glue.h b/src/malloc/mallocng/glue.h index 16acd1ea3..8d7d9a3b9 100644 --- a/src/malloc/mallocng/glue.h +++ b/src/malloc/mallocng/glue.h @@ -20,6 +20,10 @@ #define is_allzero __malloc_allzerop #define dump_heap __dump_heap +#define malloc __libc_malloc_impl +#define realloc __libc_realloc +#define free __libc_free + #if USE_REAL_ASSERT #include #else diff --git a/src/malloc/oldmalloc/malloc.c b/src/malloc/oldmalloc/malloc.c index c0997ad85..0c082bce1 100644 --- a/src/malloc/oldmalloc/malloc.c +++ b/src/malloc/oldmalloc/malloc.c @@ -10,6 +10,10 @@ #include "pthread_impl.h" #include "malloc_impl.h" +#define malloc __libc_malloc +#define realloc __libc_realloc +#define free __libc_free + #if defined(__GNUC__) && defined(__PIC__) #define inline inline __attribute__((always_inline)) #endif diff --git a/src/malloc/realloc.c b/src/malloc/realloc.c new file mode 100644 index 000000000..fb0e8b7c4 --- /dev/null +++ b/src/malloc/realloc.c @@ -0,0 +1,6 @@ +#include + +void *realloc(void *p, size_t n) +{ + return __libc_realloc(p, n); +} From f524af2816aaa1749b4d25a2a8fcdaa15e6b2774 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 11 Nov 2020 13:08:42 -0500 Subject: [PATCH 150/189] convert 
malloc use under libc-internal locks to use internal allocator this change lifts undocumented restrictions on calls by replacement mallocs to libc functions that might take these locks, and sets the stage for lifting restrictions on the child execution environment after multithreaded fork. care is taken to #define macros to replace all four functions (malloc, calloc, realloc, free) even if not all of them will be used, using an undefined symbol name for the ones intended not to be used so that any inadvertent future use will be caught at compile time rather than directed to the wrong implementation. --- ldso/dynlink.c | 5 +++++ src/aio/aio.c | 5 +++++ src/exit/atexit.c | 5 +++++ src/ldso/dlerror.c | 5 +++++ src/locale/dcngettext.c | 5 +++++ src/locale/locale_map.c | 6 ++++++ src/thread/sem_open.c | 5 +++++ src/time/__tz.c | 5 +++++ 8 files changed, 41 insertions(+) diff --git a/ldso/dynlink.c b/ldso/dynlink.c index 502e52c51..61714f40f 100644 --- a/ldso/dynlink.c +++ b/ldso/dynlink.c @@ -23,6 +23,11 @@ #include "libc.h" #include "dynlink.h" +#define malloc __libc_malloc +#define calloc __libc_calloc +#define realloc __libc_realloc +#define free __libc_free + static void error(const char *, ...); #define MAXP2(a,b) (-(-(a)&-(b))) diff --git a/src/aio/aio.c b/src/aio/aio.c index b488e3d64..e004f98bc 100644 --- a/src/aio/aio.c +++ b/src/aio/aio.c @@ -11,6 +11,11 @@ #include "pthread_impl.h" #include "aio_impl.h" +#define malloc __libc_malloc +#define calloc __libc_calloc +#define realloc __libc_realloc +#define free __libc_free + /* The following is a threads-based implementation of AIO with minimal * dependence on implementation details. 
Most synchronization is * performed with pthread primitives, but atomics and futex operations diff --git a/src/exit/atexit.c b/src/exit/atexit.c index 160d277ae..fcd940faf 100644 --- a/src/exit/atexit.c +++ b/src/exit/atexit.c @@ -3,6 +3,11 @@ #include "libc.h" #include "lock.h" +#define malloc __libc_malloc +#define calloc __libc_calloc +#define realloc undef +#define free undef + /* Ensure that at least 32 atexit handlers can be registered without malloc */ #define COUNT 32 diff --git a/src/ldso/dlerror.c b/src/ldso/dlerror.c index d8bbfc03b..c782ca6c3 100644 --- a/src/ldso/dlerror.c +++ b/src/ldso/dlerror.c @@ -5,6 +5,11 @@ #include "dynlink.h" #include "lock.h" +#define malloc __libc_malloc +#define calloc __libc_calloc +#define realloc __libc_realloc +#define free __libc_free + char *dlerror() { pthread_t self = __pthread_self(); diff --git a/src/locale/dcngettext.c b/src/locale/dcngettext.c index 4c3043938..39a98e830 100644 --- a/src/locale/dcngettext.c +++ b/src/locale/dcngettext.c @@ -11,6 +11,11 @@ #include "pleval.h" #include "lock.h" +#define malloc __libc_malloc +#define calloc __libc_calloc +#define realloc undef +#define free undef + struct binding { struct binding *next; int dirlen; diff --git a/src/locale/locale_map.c b/src/locale/locale_map.c index e7eede629..94f1b04e8 100644 --- a/src/locale/locale_map.c +++ b/src/locale/locale_map.c @@ -1,10 +1,16 @@ #include #include #include +#include #include "locale_impl.h" #include "libc.h" #include "lock.h" +#define malloc __libc_malloc +#define calloc undef +#define realloc undef +#define free undef + const char *__lctrans_impl(const char *msg, const struct __locale_map *lm) { const char *trans = 0; diff --git a/src/thread/sem_open.c b/src/thread/sem_open.c index 6fb0c5b25..dad8f1779 100644 --- a/src/thread/sem_open.c +++ b/src/thread/sem_open.c @@ -13,6 +13,11 @@ #include #include "lock.h" +#define malloc __libc_malloc +#define calloc __libc_calloc +#define realloc undef +#define free undef + static 
struct { ino_t ino; sem_t *sem; diff --git a/src/time/__tz.c b/src/time/__tz.c index 49a7371eb..3044d2067 100644 --- a/src/time/__tz.c +++ b/src/time/__tz.c @@ -7,6 +7,11 @@ #include "libc.h" #include "lock.h" +#define malloc __libc_malloc +#define calloc undef +#define realloc undef +#define free undef + long __timezone = 0; int __daylight = 0; char *__tzname[2] = { 0, 0 }; From 52c29eabfa770fb1de88e0135632526d37b71d65 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 11 Nov 2020 13:37:33 -0500 Subject: [PATCH 151/189] lift child restrictions after multi-threaded fork as the outcome of Austin Group tracker issue #62, future editions of POSIX have dropped the requirement that fork be AS-safe. this allows but does not require implementations to synchronize fork with internal locks and give forked children of multithreaded parents a partly or fully unrestricted execution environment where they can continue to use the standard library (per POSIX, they can only portably use AS-safe functions). up until recently, taking this allowance did not seem desirable. however, commit 8ed2bd8bfcb4ea6448afb55a941f4b5b2b0398c0 exposed the extent to which applications and libraries are depending on the ability to use malloc and other non-AS-safe interfaces in MT-forked children, by converting latent very-low-probability catastrophic state corruption into predictable deadlock. dealing with the fallout has been a huge burden for users/distros. while it looks like most of the non-portable usage in applications could be fixed given sufficient effort, at least some of it seems to occur in language runtimes which are exposing the ability to run unrestricted code in the child as part of the contract with the programmer. any attempt at fixing such contracts is not just a technical problem but a social one, and is probably not tractable. 
this patch extends the fork function to take locks for all libc singletons in the parent, and release or reset those locks in the child, so that when the underlying fork operation takes place, the state protected by these locks is consistent and ready for the child to use. locking is skipped in the case where the parent is single-threaded so as not to interfere with legacy AS-safety property of fork in single-threaded programs. lock order is mostly arbitrary, but the malloc locks (including bump allocator in case it's used) must be taken after the locks on any subsystems that might use malloc, and non-AS-safe locks cannot be taken while the thread list lock is held, imposing a requirement that it be taken last. --- ldso/dynlink.c | 19 ++++++++++ src/exit/at_quick_exit.c | 2 + src/exit/atexit.c | 2 + src/internal/fork_impl.h | 19 ++++++++++ src/ldso/dlerror.c | 2 + src/locale/dcngettext.c | 5 ++- src/locale/locale_map.c | 5 ++- src/malloc/lite_malloc.c | 5 ++- src/malloc/mallocng/glue.h | 14 ++++++- src/malloc/oldmalloc/malloc.c | 19 ++++++++++ src/misc/syslog.c | 2 + src/prng/random.c | 2 + src/process/fork.c | 70 +++++++++++++++++++++++++++++++++++ src/stdio/ofl.c | 2 + src/thread/sem_open.c | 2 + src/thread/vmlock.c | 2 + src/time/__tz.c | 2 + 17 files changed, 170 insertions(+), 4 deletions(-) create mode 100644 src/internal/fork_impl.h diff --git a/ldso/dynlink.c b/ldso/dynlink.c index 61714f40f..6b868c841 100644 --- a/ldso/dynlink.c +++ b/ldso/dynlink.c @@ -20,6 +20,7 @@ #include #include #include "pthread_impl.h" +#include "fork_impl.h" #include "libc.h" #include "dynlink.h" @@ -1426,6 +1427,17 @@ void __libc_exit_fini() } } +void __ldso_atfork(int who) +{ + if (who<0) { + pthread_rwlock_wrlock(&lock); + pthread_mutex_lock(&init_fini_lock); + } else { + pthread_mutex_unlock(&init_fini_lock); + pthread_rwlock_unlock(&lock); + } +} + static struct dso **queue_ctors(struct dso *dso) { size_t cnt, qpos, spos, i; @@ -1484,6 +1496,13 @@ static struct dso 
**queue_ctors(struct dso *dso) } queue[qpos] = 0; for (i=0; imark = 0; + for (i=0; ictor_visitor && queue[i]->ctor_visitor->tid < 0) { + error("State of %s is inconsistent due to multithreaded fork\n", + queue[i]->name); + free(queue); + if (runtime) longjmp(*rtld_fail, 1); + } return queue; } diff --git a/src/exit/at_quick_exit.c b/src/exit/at_quick_exit.c index d3ce6522d..e4b5d78db 100644 --- a/src/exit/at_quick_exit.c +++ b/src/exit/at_quick_exit.c @@ -1,12 +1,14 @@ #include #include "libc.h" #include "lock.h" +#include "fork_impl.h" #define COUNT 32 static void (*funcs[COUNT])(void); static int count; static volatile int lock[1]; +volatile int *const __at_quick_exit_lockptr = lock; void __funcs_on_quick_exit() { diff --git a/src/exit/atexit.c b/src/exit/atexit.c index fcd940faf..854e9fddb 100644 --- a/src/exit/atexit.c +++ b/src/exit/atexit.c @@ -2,6 +2,7 @@ #include #include "libc.h" #include "lock.h" +#include "fork_impl.h" #define malloc __libc_malloc #define calloc __libc_calloc @@ -20,6 +21,7 @@ static struct fl static int slot; static volatile int lock[1]; +volatile int *const __atexit_lockptr = lock; void __funcs_on_exit() { diff --git a/src/internal/fork_impl.h b/src/internal/fork_impl.h new file mode 100644 index 000000000..5892c13bf --- /dev/null +++ b/src/internal/fork_impl.h @@ -0,0 +1,19 @@ +#include + +extern hidden volatile int *const __at_quick_exit_lockptr; +extern hidden volatile int *const __atexit_lockptr; +extern hidden volatile int *const __dlerror_lockptr; +extern hidden volatile int *const __gettext_lockptr; +extern hidden volatile int *const __locale_lockptr; +extern hidden volatile int *const __random_lockptr; +extern hidden volatile int *const __sem_open_lockptr; +extern hidden volatile int *const __stdio_ofl_lockptr; +extern hidden volatile int *const __syslog_lockptr; +extern hidden volatile int *const __timezone_lockptr; + +extern hidden volatile int *const __bump_lockptr; + +extern hidden volatile int *const __vmlock_lockptr; + 
+hidden void __malloc_atfork(int); +hidden void __ldso_atfork(int); diff --git a/src/ldso/dlerror.c b/src/ldso/dlerror.c index c782ca6c3..afe59253e 100644 --- a/src/ldso/dlerror.c +++ b/src/ldso/dlerror.c @@ -4,6 +4,7 @@ #include "pthread_impl.h" #include "dynlink.h" #include "lock.h" +#include "fork_impl.h" #define malloc __libc_malloc #define calloc __libc_calloc @@ -24,6 +25,7 @@ char *dlerror() static volatile int freebuf_queue_lock[1]; static void **freebuf_queue; +volatile int *const __dlerror_lockptr = freebuf_queue_lock; void __dl_thread_cleanup(void) { diff --git a/src/locale/dcngettext.c b/src/locale/dcngettext.c index 39a98e830..d1e6c6d13 100644 --- a/src/locale/dcngettext.c +++ b/src/locale/dcngettext.c @@ -10,6 +10,7 @@ #include "atomic.h" #include "pleval.h" #include "lock.h" +#include "fork_impl.h" #define malloc __libc_malloc #define calloc __libc_calloc @@ -39,9 +40,11 @@ static char *gettextdir(const char *domainname, size_t *dirlen) return 0; } +static volatile int lock[1]; +volatile int *const __gettext_lockptr = lock; + char *bindtextdomain(const char *domainname, const char *dirname) { - static volatile int lock[1]; struct binding *p, *q; if (!domainname) return 0; diff --git a/src/locale/locale_map.c b/src/locale/locale_map.c index 94f1b04e8..fa51f2e36 100644 --- a/src/locale/locale_map.c +++ b/src/locale/locale_map.c @@ -5,6 +5,7 @@ #include "locale_impl.h" #include "libc.h" #include "lock.h" +#include "fork_impl.h" #define malloc __libc_malloc #define calloc undef @@ -27,9 +28,11 @@ static const char envvars[][12] = { "LC_MESSAGES", }; +static volatile int lock[1]; +volatile int *const __locale_lockptr = lock; + const struct __locale_map *__get_locale(int cat, const char *val) { - static volatile int lock[1]; static void *volatile loc_head; const struct __locale_map *p; struct __locale_map *new = 0; diff --git a/src/malloc/lite_malloc.c b/src/malloc/lite_malloc.c index 0f4616173..43a988fbb 100644 --- a/src/malloc/lite_malloc.c +++ 
b/src/malloc/lite_malloc.c @@ -6,6 +6,7 @@ #include "libc.h" #include "lock.h" #include "syscall.h" +#include "fork_impl.h" #define ALIGN 16 @@ -31,10 +32,12 @@ static int traverses_stack_p(uintptr_t old, uintptr_t new) return 0; } +static volatile int lock[1]; +volatile int *const __bump_lockptr = lock; + static void *__simple_malloc(size_t n) { static uintptr_t brk, cur, end; - static volatile int lock[1]; static unsigned mmap_step; size_t align=1; void *p; diff --git a/src/malloc/mallocng/glue.h b/src/malloc/mallocng/glue.h index 8d7d9a3b9..151c48b80 100644 --- a/src/malloc/mallocng/glue.h +++ b/src/malloc/mallocng/glue.h @@ -60,7 +60,8 @@ __attribute__((__visibility__("hidden"))) extern int __malloc_lock[1]; #define LOCK_OBJ_DEF \ -int __malloc_lock[1]; +int __malloc_lock[1]; \ +void __malloc_atfork(int who) { malloc_atfork(who); } static inline void rdlock() { @@ -77,5 +78,16 @@ static inline void unlock() static inline void upgradelock() { } +static inline void resetlock() +{ + __malloc_lock[0] = 0; +} + +static inline void malloc_atfork(int who) +{ + if (who<0) rdlock(); + else if (who>0) resetlock(); + else unlock(); +} #endif diff --git a/src/malloc/oldmalloc/malloc.c b/src/malloc/oldmalloc/malloc.c index 0c082bce1..53f5f959e 100644 --- a/src/malloc/oldmalloc/malloc.c +++ b/src/malloc/oldmalloc/malloc.c @@ -9,6 +9,7 @@ #include "atomic.h" #include "pthread_impl.h" #include "malloc_impl.h" +#include "fork_impl.h" #define malloc __libc_malloc #define realloc __libc_realloc @@ -531,3 +532,21 @@ void __malloc_donate(char *start, char *end) c->csize = n->psize = C_INUSE | (end-start); __bin_chunk(c); } + +void __malloc_atfork(int who) +{ + if (who<0) { + lock(mal.split_merge_lock); + for (int i=0; i<64; i++) + lock(mal.bins[i].lock); + } else if (!who) { + for (int i=0; i<64; i++) + unlock(mal.bins[i].lock); + unlock(mal.split_merge_lock); + } else { + for (int i=0; i<64; i++) + mal.bins[i].lock[0] = mal.bins[i].lock[1] = 0; + mal.split_merge_lock[1] = 0; + 
mal.split_merge_lock[0] = 0; + } +} diff --git a/src/misc/syslog.c b/src/misc/syslog.c index 13d4b0a6d..7dc0c1be5 100644 --- a/src/misc/syslog.c +++ b/src/misc/syslog.c @@ -10,6 +10,7 @@ #include #include #include "lock.h" +#include "fork_impl.h" static volatile int lock[1]; static char log_ident[32]; @@ -17,6 +18,7 @@ static int log_opt; static int log_facility = LOG_USER; static int log_mask = 0xff; static int log_fd = -1; +volatile int *const __syslog_lockptr = lock; int setlogmask(int maskpri) { diff --git a/src/prng/random.c b/src/prng/random.c index 633a17f69..d3780fa7e 100644 --- a/src/prng/random.c +++ b/src/prng/random.c @@ -1,6 +1,7 @@ #include #include #include "lock.h" +#include "fork_impl.h" /* this code uses the same lagged fibonacci generator as the @@ -23,6 +24,7 @@ static int i = 3; static int j = 0; static uint32_t *x = init+1; static volatile int lock[1]; +volatile int *const __random_lockptr = lock; static uint32_t lcg31(uint32_t x) { return (1103515245*x + 12345) & 0x7fffffff; diff --git a/src/process/fork.c b/src/process/fork.c index 8d34a9c4b..54bc28920 100644 --- a/src/process/fork.c +++ b/src/process/fork.c @@ -1,15 +1,85 @@ #include #include #include "libc.h" +#include "lock.h" +#include "pthread_impl.h" +#include "fork_impl.h" + +static volatile int *const dummy_lockptr = 0; + +weak_alias(dummy_lockptr, __at_quick_exit_lockptr); +weak_alias(dummy_lockptr, __atexit_lockptr); +weak_alias(dummy_lockptr, __dlerror_lockptr); +weak_alias(dummy_lockptr, __gettext_lockptr); +weak_alias(dummy_lockptr, __locale_lockptr); +weak_alias(dummy_lockptr, __random_lockptr); +weak_alias(dummy_lockptr, __sem_open_lockptr); +weak_alias(dummy_lockptr, __stdio_ofl_lockptr); +weak_alias(dummy_lockptr, __syslog_lockptr); +weak_alias(dummy_lockptr, __timezone_lockptr); +weak_alias(dummy_lockptr, __bump_lockptr); + +weak_alias(dummy_lockptr, __vmlock_lockptr); + +static volatile int *const *const atfork_locks[] = { + &__at_quick_exit_lockptr, + &__atexit_lockptr, + 
&__dlerror_lockptr, + &__gettext_lockptr, + &__locale_lockptr, + &__random_lockptr, + &__sem_open_lockptr, + &__stdio_ofl_lockptr, + &__syslog_lockptr, + &__timezone_lockptr, + &__bump_lockptr, +}; static void dummy(int x) { } weak_alias(dummy, __fork_handler); +weak_alias(dummy, __malloc_atfork); +weak_alias(dummy, __ldso_atfork); + +static void dummy_0(void) { } +weak_alias(dummy_0, __tl_lock); +weak_alias(dummy_0, __tl_unlock); pid_t fork(void) { + sigset_t set; __fork_handler(-1); + __block_app_sigs(&set); + int need_locks = libc.need_locks > 0; + if (need_locks) { + __ldso_atfork(-1); + __inhibit_ptc(); + for (int i=0; i<sizeof atfork_locks/sizeof *atfork_locks; i++) + if (*atfork_locks[i]) LOCK(*atfork_locks[i]); + __malloc_atfork(-1); + __tl_lock(); + } + pthread_t self=__pthread_self(), next=self->next; pid_t ret = _Fork(); int errno_save = errno; + if (need_locks) { + if (!ret) { + for (pthread_t td=next; td!=self; td=td->next) + td->tid = -1; + if (__vmlock_lockptr) { + __vmlock_lockptr[0] = 0; + __vmlock_lockptr[1] = 0; + } + } + __tl_unlock(); + __malloc_atfork(!ret); + for (int i=0; i<sizeof atfork_locks/sizeof *atfork_locks; i++) + if (*atfork_locks[i]) UNLOCK(*atfork_locks[i]); + __release_ptc(); + __ldso_atfork(!ret); + } + __restore_sigs(&set); __fork_handler(!ret); if (ret<0) errno = errno_save; return ret; } diff --git a/src/thread/sem_open.c b/src/thread/sem_open.c --- a/src/thread/sem_open.c +++ b/src/thread/sem_open.c @@ -11,6 +11,7 @@ #include <unistd.h> #include <pthread.h> #include "lock.h" +#include "fork_impl.h" #define malloc __libc_malloc #define calloc __libc_calloc @@ -24,6 +25,7 @@ static struct { int refcnt; } *semtab; static volatile int lock[1]; +volatile int *const __sem_open_lockptr = lock; #define FLAGS (O_RDWR|O_NOFOLLOW|O_CLOEXEC|O_NONBLOCK) diff --git a/src/thread/vmlock.c b/src/thread/vmlock.c index 75f3cb761..fa0a8e3c2 100644 --- a/src/thread/vmlock.c +++ b/src/thread/vmlock.c @@ -1,6 +1,8 @@ #include "pthread_impl.h" +#include "fork_impl.h" static volatile int vmlock[2]; +volatile int *const __vmlock_lockptr = vmlock; void __vm_wait() { diff --git a/src/time/__tz.c b/src/time/__tz.c index 3044d2067..dd2c42c07 100644 --- a/src/time/__tz.c +++ b/src/time/__tz.c @@ -6,6 +6,7 @@ #include <sys/mman.h> #include "libc.h" #include "lock.h" +#include "fork_impl.h" #define malloc __libc_malloc #define calloc undef @@ -35,6 +36,7 @@ static char *old_tz = old_tz_buf; static size_t old_tz_size = sizeof old_tz_buf; static volatile int lock[1]; +volatile int *const __timezone_lockptr = lock; static int getint(const char 
**p) { From 6674ce73d60b25423fdc8e398a056f9e151fcc5c Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 19 Nov 2020 16:09:16 -0500 Subject: [PATCH 152/189] pthread_exit: don't __vm_wait under thread list lock the __vm_wait operation can delay forward progress arbitrarily long if a thread holding the lock is interrupted by a signal. in a worst case this can deadlock. any critical section holding the thread list lock must respect lock ordering contracts and must not take any lock which is not AS-safe. to fix, move the determination of thread joinable/detached state to take place before the killlock and thread list lock are taken. this requires reverting the atomic state transition if we determine that the exiting thread is the last thread and must call exit, but that's easy to do since it's a single-threaded context with application signals blocked. --- src/thread/pthread_create.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c index 557441559..250cd0a4c 100644 --- a/src/thread/pthread_create.c +++ b/src/thread/pthread_create.c @@ -69,12 +69,25 @@ _Noreturn void __pthread_exit(void *result) __pthread_tsd_run_dtors(); + __block_app_sigs(&set); + + /* This atomic potentially competes with a concurrent pthread_detach + * call; the loser is responsible for freeing thread resources. */ + int state = a_cas(&self->detach_state, DT_JOINABLE, DT_EXITING); + + if (state==DT_DETACHED && self->map_base) { + /* Since __unmapself bypasses the normal munmap code path, + * explicitly wait for vmlock holders first. This must be + * done before any locks are taken, to avoid lock ordering + * issues that could lead to deadlock. */ + __vm_wait(); + } + /* Access to target the exiting thread with syscalls that use * its kernel tid is controlled by killlock. 
For detached threads, * any use past this point would have undefined behavior, but for * joinable threads it's a valid usage that must be handled. * Signals must be blocked since pthread_kill must be AS-safe. */ - __block_app_sigs(&set); LOCK(self->killlock); /* The thread list lock must be AS-safe, and thus depends on @@ -87,6 +100,7 @@ _Noreturn void __pthread_exit(void *result) if (self->next == self) { __tl_unlock(); UNLOCK(self->killlock); + self->detach_state = state; __restore_sigs(&set); exit(0); } @@ -125,10 +139,6 @@ _Noreturn void __pthread_exit(void *result) self->prev->next = self->next; self->prev = self->next = self; - /* This atomic potentially competes with a concurrent pthread_detach - * call; the loser is responsible for freeing thread resources. */ - int state = a_cas(&self->detach_state, DT_JOINABLE, DT_EXITING); - if (state==DT_DETACHED && self->map_base) { /* Detached threads must block even implementation-internal * signals, since they will not have a stack in their last @@ -140,10 +150,6 @@ _Noreturn void __pthread_exit(void *result) if (self->robust_list.off) __syscall(SYS_set_robust_list, 0, 3*sizeof(long)); - /* Since __unmapself bypasses the normal munmap code path, - * explicitly wait for vmlock holders first. */ - __vm_wait(); - /* The following call unmaps the thread's stack mapping * and then exits without touching the stack. */ __unmapself(self->map_base, self->map_size); From 666949245a727617916a8b6a9c7ceceb83ae3153 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 19 Nov 2020 16:20:45 -0500 Subject: [PATCH 153/189] protect destruction of process-shared mutexes against robust list races after a non-normal-type process-shared mutex is unlocked, it's immediately available to another thread to lock, unlock, and destroy, but the first unlocking thread may still have a pointer to it in its robust_list pending slot. 
this means, on async process termination, the kernel may attempt to access and modify the memory that used to contain the mutex -- memory that may have been reused for some other purpose after the mutex was destroyed. setting up for this kind of race to occur is difficult to begin with, requiring dynamic use of shared memory maps, and actually hitting the race is very difficult even with a suitable setup. so this is mostly a theoretical fix, but in any case the cost is very low. --- src/thread/pthread_mutex_destroy.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/thread/pthread_mutex_destroy.c b/src/thread/pthread_mutex_destroy.c index 6d49e6898..8d1bf77b8 100644 --- a/src/thread/pthread_mutex_destroy.c +++ b/src/thread/pthread_mutex_destroy.c @@ -1,6 +1,10 @@ -#include +#include "pthread_impl.h" int pthread_mutex_destroy(pthread_mutex_t *mutex) { + /* If the mutex being destroyed is process-shared and has nontrivial + * type (tracking ownership), it might be in the pending slot of a + * robust_list; wait for quiescence. */ + if (mutex->_m_type > 128) __vm_wait(); return 0; } From 26a2ecfd1016539491219c75ae9f8511f5c4ee32 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 19 Nov 2020 17:12:43 -0500 Subject: [PATCH 154/189] rewrite wcsnrtombs to fix buffer overflow and other bugs the original wcsnrtombs implementation, which has been largely untouched since 0.5.0, attempted to build input-length-limiting conversion on top of wcsrtombs, which only limits output length. as best I recall, this choice was made out of a mix of disdain over having yet another variant function to implement (added in POSIX 2008; not standard C) and preference not to switch things around and implement the wcsrtombs in terms of the more general new function, probably over namespace issues. the strategy employed was to impose output limits that would ensure the input limit wasn't exceeded, then finish up the tail character-at-a-time. 
unfortunately, none of that worked correctly. first, the logic in the wcsrtombs loop was wrong in that it could easily get stuck making no forward progress, by imposing an output limit too small to convert even one character. the character-at-a-time loop that followed was even worse. it made no effort to ensure that the converted multibyte character would fit in the remaining output space, only that there was a nonzero amount of output space remaining. it also employed an incorrect interpretation of wcrtomb's interface contract for converting the null character, thereby failing to act on end of input, and remaining space accounting was subject to unsigned wrap-around. together these errors allow unbounded overflow of the destination buffer, controlled by input length limit and input wchar_t string contents. given the extent to which this function was broken, it's plausible that most applications that would have been rendered exploitable were sufficiently broken not to be usable in the first place. however, it's also plausible that common (especially ASCII-only) inputs succeeded in the wcsrtombs loop, which mostly worked, while leaving the wildly erroneous code in the second loop exposed to particular non-ASCII inputs. CVE-2020-28928 has been assigned for this issue. 
--- src/multibyte/wcsnrtombs.c | 46 ++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/src/multibyte/wcsnrtombs.c b/src/multibyte/wcsnrtombs.c index 676932b5d..95e25e708 100644 --- a/src/multibyte/wcsnrtombs.c +++ b/src/multibyte/wcsnrtombs.c @@ -1,41 +1,33 @@ #include <wchar.h> +#include <limits.h> +#include <string.h> size_t wcsnrtombs(char *restrict dst, const wchar_t **restrict wcs, size_t wn, size_t n, mbstate_t *restrict st) { - size_t l, cnt=0, n2; - char *s, buf[256]; const wchar_t *ws = *wcs; - const wchar_t *tmp_ws; - - if (!dst) s = buf, n = sizeof buf; - else s = dst; - - while ( ws && n && ( (n2=wn)>=n || n2>32 ) ) { - if (n2>=n) n2=n; - tmp_ws = ws; - l = wcsrtombs(s, &ws, n2, 0); - if (!(l+1)) { - cnt = l; - n = 0; + size_t cnt = 0; + if (!dst) n=0; + while (ws && wn) { + char tmp[MB_LEN_MAX]; + size_t l = wcrtomb(n<MB_LEN_MAX ? tmp : dst, *ws, 0); + if (l==-1) { + cnt = -1; break; } - if (s != buf) { - s += l; + if (dst) { + if (n<MB_LEN_MAX) { + if (l>n) break; + memcpy(dst, tmp, l); + } + dst += l; n -= l; } - wn = ws ? wn - (ws - tmp_ws) : 0; - cnt += l; - } - if (ws) while (n && wn) { - l = wcrtomb(s, *ws, 0); - if ((l+1)<=1) { - if (!l) ws = 0; - else cnt = l; + if (!*ws) { + ws = 0; break; } - ws++; wn--; - /* safe - this loop runs fewer than sizeof(buf) times */ - s+=l; n-=l; + ws++; + wn--; cnt += l; } if (dst) *wcs = ws; From 290ca04108c6c8274c756c3e6d0a1046b1ed4abc Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 20 Nov 2020 10:43:20 -0500 Subject: [PATCH 155/189] fix regression in pthread_exit commit d26e0774a59bb7245b205bc8e7d8b35cc2037095 moved the detach state transition at exit before the thread list lock was taken. this inadvertently allowed pthread_join to race to take the thread list lock first, and proceed with unmapping of the exiting thread's memory. we could fix this by just reverting the offending commit and instead performing __vm_wait unconditionally before taking the thread list lock, but that may be costly. 
instead, bring back the old DT_EXITING vs DT_EXITED state distinction that was removed in commit 8f11e6127fe93093f81a52b15bb1537edc3fc8af, and don't transition to DT_EXITED (a value of 0, which is what pthread_join waits for) until after the lock has been taken. --- src/internal/pthread_impl.h | 3 ++- src/thread/pthread_create.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h index 1322a6a81..de2b9d8b4 100644 --- a/src/internal/pthread_impl.h +++ b/src/internal/pthread_impl.h @@ -68,7 +68,8 @@ struct pthread { }; enum { - DT_EXITING = 0, + DT_EXITED = 0, + DT_EXITING, DT_JOINABLE, DT_DETACHED, }; diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c index 250cd0a4c..6f187ee89 100644 --- a/src/thread/pthread_create.c +++ b/src/thread/pthread_create.c @@ -156,6 +156,7 @@ _Noreturn void __pthread_exit(void *result) } /* Wake any joiner. */ + a_store(&self->detach_state, DT_EXITED); __wake(&self->detach_state, 1, 1); /* After the kernel thread exits, its tid may be reused. Clear it From bbd5e8a6dd42b27edfc386d3c0b9d3f510f96850 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sun, 22 Nov 2020 17:00:01 -0500 Subject: [PATCH 156/189] explicitly prefer 64-bit/v2 zoneinfo tables since commit 38143339646a4ccce8afe298c34467767c899f51, the condition sizeof(time_t) > 4 is always true, so there is no functional change being made here. but semantically, the 64-bit tables should always be preferred now, because upstream zic (zoneinfo compiler) has quietly switched to emitting empty 32-bit tables by default, and the resulting backwards-incompatible zoneinfo files will be encountered in the wild. 
--- src/time/__tz.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/time/__tz.c b/src/time/__tz.c index dd2c42c07..2eb6ab4e2 100644 --- a/src/time/__tz.c +++ b/src/time/__tz.c @@ -185,7 +185,7 @@ static void do_tzset() zi = map; if (map) { int scale = 2; - if (sizeof(time_t) > 4 && map[4]=='2') { + if (map[4]=='2') { size_t skip = zi_dotprod(zi+20, VEC(1,1,8,5,6,1), 6); trans = zi+skip+44+44; scale++; From a4a40f65aa89285df75839c90c9bcb8d3aee20be Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sun, 22 Nov 2020 17:26:36 -0500 Subject: [PATCH 157/189] parse v3 or future-unknown zoneinfo file versions as v2+ the v1 zoneinfo format with 32-bit time is deprecated. previously, the v2 parsing code was only used if an exact match for '2' was found in the version field of the header. this was already incorrect for v3 files (trivial differences from v2 that arguably didn't merit a new version number anyway) but also failed to be future-proof. --- src/time/__tz.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/time/__tz.c b/src/time/__tz.c index 2eb6ab4e2..09a6317e6 100644 --- a/src/time/__tz.c +++ b/src/time/__tz.c @@ -185,7 +185,7 @@ static void do_tzset() zi = map; if (map) { int scale = 2; - if (map[4]=='2') { + if (map[4]!='1') { size_t skip = zi_dotprod(zi+20, VEC(1,1,8,5,6,1), 6); trans = zi+skip+44+44; scale++; From 4f78d921d4b1829e0ae453bf81e2221f662cdf2f Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 23 Nov 2020 19:44:19 -0500 Subject: [PATCH 158/189] work around linux bug in readlink syscall with zero buffer size linux fails with EINVAL when a zero buffer size is passed to the syscall. this is non-conforming because POSIX already defines EINVAL with a significantly different meaning: the target is not a symlink. since the request is semantically valid, patch it up by using a dummy buffer of length one, and truncating the return value to zero if it succeeds. 
--- src/unistd/readlink.c | 11 +++++++++-- src/unistd/readlinkat.c | 9 ++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/unistd/readlink.c b/src/unistd/readlink.c index a152d5249..32f4537f9 100644 --- a/src/unistd/readlink.c +++ b/src/unistd/readlink.c @@ -4,9 +4,16 @@ ssize_t readlink(const char *restrict path, char *restrict buf, size_t bufsize) { + char dummy[1]; + if (!bufsize) { + buf = dummy; + bufsize = 1; + } #ifdef SYS_readlink - return syscall(SYS_readlink, path, buf, bufsize); + int r = __syscall(SYS_readlink, path, buf, bufsize); #else - return syscall(SYS_readlinkat, AT_FDCWD, path, buf, bufsize); + int r = __syscall(SYS_readlinkat, AT_FDCWD, path, buf, bufsize); #endif + if (buf == dummy && r > 0) r = 0; + return __syscall_ret(r); } diff --git a/src/unistd/readlinkat.c b/src/unistd/readlinkat.c index 9af45cd5a..f79d3d142 100644 --- a/src/unistd/readlinkat.c +++ b/src/unistd/readlinkat.c @@ -3,5 +3,12 @@ ssize_t readlinkat(int fd, const char *restrict path, char *restrict buf, size_t bufsize) { - return syscall(SYS_readlinkat, fd, path, buf, bufsize); + char dummy[1]; + if (!bufsize) { + buf = dummy; + bufsize = 1; + } + int r = __syscall(SYS_readlinkat, fd, path, buf, bufsize); + if (buf == dummy && r > 0) r = 0; + return __syscall_ret(r); } From 334f5351afc91b40d18e31b8cfdc3aa9f2989776 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89rico=20Rolim?= Date: Thu, 26 Nov 2020 10:46:03 -0300 Subject: [PATCH 159/189] fix typo in INSTALL "big-engian" should be "big-endian". 
--- INSTALL | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/INSTALL b/INSTALL index 22477b6ba..c583691d7 100644 --- a/INSTALL +++ b/INSTALL @@ -86,7 +86,7 @@ and ABI combinations: * SuperH (SH) * Standard ELF ABI or FDPIC ABI (shared-text without MMU) - * Little-endian by default; big-engian variant also supported + * Little-endian by default; big-endian variant also supported * Full FPU ABI or soft-float ABI is supported, but the single-precision-only FPU ABI is not From e29e7d2fe854cb00b7f8383011a30bf50f4aa88c Mon Sep 17 00:00:00 2001 From: Jinliang Li Date: Fri, 20 Nov 2020 18:45:03 +0800 Subject: [PATCH 160/189] arm fabs and sqrt: support single-precision-only fpu variants --- src/math/arm/fabs.c | 2 +- src/math/arm/sqrt.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/math/arm/fabs.c b/src/math/arm/fabs.c index f890520a5..6e1d367d3 100644 --- a/src/math/arm/fabs.c +++ b/src/math/arm/fabs.c @@ -1,6 +1,6 @@ #include -#if __ARM_PCS_VFP +#if __ARM_PCS_VFP && __ARM_FP&8 double fabs(double x) { diff --git a/src/math/arm/sqrt.c b/src/math/arm/sqrt.c index 874af9606..567e2e910 100644 --- a/src/math/arm/sqrt.c +++ b/src/math/arm/sqrt.c @@ -1,6 +1,6 @@ #include -#if __ARM_PCS_VFP || (__VFP_FP__ && !__SOFTFP__) +#if (__ARM_PCS_VFP || (__VFP_FP__ && !__SOFTFP__)) && (__ARM_FP&8) double sqrt(double x) { From 07843e6a86de8d1fd497d6ab12ea3c0b30ea9f00 Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Wed, 18 Nov 2020 10:59:31 -0500 Subject: [PATCH 161/189] add missing personality values Adds two missing personality(2) personas: UNAME26 and FDPIC_FUNCPTRS. FDPIC_FUNCPTRS was also missing its corresponding PER_LINUX_FDPIC value. 
--- include/sys/personality.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/sys/personality.h b/include/sys/personality.h index 31d43dfe1..411dc4756 100644 --- a/include/sys/personality.h +++ b/include/sys/personality.h @@ -5,7 +5,9 @@ extern "C" { #endif +#define UNAME26 0x0020000 #define ADDR_NO_RANDOMIZE 0x0040000 +#define FDPIC_FUNCPTRS 0x0080000 #define MMAP_PAGE_ZERO 0x0100000 #define ADDR_COMPAT_LAYOUT 0x0200000 #define READ_IMPLIES_EXEC 0x0400000 @@ -17,6 +19,7 @@ extern "C" { #define PER_LINUX 0 #define PER_LINUX_32BIT ADDR_LIMIT_32BIT +#define PER_LINUX_FDPIC FDPIC_FUNCPTRS #define PER_SVR4 (1 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO) #define PER_SVR3 (2 | STICKY_TIMEOUTS | SHORT_INODE) #define PER_SCOSVR3 (3 | STICKY_TIMEOUTS | WHOLE_SECONDS | SHORT_INODE) From ff712f5e1e3d81e40825ce6be2af06ace2e4bfd8 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Sat, 24 Oct 2020 10:15:43 +0000 Subject: [PATCH 162/189] bits/syscall.h: add __NR_close_range from linux v5.9 see linux commit 9b4feb630e8e9801603f3cab3a36369e3c1cf88d arch: wire-up close_range() linux commit 278a5fbaed89dacd04e9d052f4594ffd0e0585de open: add close_range() --- arch/aarch64/bits/syscall.h.in | 1 + arch/arm/bits/syscall.h.in | 1 + arch/i386/bits/syscall.h.in | 1 + arch/m68k/bits/syscall.h.in | 1 + arch/microblaze/bits/syscall.h.in | 1 + arch/mips/bits/syscall.h.in | 1 + arch/mips64/bits/syscall.h.in | 1 + arch/mipsn32/bits/syscall.h.in | 1 + arch/or1k/bits/syscall.h.in | 1 + arch/powerpc/bits/syscall.h.in | 1 + arch/powerpc64/bits/syscall.h.in | 1 + arch/riscv64/bits/syscall.h.in | 1 + arch/s390x/bits/syscall.h.in | 1 + arch/sh/bits/syscall.h.in | 1 + arch/x32/bits/syscall.h.in | 1 + arch/x86_64/bits/syscall.h.in | 1 + 16 files changed, 16 insertions(+) diff --git a/arch/aarch64/bits/syscall.h.in b/arch/aarch64/bits/syscall.h.in index ac3eaf803..f9457c184 100644 --- a/arch/aarch64/bits/syscall.h.in +++ b/arch/aarch64/bits/syscall.h.in @@ -289,6 +289,7 @@ #define __NR_fspick 433 
#define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_close_range 436 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 #define __NR_faccessat2 439 diff --git a/arch/arm/bits/syscall.h.in b/arch/arm/bits/syscall.h.in index 5b4e67910..7e2fc2669 100644 --- a/arch/arm/bits/syscall.h.in +++ b/arch/arm/bits/syscall.h.in @@ -389,6 +389,7 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_close_range 436 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 #define __NR_faccessat2 439 diff --git a/arch/i386/bits/syscall.h.in b/arch/i386/bits/syscall.h.in index fb562db5e..abdb210d3 100644 --- a/arch/i386/bits/syscall.h.in +++ b/arch/i386/bits/syscall.h.in @@ -426,6 +426,7 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_close_range 436 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 #define __NR_faccessat2 439 diff --git a/arch/m68k/bits/syscall.h.in b/arch/m68k/bits/syscall.h.in index 93703b46c..e10969a26 100644 --- a/arch/m68k/bits/syscall.h.in +++ b/arch/m68k/bits/syscall.h.in @@ -406,6 +406,7 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_close_range 436 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 #define __NR_faccessat2 439 diff --git a/arch/microblaze/bits/syscall.h.in b/arch/microblaze/bits/syscall.h.in index 1e78dfde6..9d4690473 100644 --- a/arch/microblaze/bits/syscall.h.in +++ b/arch/microblaze/bits/syscall.h.in @@ -427,6 +427,7 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_close_range 436 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 #define __NR_faccessat2 439 diff --git a/arch/mips/bits/syscall.h.in b/arch/mips/bits/syscall.h.in index 5b2066ef1..2bb03f067 100644 --- a/arch/mips/bits/syscall.h.in +++ b/arch/mips/bits/syscall.h.in @@ -408,6 +408,7 @@ #define __NR_fspick 4433 #define __NR_pidfd_open 4434 #define __NR_clone3 4435 +#define 
__NR_close_range 4436 #define __NR_openat2 4437 #define __NR_pidfd_getfd 4438 #define __NR_faccessat2 4439 diff --git a/arch/mips64/bits/syscall.h.in b/arch/mips64/bits/syscall.h.in index 30cb321f7..045e8238a 100644 --- a/arch/mips64/bits/syscall.h.in +++ b/arch/mips64/bits/syscall.h.in @@ -338,6 +338,7 @@ #define __NR_fspick 5433 #define __NR_pidfd_open 5434 #define __NR_clone3 5435 +#define __NR_close_range 5436 #define __NR_openat2 5437 #define __NR_pidfd_getfd 5438 #define __NR_faccessat2 5439 diff --git a/arch/mipsn32/bits/syscall.h.in b/arch/mipsn32/bits/syscall.h.in index 12eae0346..5b3225585 100644 --- a/arch/mipsn32/bits/syscall.h.in +++ b/arch/mipsn32/bits/syscall.h.in @@ -362,6 +362,7 @@ #define __NR_fspick 6433 #define __NR_pidfd_open 6434 #define __NR_clone3 6435 +#define __NR_close_range 6436 #define __NR_openat2 6437 #define __NR_pidfd_getfd 6438 #define __NR_faccessat2 6439 diff --git a/arch/or1k/bits/syscall.h.in b/arch/or1k/bits/syscall.h.in index bc9def13b..b36038915 100644 --- a/arch/or1k/bits/syscall.h.in +++ b/arch/or1k/bits/syscall.h.in @@ -311,6 +311,7 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_close_range 436 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 #define __NR_faccessat2 439 diff --git a/arch/powerpc/bits/syscall.h.in b/arch/powerpc/bits/syscall.h.in index 2d4c5dfce..5c6fae3e5 100644 --- a/arch/powerpc/bits/syscall.h.in +++ b/arch/powerpc/bits/syscall.h.in @@ -415,6 +415,7 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_close_range 436 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 #define __NR_faccessat2 439 diff --git a/arch/powerpc64/bits/syscall.h.in b/arch/powerpc64/bits/syscall.h.in index 2a5c70341..edf73d3d6 100644 --- a/arch/powerpc64/bits/syscall.h.in +++ b/arch/powerpc64/bits/syscall.h.in @@ -387,6 +387,7 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_close_range 436 
#define __NR_openat2 437 #define __NR_pidfd_getfd 438 #define __NR_faccessat2 439 diff --git a/arch/riscv64/bits/syscall.h.in b/arch/riscv64/bits/syscall.h.in index 439712a49..5def016b1 100644 --- a/arch/riscv64/bits/syscall.h.in +++ b/arch/riscv64/bits/syscall.h.in @@ -289,6 +289,7 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_close_range 436 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 #define __NR_faccessat2 439 diff --git a/arch/s390x/bits/syscall.h.in b/arch/s390x/bits/syscall.h.in index 4c04abc5f..fb2e60e30 100644 --- a/arch/s390x/bits/syscall.h.in +++ b/arch/s390x/bits/syscall.h.in @@ -352,6 +352,7 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_close_range 436 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 #define __NR_faccessat2 439 diff --git a/arch/sh/bits/syscall.h.in b/arch/sh/bits/syscall.h.in index 3942dea2a..158afc091 100644 --- a/arch/sh/bits/syscall.h.in +++ b/arch/sh/bits/syscall.h.in @@ -399,6 +399,7 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 +#define __NR_close_range 436 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 #define __NR_faccessat2 439 diff --git a/arch/x32/bits/syscall.h.in b/arch/x32/bits/syscall.h.in index e4c4bd06a..cfd9856f1 100644 --- a/arch/x32/bits/syscall.h.in +++ b/arch/x32/bits/syscall.h.in @@ -298,6 +298,7 @@ #define __NR_fspick (0x40000000 + 433) #define __NR_pidfd_open (0x40000000 + 434) #define __NR_clone3 (0x40000000 + 435) +#define __NR_close_range (0x40000000 + 436) #define __NR_openat2 (0x40000000 + 437) #define __NR_pidfd_getfd (0x40000000 + 438) #define __NR_faccessat2 (0x40000000 + 439) diff --git a/arch/x86_64/bits/syscall.h.in b/arch/x86_64/bits/syscall.h.in index 12a869801..a61179510 100644 --- a/arch/x86_64/bits/syscall.h.in +++ b/arch/x86_64/bits/syscall.h.in @@ -345,6 +345,7 @@ #define __NR_fspick 433 #define __NR_pidfd_open 434 #define __NR_clone3 435 
+#define __NR_close_range 436 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 #define __NR_faccessat2 439 From 75a0177693c31429e4e6420ac9dbb5d44b17d33d Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Sun, 25 Oct 2020 16:25:34 +0000 Subject: [PATCH 163/189] sys/fanotify.h: add new FAN_* macros from linux v5.9 Update fanotify.h, see linux commit 929943b38daf817f2e6d303ea04401651fc3bc05 fanotify: add support for FAN_REPORT_NAME linux commit 83b7a59896dd24015a34b7f00027f0ff3747972f fanotify: add basic support for FAN_REPORT_DIR_FID linux commit 08b95c338e0c5a96e47f4ca314ea1e7580ecb5d7 fanotify: remove event FAN_DIR_MODIFY FAN_DIR_MODIFY that was new in v5.7 is now removed from linux uapi, but kept in musl, so we don't break api, linux cannot reuse the value anyway. --- include/sys/fanotify.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/sys/fanotify.h b/include/sys/fanotify.h index 757667909..10e5f15e2 100644 --- a/include/sys/fanotify.h +++ b/include/sys/fanotify.h @@ -71,6 +71,9 @@ struct fanotify_response { #define FAN_ENABLE_AUDIT 0x40 #define FAN_REPORT_TID 0x100 #define FAN_REPORT_FID 0x200 +#define FAN_REPORT_DIR_FID 0x00000400 +#define FAN_REPORT_NAME 0x00000800 +#define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS) #define FAN_MARK_ADD 0x01 #define FAN_MARK_REMOVE 0x02 @@ -90,6 +93,7 @@ struct fanotify_response { #define FANOTIFY_METADATA_VERSION 3 #define FAN_EVENT_INFO_TYPE_FID 1 #define FAN_EVENT_INFO_TYPE_DFID_NAME 2 +#define FAN_EVENT_INFO_TYPE_DFID 3 #define FAN_ALLOW 0x01 #define FAN_DENY 0x02 #define FAN_AUDIT 0x10 From b0862c91483a2666df0a03e9c15daf4868ab873f Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Sun, 25 Oct 2020 16:42:41 +0000 Subject: [PATCH 164/189] netinet/in.h: add IP_RECVERR_4884 from linux v5.9 see linux commit eba75c587e811d3249c8bd50d22bb2266ccd3c0f icmp: support rfc 4884 
--- include/netinet/in.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/netinet/in.h b/include/netinet/in.h index 36a2013ad..f9594339f 100644 --- a/include/netinet/in.h +++ b/include/netinet/in.h @@ -202,6 +202,7 @@ uint16_t ntohs(uint16_t); #define IP_CHECKSUM 23 #define IP_BIND_ADDRESS_NO_PORT 24 #define IP_RECVFRAGSIZE 25 +#define IP_RECVERR_RFC4884 26 #define IP_MULTICAST_IF 32 #define IP_MULTICAST_TTL 33 #define IP_MULTICAST_LOOP 34 From ef8faa2932c2aa70dc0e506ba4f1446c67aa2d78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89rico=20Rolim?= Date: Thu, 12 Nov 2020 17:52:23 -0300 Subject: [PATCH 165/189] fix segfault in lutimes when tv argument is NULL calling lutimes with tv=0 is valid if the application wants to set the timestamps to the current time. this commit makes it so the timespec struct is populated with values from tv only if tv != 0 and calls utimensat with times=0 if tv == 0. --- src/legacy/lutimes.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/legacy/lutimes.c b/src/legacy/lutimes.c index 2e5502d1e..dd465923a 100644 --- a/src/legacy/lutimes.c +++ b/src/legacy/lutimes.c @@ -6,9 +6,11 @@ int lutimes(const char *filename, const struct timeval tv[2]) { struct timespec times[2]; - times[0].tv_sec = tv[0].tv_sec; - times[0].tv_nsec = tv[0].tv_usec * 1000; - times[1].tv_sec = tv[1].tv_sec; - times[1].tv_nsec = tv[1].tv_usec * 1000; - return utimensat(AT_FDCWD, filename, times, AT_SYMLINK_NOFOLLOW); + if (tv) { + times[0].tv_sec = tv[0].tv_sec; + times[0].tv_nsec = tv[0].tv_usec * 1000; + times[1].tv_sec = tv[1].tv_sec; + times[1].tv_nsec = tv[1].tv_usec * 1000; + } + return utimensat(AT_FDCWD, filename, tv ? times : 0, AT_SYMLINK_NOFOLLOW); } From 1c715dc4707c57f64605706adde87f35e6a204d4 Mon Sep 17 00:00:00 2001 From: "Issam E. 
Maghni" Date: Mon, 2 Nov 2020 17:16:41 -0500 Subject: [PATCH 166/189] configure: do not use obsolescent form of test -a|o The -a and -o operators are obsolescent and not in baseline POSIX. --- configure | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configure b/configure index 947adf419..a5231a0eb 100755 --- a/configure +++ b/configure @@ -204,7 +204,7 @@ fi abs_builddir="$(pwd)" || fail "$0: cannot determine working directory" abs_srcdir="$(cd $srcdir && pwd)" || fail "$0: invalid source directory $srcdir" test "$abs_srcdir" = "$abs_builddir" && srcdir=. -test "$srcdir" != "." -a -f Makefile -a ! -h Makefile && fail "$0: Makefile already exists in the working directory" +test "$srcdir" != "." && test -f Makefile && test ! -h Makefile && fail "$0: Makefile already exists in the working directory" # # Get a temp filename we can use @@ -279,7 +279,7 @@ echo "$cc_family" # # Figure out toolchain wrapper to build # -if test "$wrapper" = auto -o "$wrapper" = detect ; then +if test "$wrapper" = auto || test "$wrapper" = detect ; then echo "#include " > "$tmpc" echo "#if ! __GLIBC__" >> "$tmpc" echo "#error no" >> "$tmpc" @@ -468,7 +468,7 @@ tryflag CFLAGS_AUTO -pipe # pointer is no longer needed for debugging. # if fnmatch '-g*|*\ -g*' "$CFLAGS_AUTO $CFLAGS" ; then : -else +else tryflag CFLAGS_AUTO -fomit-frame-pointer fi From e1b7a0670cae397759bee279a75c419c4cb2ff83 Mon Sep 17 00:00:00 2001 From: Dominic Chen Date: Wed, 25 Nov 2020 02:53:16 -0500 Subject: [PATCH 167/189] fix mallocng regression in malloc_usable_size with null argument commit d1507646975cbf6c3e511ba07b193f27f032d108 added support for null argument in oldmalloc and was overlooked when switching to mallocng. 
--- src/malloc/mallocng/malloc_usable_size.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/malloc/mallocng/malloc_usable_size.c b/src/malloc/mallocng/malloc_usable_size.c index a440a4eab..ce6a960c6 100644 --- a/src/malloc/mallocng/malloc_usable_size.c +++ b/src/malloc/mallocng/malloc_usable_size.c @@ -3,6 +3,7 @@ size_t malloc_usable_size(void *p) { + if (!p) return 0; struct meta *g = get_meta(p); int idx = get_slot_index(p); size_t stride = get_stride(g); From a5627588fb7f7c90e2ee75b3e1a5fbd3acbe62e5 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 30 Nov 2020 12:14:47 -0500 Subject: [PATCH 168/189] implement realpath directly instead of using procfs readlink inability to use realpath in chroot/container without procfs access and at early boot prior to mount of /proc has been an ongoing issue, and it turns out realpath was one of the last remaining interfaces that needed procfs for its core functionality. during investigation while reimplementing, it was determined that there were also serious problems with the procfs-based implementation. most seriously it was unsafe on pre-O_PATH kernels, and unlike other places where O_PATH was used, the unsafety was hard or impossible to fix because O_NOFOLLOW can't be used (since the whole purpose was to follow symlinks). the new implementation is a direct one, performing readlink on each path component to resolve it. an explicit stack, as opposed to recursion, is used to represent the remaining components to be processed. the stack starts out holding just the input string, and reading a link pushes the link contents onto the stack. unlike many other implementations, this one does not call getcwd initially for relative pathnames. instead it accumulates initial .. components to be applied to the working directory if the result is still a relative path. this avoids calling getcwd (which may fail) at all when symlink traversal will eventually yield an absolute path. 
it also doesn't use any form of stat operation; instead it arranges for readlink to tell it when a non-directory is used in a context where a directory is needed. this minimizes the number of syscalls needed, avoids accessing inodes when the directory table suffices, and reduces the amount of code pulled in for static linking. --- src/misc/realpath.c | 159 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 136 insertions(+), 23 deletions(-) diff --git a/src/misc/realpath.c b/src/misc/realpath.c index d2708e59d..db8b74dc8 100644 --- a/src/misc/realpath.c +++ b/src/misc/realpath.c @@ -1,43 +1,156 @@ #include #include -#include -#include #include #include #include -#include "syscall.h" + +static size_t slash_len(const char *s) +{ + const char *s0 = s; + while (*s == '/') s++; + return s-s0; +} char *realpath(const char *restrict filename, char *restrict resolved) { - int fd; - ssize_t r; - struct stat st1, st2; - char buf[15+3*sizeof(int)]; - char tmp[PATH_MAX]; + char stack[PATH_MAX+1]; + char output[PATH_MAX]; + size_t p, q, l, l0, cnt=0, nup=0; + int check_dir=0; if (!filename) { errno = EINVAL; return 0; } + l = strnlen(filename, sizeof stack); + if (!l) { + errno = ENOENT; + return 0; + } + if (l >= PATH_MAX) goto toolong; + p = sizeof stack - l - 1; + q = 0; + memcpy(stack+p, filename, l+1); + + /* Main loop. Each iteration pops the next part from stack of + * remaining path components and consumes any slashes that follow. + * If not a link, it's moved to output; if a link, contents are + * pushed to the stack. */ +restart: + for (; ; p+=slash_len(stack+p)) { + /* If stack starts with /, the whole component is / or // + * and the output state must be reset. */ + if (stack[p] == '/') { + check_dir=0; + nup=0; + q=0; + output[q++] = '/'; + p++; + /* Initial // is special. 
*/ + if (stack[p] == '/' && stack[p+1] != '/') + output[q++] = '/'; + continue; + } + + char *z = __strchrnul(stack+p, '/'); + l0 = l = z-(stack+p); - fd = sys_open(filename, O_PATH|O_NONBLOCK|O_CLOEXEC); - if (fd < 0) return 0; - __procfdname(buf, fd); + if (!l && !check_dir) break; - r = readlink(buf, tmp, sizeof tmp - 1); - if (r < 0) goto err; - tmp[r] = 0; + /* Skip any . component but preserve check_dir status. */ + if (l==1 && stack[p]=='.') { + p += l; + continue; + } - fstat(fd, &st1); - r = stat(tmp, &st2); - if (r<0 || st1.st_dev != st2.st_dev || st1.st_ino != st2.st_ino) { - if (!r) errno = ELOOP; - goto err; + /* Copy next component onto output at least temporarily, to + * call readlink, but wait to advance output position until + * determining it's not a link. */ + if (q && output[q-1] != '/') { + if (!p) goto toolong; + stack[--p] = '/'; + l++; + } + if (q+l >= PATH_MAX) goto toolong; + memcpy(output+q, stack+p, l); + output[q+l] = 0; + p += l; + + int up = 0; + if (l0==2 && stack[p-2]=='.' && stack[p-1]=='.') { + up = 1; + /* Any non-.. path components we could cancel start + * after nup repetitions of the 3-byte string "../"; + * if there are none, accumulate .. components to + * later apply to cwd, if needed. */ + if (q <= 3*nup) { + nup++; + q += l; + continue; + } + /* When previous components are already known to be + * directories, processing .. can skip readlink. */ + if (!check_dir) goto skip_readlink; + } + ssize_t k = readlink(output, stack, p); + if (k==p) goto toolong; + if (!k) { + errno = ENOENT; + return 0; + } + if (k<0) { + if (errno != EINVAL) return 0; +skip_readlink: + check_dir = 0; + if (up) { + while(q && output[q-1]!='/') q--; + if (q>1 && (q>2 || output[0]!='/')) q--; + continue; + } + if (l0) q += l; + check_dir = stack[p]; + continue; + } + if (++cnt == SYMLOOP_MAX) { + errno = ELOOP; + return 0; + } + + /* If link contents end in /, strip any slashes already on + * stack to avoid /->// or //->/// or spurious toolong. 
*/ + if (stack[k-1]=='/') while (stack[p]=='/') p++; + p -= k; + memmove(stack+p, stack, k); + + /* Skip the stack advancement in case we have a new + * absolute base path. */ + goto restart; } - __syscall(SYS_close, fd); - return resolved ? strcpy(resolved, tmp) : strdup(tmp); -err: - __syscall(SYS_close, fd); + output[q] = 0; + + if (output[0] != '/') { + if (!getcwd(stack, sizeof stack)) return 0; + l = strlen(stack); + /* Cancel any initial .. components. */ + p = 0; + while (nup--) { + while(l>1 && stack[l-1]!='/') l--; + if (l>1) l--; + p += 2; + if (p= PATH_MAX) goto toolong; + memmove(output + l, output + p, q - p + 1); + memcpy(output, stack, l); + q = l + q-p; + } + + if (resolved) return memcpy(resolved, output, q+1); + else return strdup(output); + +toolong: + errno = ENAMETOOLONG; return 0; } From 5f563499c73825f88b7879c251833d7a91c21b37 Mon Sep 17 00:00:00 2001 From: Ariadne Conill Date: Sat, 1 Aug 2020 08:26:35 -0600 Subject: [PATCH 169/189] implement reallocarray reallocarray is an extension introduced by OpenBSD, which introduces calloc overflow checking to realloc. glibc 2.28 introduced support for this function behind _GNU_SOURCE, while glibc 2.29 allows its usage in _DEFAULT_SOURCE. 
--- include/stdlib.h | 1 + src/malloc/reallocarray.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) create mode 100644 src/malloc/reallocarray.c diff --git a/include/stdlib.h b/include/stdlib.h index 194c20339..b54a051fe 100644 --- a/include/stdlib.h +++ b/include/stdlib.h @@ -145,6 +145,7 @@ int getloadavg(double *, int); int clearenv(void); #define WCOREDUMP(s) ((s) & 0x80) #define WIFCONTINUED(s) ((s) == 0xffff) +void *reallocarray (void *, size_t, size_t); #endif #ifdef _GNU_SOURCE diff --git a/src/malloc/reallocarray.c b/src/malloc/reallocarray.c new file mode 100644 index 000000000..4a6ebe460 --- /dev/null +++ b/src/malloc/reallocarray.c @@ -0,0 +1,13 @@ +#define _BSD_SOURCE +#include +#include + +void *reallocarray(void *ptr, size_t m, size_t n) +{ + if (n && m > -1 / n) { + errno = ENOMEM; + return 0; + } + + return realloc(ptr, m * n); +} From eca5e00d1bac72e5e8e88c83fa017db0a6188e8c Mon Sep 17 00:00:00 2001 From: Marius Hillenbrand Date: Tue, 1 Dec 2020 15:36:34 +0100 Subject: [PATCH 170/189] s390x: derive float_t from compiler or default to float float_t should represent the type that is used to evaluate float expressions internally. On s390x, float_t is currently set to double. In contrast, the isa supports single-precision float operations and compilers by default evaluate float in single precision, which violates the C standard (sections 5.2.4.2.2 and 7.12 in C11/C17, to be precise). With -fexcess-precision=standard, gcc evaluates float in double precision, which aligns with the standard yet at the cost of added conversion instructions. gcc-11 will drop the special case to retrofit double precision behavior for -fexcess-precision=standard so that __FLT_EVAL_METHOD__ will be 0 on s390x in any scenario. To improve standards compliance and compatibility with future compiler direction, this patch changes the definition of float_t to be derived from the compiler's __FLT_EVAL_METHOD__. 
--- arch/s390x/bits/alltypes.h.in | 4 ++++ arch/s390x/bits/float.h | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/s390x/bits/alltypes.h.in b/arch/s390x/bits/alltypes.h.in index 15d18c8f4..6c0eb7f4b 100644 --- a/arch/s390x/bits/alltypes.h.in +++ b/arch/s390x/bits/alltypes.h.in @@ -9,7 +9,11 @@ TYPEDEF int wchar_t; #endif +#if defined(__FLT_EVAL_METHOD__) && __FLT_EVAL_METHOD__ == 1 TYPEDEF double float_t; +#else +TYPEDEF float float_t; +#endif TYPEDEF double double_t; TYPEDEF struct { long long __ll; long double __ld; } max_align_t; diff --git a/arch/s390x/bits/float.h b/arch/s390x/bits/float.h index 90b73beed..e188cb619 100644 --- a/arch/s390x/bits/float.h +++ b/arch/s390x/bits/float.h @@ -1,4 +1,8 @@ -#define FLT_EVAL_METHOD 1 +#ifdef __FLT_EVAL_METHOD__ +#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__ +#else +#define FLT_EVAL_METHOD 0 +#endif #define LDBL_TRUE_MIN 6.47517511943802511092443895822764655e-4966L #define LDBL_MIN 3.36210314311209350626267781732175260e-4932L From 9734319adb46dfc29cb50ffcdf507382df68be0a Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 4 Dec 2020 17:01:05 -0500 Subject: [PATCH 171/189] fix failure to preserve r6 in s390x asm; per ABI it is call-saved both __clone and __syscall_cp_asm failed to restore the original value of r6 after using it as a syscall argument register. the extent of breakage is not known, and in some cases may be mitigated by the only callers being internal to libc; if they used r6 but no longer needed its value after the call, they may not have noticed the problem. however at least posix_spawn (which uses __clone) was observed returning to the application with the wrong value in r6, leading to crash. since the call frame ABI already provides a place to spill registers, fixing this is just a matter of using it. in __clone, we also spuriously restore r6 in the child, since the parent branch directly returns to the caller. 
this takes the value from an uninitialized slot of the child's stack, but is harmless since there is no caller to return to in the child. --- src/thread/s390x/clone.s | 6 ++++++ src/thread/s390x/syscall_cp.s | 2 ++ 2 files changed, 8 insertions(+) diff --git a/src/thread/s390x/clone.s b/src/thread/s390x/clone.s index 577748eab..2125f20b8 100644 --- a/src/thread/s390x/clone.s +++ b/src/thread/s390x/clone.s @@ -17,6 +17,9 @@ __clone: # if (!tid) syscall(SYS_exit, a(d)); # return tid; + # preserve call-saved register used as syscall arg + stg %r6, 48(%r15) + # create initial stack frame for new thread nill %r3, 0xfff8 aghi %r3, -160 @@ -35,6 +38,9 @@ __clone: lg %r6, 160(%r15) svc 120 + # restore call-saved register + lg %r6, 48(%r15) + # if error or if we're the parent, return ltgr %r2, %r2 bnzr %r14 diff --git a/src/thread/s390x/syscall_cp.s b/src/thread/s390x/syscall_cp.s index c1da40de8..d094cbf5a 100644 --- a/src/thread/s390x/syscall_cp.s +++ b/src/thread/s390x/syscall_cp.s @@ -14,6 +14,7 @@ __cp_begin: icm %r2, 15, 0(%r2) jne __cp_cancel + stg %r6, 48(%r15) stg %r7, 56(%r15) lgr %r1, %r3 lgr %r2, %r4 @@ -26,6 +27,7 @@ __cp_begin: __cp_end: lg %r7, 56(%r15) + lg %r6, 48(%r15) br %r14 __cp_cancel: From e7639767c1679ddba734d5e1b5e2411f0d8b1f1b Mon Sep 17 00:00:00 2001 From: Drew DeVault Date: Sat, 5 Dec 2020 18:10:06 +0000 Subject: [PATCH 172/189] riscv64: fix inconsistent ucontext_t struct tag ucontext.h depends on the internal struct tag name for namespacing reasons, and the intent was always for it to be consistent across archs anyway. 
--- arch/riscv64/bits/signal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv64/bits/signal.h b/arch/riscv64/bits/signal.h index b006334f7..287367db7 100644 --- a/arch/riscv64/bits/signal.h +++ b/arch/riscv64/bits/signal.h @@ -60,10 +60,10 @@ struct sigaltstack { size_t ss_size; }; -typedef struct ucontext_t +typedef struct __ucontext { unsigned long uc_flags; - struct ucontext_t *uc_link; + struct __ucontext *uc_link; stack_t uc_stack; sigset_t uc_sigmask; mcontext_t uc_mcontext; From 258aa22e3024fe0cc30bab3c2baa25ee154d28e4 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 7 Dec 2020 17:25:08 -0500 Subject: [PATCH 173/189] fix omission of non-stub pthread_mutexattr_getprotocol this change should have been made when priority inheritance mutex support was added. if priority protection is also added at some point the implementation will need to change and will probably no longer be a simple bit shuffling. --- src/thread/pthread_attr_get.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/thread/pthread_attr_get.c b/src/thread/pthread_attr_get.c index 4aa5afdb2..f12ff4425 100644 --- a/src/thread/pthread_attr_get.c +++ b/src/thread/pthread_attr_get.c @@ -70,7 +70,7 @@ int pthread_condattr_getpshared(const pthread_condattr_t *restrict a, int *restr int pthread_mutexattr_getprotocol(const pthread_mutexattr_t *restrict a, int *restrict protocol) { - *protocol = PTHREAD_PRIO_NONE; + *protocol = a->__attr / 8U % 2; return 0; } int pthread_mutexattr_getpshared(const pthread_mutexattr_t *restrict a, int *restrict pshared) From 207996716093dc60a444c3a06369a2aaa72588fc Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 8 Dec 2020 18:02:39 -0500 Subject: [PATCH 174/189] drop use of pthread_once for aio thread stack size init pthread_once is not compatible with MT-fork constraints (commit 167390f05564e0a4d3fcb4329377fd7743267560) and is not needed here anyway; we already have a lock suitable for initialization. 
while changing this, fix a corner case where AT_MINSIGSTKSZ gives a value that's more than MINSIGSTKSZ but by a margin of less than 2048, thereby causing the size to be reduced. it shouldn't matter but the intent was to be the larger of a 2048-byte margin over the legacy fixed minimum stack requirement or a 512-byte margin over the minimum the kernel reports at runtime. --- src/aio/aio.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/aio/aio.c b/src/aio/aio.c index e004f98bc..a1a3e7914 100644 --- a/src/aio/aio.c +++ b/src/aio/aio.c @@ -76,6 +76,10 @@ static struct aio_queue *****map; static volatile int aio_fd_cnt; volatile int __aio_fut; +static size_t io_thread_stack_size; + +#define MAX(a,b) ((a)>(b) ? (a) : (b)) + static struct aio_queue *__aio_get_queue(int fd, int need) { if (fd < 0) { @@ -90,6 +94,10 @@ static struct aio_queue *__aio_get_queue(int fd, int need) pthread_rwlock_unlock(&maplock); if (fcntl(fd, F_GETFD) < 0) return 0; pthread_rwlock_wrlock(&maplock); + if (!io_thread_stack_size) { + unsigned long val = __getauxval(AT_MINSIGSTKSZ); + io_thread_stack_size = MAX(MINSIGSTKSZ+2048, val+512); + } if (!map) map = calloc(sizeof *map, (-1U/2+1)>>24); if (!map) goto out; if (!map[a]) map[a] = calloc(sizeof **map, 256); @@ -265,15 +273,6 @@ static void *io_thread_func(void *ctx) return 0; } -static size_t io_thread_stack_size = MINSIGSTKSZ+2048; -static pthread_once_t init_stack_size_once; - -static void init_stack_size() -{ - unsigned long val = __getauxval(AT_MINSIGSTKSZ); - if (val > MINSIGSTKSZ) io_thread_stack_size = val + 512; -} - static int submit(struct aiocb *cb, int op) { int ret = 0; @@ -299,7 +298,6 @@ static int submit(struct aiocb *cb, int op) else pthread_attr_init(&a); } else { - pthread_once(&init_stack_size_once, init_stack_size); pthread_attr_init(&a); pthread_attr_setstacksize(&a, io_thread_stack_size); pthread_attr_setguardsize(&a, 0); From ce77816aa7ed46e961008d61b879ac9e6b5d0fcb Mon Sep 17 
00:00:00 2001 From: Rich Felker Date: Wed, 9 Dec 2020 11:34:29 -0500 Subject: [PATCH 175/189] fix misleading comment in strstr the intent here is just to scan at least l bytes forward for the end of the haystack and at least some decent minimum to avoid doing it over and over if the needle is short, with no need to be precise. the comment erroneously stated this as an estimate for MIN when it's actually an estimate for MAX. --- src/string/strstr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/string/strstr.c b/src/string/strstr.c index 43a0207a7..96657bc23 100644 --- a/src/string/strstr.c +++ b/src/string/strstr.c @@ -96,7 +96,7 @@ static char *twoway_strstr(const unsigned char *h, const unsigned char *n) for (;;) { /* Update incremental end-of-haystack pointer */ if (z-h < l) { - /* Fast estimate for MIN(l,63) */ + /* Fast estimate for MAX(l,63) */ size_t grow = l | 63; const unsigned char *z2 = memchr(z, 0, grow); if (z2) { From 4f8f6eebc28008d830e1a407764fe4c3c9d526fe Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 9 Dec 2020 16:58:32 -0500 Subject: [PATCH 176/189] lift locale lock out of internal __get_locale this allows the lock to be shared with setlocale, eliminates repeated per-category lock/unlock in newlocale, and will allow the use of pthread_once in newlocale to be dropped (to be done separately). 
--- src/internal/locale_impl.h | 2 ++ src/locale/locale_map.c | 13 ++----------- src/locale/newlocale.c | 11 ++++++++++- src/locale/setlocale.c | 11 +++++------ 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/internal/locale_impl.h b/src/internal/locale_impl.h index 741a71c4d..4431a92eb 100644 --- a/src/internal/locale_impl.h +++ b/src/internal/locale_impl.h @@ -15,6 +15,8 @@ struct __locale_map { const struct __locale_map *next; }; +extern hidden volatile int __locale_lock[1]; + extern hidden const struct __locale_map __c_dot_utf8; extern hidden const struct __locale_struct __c_locale; extern hidden const struct __locale_struct __c_dot_utf8_locale; diff --git a/src/locale/locale_map.c b/src/locale/locale_map.c index fa51f2e36..da61f7fc0 100644 --- a/src/locale/locale_map.c +++ b/src/locale/locale_map.c @@ -28,8 +28,8 @@ static const char envvars[][12] = { "LC_MESSAGES", }; -static volatile int lock[1]; -volatile int *const __locale_lockptr = lock; +volatile int __locale_lock[1]; +volatile int *const __locale_lockptr = __locale_lock; const struct __locale_map *__get_locale(int cat, const char *val) { @@ -63,14 +63,6 @@ const struct __locale_map *__get_locale(int cat, const char *val) for (p=loc_head; p; p=p->next) if (!strcmp(val, p->name)) return p; - LOCK(lock); - - for (p=loc_head; p; p=p->next) - if (!strcmp(val, p->name)) { - UNLOCK(lock); - return p; - } - if (!libc.secure) path = getenv("MUSL_LOCPATH"); /* FIXME: add a default path? */ @@ -117,6 +109,5 @@ const struct __locale_map *__get_locale(int cat, const char *val) * requested name was "C" or "POSIX". 
*/ if (!new && cat == LC_CTYPE) new = (void *)&__c_dot_utf8; - UNLOCK(lock); return new; } diff --git a/src/locale/newlocale.c b/src/locale/newlocale.c index d20a84898..8eee2e10c 100644 --- a/src/locale/newlocale.c +++ b/src/locale/newlocale.c @@ -2,6 +2,7 @@ #include #include #include "locale_impl.h" +#include "lock.h" static pthread_once_t default_locale_once; static struct __locale_struct default_locale, default_ctype_locale; @@ -19,7 +20,7 @@ int __loc_is_allocated(locale_t loc) && loc != &default_locale && loc != &default_ctype_locale; } -locale_t __newlocale(int mask, const char *name, locale_t loc) +static locale_t do_newlocale(int mask, const char *name, locale_t loc) { struct __locale_struct tmp; @@ -55,4 +56,12 @@ locale_t __newlocale(int mask, const char *name, locale_t loc) return loc; } +locale_t __newlocale(int mask, const char *name, locale_t loc) +{ + LOCK(__locale_lock); + loc = do_newlocale(mask, name, loc); + UNLOCK(__locale_lock); + return loc; +} + weak_alias(__newlocale, newlocale); diff --git a/src/locale/setlocale.c b/src/locale/setlocale.c index 2bc7b5007..360c44376 100644 --- a/src/locale/setlocale.c +++ b/src/locale/setlocale.c @@ -9,12 +9,11 @@ static char buf[LC_ALL*(LOCALE_NAME_MAX+1)]; char *setlocale(int cat, const char *name) { - static volatile int lock[1]; const struct __locale_map *lm; if ((unsigned)cat > LC_ALL) return 0; - LOCK(lock); + LOCK(__locale_lock); /* For LC_ALL, setlocale is required to return a string which * encodes the current setting for all categories. The format of @@ -36,7 +35,7 @@ char *setlocale(int cat, const char *name) } lm = __get_locale(i, part); if (lm == LOC_MAP_FAILED) { - UNLOCK(lock); + UNLOCK(__locale_lock); return 0; } tmp_locale.cat[i] = lm; @@ -57,14 +56,14 @@ char *setlocale(int cat, const char *name) s += l+1; } *--s = 0; - UNLOCK(lock); + UNLOCK(__locale_lock); return same==LC_ALL ? 
(char *)part : buf; } if (name) { lm = __get_locale(cat, name); if (lm == LOC_MAP_FAILED) { - UNLOCK(lock); + UNLOCK(__locale_lock); return 0; } libc.global_locale.cat[cat] = lm; @@ -73,7 +72,7 @@ char *setlocale(int cat, const char *name) } char *ret = lm ? (char *)lm->name : "C"; - UNLOCK(lock); + UNLOCK(__locale_lock); return ret; } From 1e31a2822380656dc40fbbb4a5d2a047279f57a9 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 9 Dec 2020 17:01:57 -0500 Subject: [PATCH 177/189] drop use of pthread_once in newlocale in general, pthread_once is not compatible with MT-fork constraints (commit 167390f05564e0a4d3fcb4329377fd7743267560). here it actually no longer matters, because it's now called with a lock held, but since the lock is held it's pointless to use pthread_once. --- src/locale/newlocale.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/locale/newlocale.c b/src/locale/newlocale.c index 8eee2e10c..12ae87d67 100644 --- a/src/locale/newlocale.c +++ b/src/locale/newlocale.c @@ -4,16 +4,9 @@ #include "locale_impl.h" #include "lock.h" -static pthread_once_t default_locale_once; +static int default_locale_init_done; static struct __locale_struct default_locale, default_ctype_locale; -static void default_locale_init(void) -{ - for (int i=0; i Date: Wed, 9 Dec 2020 17:11:05 -0500 Subject: [PATCH 178/189] use libc-internal malloc for newlocale/freelocale this is necessary for MT-fork correctness now that the code runs under locale lock. it would not be hard to avoid, but __get_locale is already using libc-internal malloc anyway. this can be reconsidered during locale overhaul later if needed. 
--- src/locale/freelocale.c | 5 +++++ src/locale/newlocale.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/src/locale/freelocale.c b/src/locale/freelocale.c index 802b8bfe1..385d12069 100644 --- a/src/locale/freelocale.c +++ b/src/locale/freelocale.c @@ -1,6 +1,11 @@ #include #include "locale_impl.h" +#define malloc undef +#define calloc undef +#define realloc undef +#define free __libc_free + void freelocale(locale_t l) { if (__loc_is_allocated(l)) free(l); diff --git a/src/locale/newlocale.c b/src/locale/newlocale.c index 12ae87d67..9ac3cd386 100644 --- a/src/locale/newlocale.c +++ b/src/locale/newlocale.c @@ -4,6 +4,11 @@ #include "locale_impl.h" #include "lock.h" +#define malloc __libc_malloc +#define calloc undef +#define realloc undef +#define free undef + static int default_locale_init_done; static struct __locale_struct default_locale, default_ctype_locale; From 687cdf19fe2c7955d3345feddc4ff07b80869c14 Mon Sep 17 00:00:00 2001 From: Ariadne Conill Date: Sat, 12 Dec 2020 04:30:53 +0000 Subject: [PATCH 179/189] sh: fix incorrect mcontext_t member naming while the layouts match, the member member naming expected by software using mcontext_t omits the sc_ prefix. 
--- arch/sh/bits/signal.h | 13 +++++++++++-- arch/sh/pthread_arch.h | 4 ++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/sh/bits/signal.h b/arch/sh/bits/signal.h index 160311fae..d0b148282 100644 --- a/arch/sh/bits/signal.h +++ b/arch/sh/bits/signal.h @@ -9,7 +9,16 @@ #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) typedef int greg_t, gregset_t[16]; typedef int freg_t, fpregset_t[16]; -typedef struct sigcontext { +typedef struct { + unsigned long oldmask; + unsigned long gregs[16]; + unsigned long pc, pr, sr; + unsigned long gbr, mach, macl; + unsigned long fpregs[16]; + unsigned long xfpregs[16]; + unsigned int fpscr, fpul, ownedfp; +} mcontext_t; +struct sigcontext { unsigned long oldmask; unsigned long sc_regs[16]; unsigned long sc_pc, sc_pr, sc_sr; @@ -17,7 +26,7 @@ typedef struct sigcontext { unsigned long sc_fpregs[16]; unsigned long sc_xfpregs[16]; unsigned int sc_fpscr, sc_fpul, sc_ownedfp; -} mcontext_t; +}; #else typedef struct { unsigned long __regs[58]; diff --git a/arch/sh/pthread_arch.h b/arch/sh/pthread_arch.h index 0fcf70d27..199c2d55f 100644 --- a/arch/sh/pthread_arch.h +++ b/arch/sh/pthread_arch.h @@ -8,9 +8,9 @@ static inline uintptr_t __get_tp() #define TLS_ABOVE_TP #define GAP_ABOVE_TP 8 -#define MC_PC sc_pc +#define MC_PC pc #ifdef __FDPIC__ -#define MC_GOT sc_regs[12] +#define MC_GOT gregs[12] #define CANCEL_GOT (*(uintptr_t *)((char *)__syscall_cp_asm+sizeof(uintptr_t))) #endif From 0d022f4f70bdd818df3e8dd0dc8269534c2740ff Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 13 Dec 2020 21:43:16 -0500 Subject: [PATCH 180/189] fix v4l2 buffer ioctl fallbacks for pre-5.6 kernels commit 2412638bb39eb799b2600393bbd71cca8ae96bb2 got the size of struct v4l2_buffer wrong and omitted the tv_usec member slot from the offset list, so the ioctl numbers never matched and fallback code path was never taken. this caused the affected ioctls to fail with ENOTTY on kernels not new enough to have the native time64 ioctls. 
--- src/misc/ioctl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/misc/ioctl.c b/src/misc/ioctl.c index 894775114..26481c6d3 100644 --- a/src/misc/ioctl.c +++ b/src/misc/ioctl.c @@ -49,10 +49,10 @@ static const struct ioctl_compat_map compat_map[] = { { 0, 0, 8, WR, 1, OFFS(0,4) }, /* snd_pcm_mmap_control */ /* VIDIOC_QUERYBUF, VIDIOC_QBUF, VIDIOC_DQBUF, VIDIOC_PREPARE_BUF */ - { _IOWR('V', 9, new_misaligned(72)), _IOWR('V', 9, char[72]), 72, WR, 0, OFFS(20) }, - { _IOWR('V', 15, new_misaligned(72)), _IOWR('V', 15, char[72]), 72, WR, 0, OFFS(20) }, - { _IOWR('V', 17, new_misaligned(72)), _IOWR('V', 17, char[72]), 72, WR, 0, OFFS(20) }, - { _IOWR('V', 93, new_misaligned(72)), _IOWR('V', 93, char[72]), 72, WR, 0, OFFS(20) }, + { _IOWR('V', 9, new_misaligned(68)), _IOWR('V', 9, char[68]), 68, WR, 1, OFFS(20, 24) }, + { _IOWR('V', 15, new_misaligned(68)), _IOWR('V', 15, char[68]), 68, WR, 1, OFFS(20, 24) }, + { _IOWR('V', 17, new_misaligned(68)), _IOWR('V', 17, char[68]), 68, WR, 1, OFFS(20, 24) }, + { _IOWR('V', 93, new_misaligned(68)), _IOWR('V', 93, char[68]), 68, WR, 1, OFFS(20, 24) }, /* VIDIOC_DQEVENT */ { _IOR('V', 89, new_misaligned(96)), _IOR('V', 89, char[96]), 96, R, 0, OFFS(76,80) }, From aa4ce4a4aa75986d505ea4ab4d7120832230e023 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Mon, 14 Dec 2020 11:10:30 -0500 Subject: [PATCH 181/189] fix VIDIOC_DQEVENT (v4l2) ioctl fallback for pre-5.6 kernels commit 2412638bb39eb799b2600393bbd71cca8ae96bb2 got the size of struct v4l2_event wrong and failed to account for the fact that the old struct might be either 120 bytes with time misaligned mod 8, or 128 bytes with time aligned mod 8, due to the contained union having 64-bit members whose alignment is arch-dependent. rather than adding new logic to handle the differences, use an actual stripped-down version of the structure in question to derive the ioctl number, size, and offsets. 
--- src/misc/ioctl.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/misc/ioctl.c b/src/misc/ioctl.c index 26481c6d3..492828119 100644 --- a/src/misc/ioctl.c +++ b/src/misc/ioctl.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include "syscall.h" @@ -28,6 +29,12 @@ struct ioctl_compat_map { * number producing macros; only size of result is meaningful. */ #define new_misaligned(n) struct { int i; time_t t; char c[(n)-4]; } +struct v4l2_event { + uint32_t a; + uint64_t b[8]; + uint32_t c[2], ts[2], d[9]; +}; + static const struct ioctl_compat_map compat_map[] = { { SIOCGSTAMP, SIOCGSTAMP_OLD, 8, R, 0, OFFS(0, 4) }, { SIOCGSTAMPNS, SIOCGSTAMPNS_OLD, 8, R, 0, OFFS(0, 4) }, @@ -55,7 +62,8 @@ static const struct ioctl_compat_map compat_map[] = { { _IOWR('V', 93, new_misaligned(68)), _IOWR('V', 93, char[68]), 68, WR, 1, OFFS(20, 24) }, /* VIDIOC_DQEVENT */ - { _IOR('V', 89, new_misaligned(96)), _IOR('V', 89, char[96]), 96, R, 0, OFFS(76,80) }, + { _IOR('V', 89, new_misaligned(120)), _IOR('V', 89, struct v4l2_event), sizeof(struct v4l2_event), + R, 0, OFFS(offsetof(struct v4l2_event, ts[0]), offsetof(struct v4l2_event, ts[1])) }, /* VIDIOC_OMAP3ISP_STAT_REQ */ { _IOWR('V', 192+6, char[32]), _IOWR('V', 192+6, char[24]), 22, WR, 0, OFFS(0,4) }, From 83675e4ea34292a90a3182b03423c362999cbdd1 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 14 Jan 2021 21:26:00 -0500 Subject: [PATCH 182/189] release 1.2.2 --- VERSION | 2 +- WHATSNEW | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 6085e9465..23aa83906 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.2.1 +1.2.2 diff --git a/WHATSNEW b/WHATSNEW index d9826fc0e..e1d01982a 100644 --- a/WHATSNEW +++ b/WHATSNEW @@ -2236,3 +2236,54 @@ arch-specific bugs fixed: - mips* had negated error codes for some syscalls (kernel bug) - mips* SIGEMT was wrongly called SIGSTKFLT - sh fesetround didn't 
work correctly on sh + + + +1.2.2 release notes + +major changes: +- child restrictions lifted after fork of multithreaded parent + +new features: +- _Fork function (POSIX-future) +- reallocarray function (extension from OpenBSD, now widespread) +- gettid function (kernel tid as supported concept) +- SIGEV_THREAD_ID sigevent API (Linux extension) +- tcgetwinsize and tcsetwinsize functions (POSIX-future) + +performance: +- faster software sqrt on archs without native sqrt instruction + +compatibility: +- realpath no longer depends on procfs availability & accuracy +- time zone parser now always prefers 64-bit tables if present +- crypt_blowfish now supports $2b$ prefix +- res_query now reports errors via h_errno +- set*id and setrlimit are now safe in vforked/cloned child +- setgroups now applies to all threads +- dlopen debugger notification is improved, should work with lldb +- setrlimit no longer needs __synccall broadcast on linux 2.6.36+ +- faccessat with AT_EACCESS no longer needs child process on linux 5.8+ + +bugs fixed: +- buffer overflow and infinite loop errors in wcsnrtombs (CVE-2020-28928) +- sem_close unmapped still-referenced semaphores +- fork of process with active aio could deadlock or crash parent +- pthread_cond_wait was broken with priority-inheritance mutex +- getgrouplist wrongly failed when nscd reported an empty list +- abort could leak modified SIGABRT disposition to fork or posix_spawn child +- regression with mallocng: malloc_usable_size(0) crashed +- readlink wrongly gave EINVAL on zero length dest buffer +- sqrtl was severely inaccurate (not correctly rounded) on ldquad archs +- assert failure wrongly flushed stdio (possible deadlock) +- MUSL_LOCPATH search was broken with multiple components +- missing newline in herror output +- possible deadlock in pthread_exit with pshared mutex or barrier usage +- pthread_mutexattr_getprotocol didn't read back protocol +- v4l2 ioctl translation for pre-time64 kernels didn't work + +arch-specific
bugs fixed: +- x86_64 longjmp failed to handle 0 argument reliably +- i386 __set_thread_area fallback for pre-2.6 kernels didn't work +- missing O_LARGEFILE macro value on x86_64, x32, mips64 +- unpredictable s390x breakage from failure to preserve call-saved registers From ecac1cc258ce67efa0ab35dc540c288d40f4e4cb Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sat, 30 Jan 2021 16:09:22 -0500 Subject: [PATCH 183/189] fail posix_spawn file_actions operations with negative fds these functions are specified to fail with EBADF on negative fd arguments. apart from close, they are also specified to fail if the value exceeds OPEN_MAX, but as written it is not clear that this imposes any requirement when OPEN_MAX is not defined, and it's undesirable to impose a dynamic limit (via setrlimit) here since the limit at the time of posix_spawn may be different from the limit at the time of setting up the file actions. this may require revisiting later. --- src/process/posix_spawn_file_actions_addclose.c | 1 + src/process/posix_spawn_file_actions_adddup2.c | 1 + src/process/posix_spawn_file_actions_addfchdir.c | 1 + src/process/posix_spawn_file_actions_addopen.c | 1 + 4 files changed, 4 insertions(+) diff --git a/src/process/posix_spawn_file_actions_addclose.c b/src/process/posix_spawn_file_actions_addclose.c index cdda59799..0c2ef8fa3 100644 --- a/src/process/posix_spawn_file_actions_addclose.c +++ b/src/process/posix_spawn_file_actions_addclose.c @@ -5,6 +5,7 @@ int posix_spawn_file_actions_addclose(posix_spawn_file_actions_t *fa, int fd) { + if (fd < 0) return EBADF; struct fdop *op = malloc(sizeof *op); if (!op) return ENOMEM; op->cmd = FDOP_CLOSE; diff --git a/src/process/posix_spawn_file_actions_adddup2.c b/src/process/posix_spawn_file_actions_adddup2.c index 0367498fd..addca4d4f 100644 --- a/src/process/posix_spawn_file_actions_adddup2.c +++ b/src/process/posix_spawn_file_actions_adddup2.c @@ -5,6 +5,7 @@ int posix_spawn_file_actions_adddup2(posix_spawn_file_actions_t 
*fa, int srcfd, int fd) { + if (srcfd < 0 || fd < 0) return EBADF; struct fdop *op = malloc(sizeof *op); if (!op) return ENOMEM; op->cmd = FDOP_DUP2; diff --git a/src/process/posix_spawn_file_actions_addfchdir.c b/src/process/posix_spawn_file_actions_addfchdir.c index 436c683d2..e89ede8c3 100644 --- a/src/process/posix_spawn_file_actions_addfchdir.c +++ b/src/process/posix_spawn_file_actions_addfchdir.c @@ -6,6 +6,7 @@ int posix_spawn_file_actions_addfchdir_np(posix_spawn_file_actions_t *fa, int fd) { + if (fd < 0) return EBADF; struct fdop *op = malloc(sizeof *op); if (!op) return ENOMEM; op->cmd = FDOP_FCHDIR; diff --git a/src/process/posix_spawn_file_actions_addopen.c b/src/process/posix_spawn_file_actions_addopen.c index 368922c76..82bbcec9e 100644 --- a/src/process/posix_spawn_file_actions_addopen.c +++ b/src/process/posix_spawn_file_actions_addopen.c @@ -6,6 +6,7 @@ int posix_spawn_file_actions_addopen(posix_spawn_file_actions_t *restrict fa, int fd, const char *restrict path, int flags, mode_t mode) { + if (fd < 0) return EBADF; struct fdop *op = malloc(sizeof *op + strlen(path) + 1); if (!op) return ENOMEM; op->cmd = FDOP_OPEN; From 392641c70df88df617138d1b575a5861d3d253f7 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sat, 30 Jan 2021 16:42:26 -0500 Subject: [PATCH 184/189] fix inconsistent signature of __libc_start_main commit 7586360badcae6e73f04eb1b8189ce630281c4b2 removed the unused arguments from the definition of __libc_start_main, making it incompatible with the declaration at the point of call, which still passed 6 arguments. calls with mismatched function type have undefined behavior, breaking LTO and any other tooling that checks for function signature mismatch. removing the extra arguments from the point of call (crt1) is not an option for fixing this, since that would be a change in ABI surface between application and libc. adding back the extra arguments requires some care. 
on archs that pass arguments on the stack or that reserve argument spill space for the callee on the stack, it imposes an ABI requirement on the caller to provide such space. the modern crt1.c entry point provides such space, but originally there was arch-specific asm for the call to __libc_start_main. the last of this asm was removed in commit 6fef8cafbd0f6f185897bc87feb1ff66e2e204e1, and manual review of the code removed and its prior history was performed to check that all archs/variants passed the legacy init/fini/ldso_fini arguments. --- src/env/__libc_start_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/env/__libc_start_main.c b/src/env/__libc_start_main.c index 8fbe52627..c5b277bdc 100644 --- a/src/env/__libc_start_main.c +++ b/src/env/__libc_start_main.c @@ -69,7 +69,8 @@ weak_alias(libc_start_init, __libc_start_init); typedef int lsm2_fn(int (*)(int,char **,char **), int, char **); static lsm2_fn libc_start_main_stage2; -int __libc_start_main(int (*main)(int,char **,char **), int argc, char **argv) +int __libc_start_main(int (*main)(int,char **,char **), int argc, char **argv, + void (*init_dummy)(), void(*fini_dummy)(), void(*ldso_dummy)()) { char **envp = argv+argc+1; From 2532a23ec7fc249d575c446f10829bf5a9f56cda Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sat, 30 Jan 2021 17:14:20 -0500 Subject: [PATCH 185/189] preserve errno across free as an outcome of Austin Group issue #385, future versions of the standard will require free not to alter the value of errno. save and restore it individually around the calls to madvise and munmap so that the cost is not imposed on calls to free that do not result in any syscall. 
--- src/malloc/mallocng/free.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/malloc/mallocng/free.c b/src/malloc/mallocng/free.c index 40745f97d..418a085c1 100644 --- a/src/malloc/mallocng/free.c +++ b/src/malloc/mallocng/free.c @@ -119,7 +119,11 @@ void free(void *p) if (((uintptr_t)(start-1) ^ (uintptr_t)end) >= 2*PGSZ && g->last_idx) { unsigned char *base = start + (-(uintptr_t)start & (PGSZ-1)); size_t len = (end-base) & -PGSZ; - if (len) madvise(base, len, MADV_FREE); + if (len) { + int e = errno; + madvise(base, len, MADV_FREE); + errno = e; + } } // atomic free without locking if this is neither first or last slot @@ -139,5 +143,9 @@ void free(void *p) wrlock(); struct mapinfo mi = nontrivial_free(g, idx); unlock(); - if (mi.len) munmap(mi.base, mi.len); + if (mi.len) { + int e = errno; + munmap(mi.base, mi.len); + errno = e; + } } From de3db7390684a9dd151b8d643fa7123249c9d5a1 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sat, 30 Jan 2021 17:26:34 -0500 Subject: [PATCH 186/189] fix build regression in oldmalloc commit 8d37958d58cf36f53d5fcc7a8aa6d633da6071b2 inadvertently broke oldmalloc by having it implement __libc_malloc rather than __libc_malloc_impl. --- src/malloc/oldmalloc/malloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/malloc/oldmalloc/malloc.c b/src/malloc/oldmalloc/malloc.c index 53f5f959e..a5cbdb686 100644 --- a/src/malloc/oldmalloc/malloc.c +++ b/src/malloc/oldmalloc/malloc.c @@ -11,7 +11,7 @@ #include "malloc_impl.h" #include "fork_impl.h" -#define malloc __libc_malloc +#define malloc __libc_malloc_impl #define realloc __libc_realloc #define free __libc_free From a49232ba4db0920c7d0155ae7ffb7349a0ae2b9b Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sat, 30 Jan 2021 17:28:08 -0500 Subject: [PATCH 187/189] oldmalloc: preserve errno across free as an outcome of Austin Group issue #385, future versions of the standard will require free not to alter the value of errno. 
save and restore it individually around the calls to madvise and munmap so that the cost is not imposed on calls to free that do not result in any syscall. --- src/malloc/oldmalloc/malloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/malloc/oldmalloc/malloc.c b/src/malloc/oldmalloc/malloc.c index a5cbdb686..25d00d44d 100644 --- a/src/malloc/oldmalloc/malloc.c +++ b/src/malloc/oldmalloc/malloc.c @@ -481,12 +481,14 @@ void __bin_chunk(struct chunk *self) if (size > RECLAIM && (size^(size-osize)) > size-osize) { uintptr_t a = (uintptr_t)self + SIZE_ALIGN+PAGE_SIZE-1 & -PAGE_SIZE; uintptr_t b = (uintptr_t)next - SIZE_ALIGN & -PAGE_SIZE; + int e = errno; #if 1 __madvise((void *)a, b-a, MADV_DONTNEED); #else __mmap((void *)a, b-a, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0); #endif + errno = e; } unlock_bin(i); @@ -499,7 +501,9 @@ static void unmap_chunk(struct chunk *self) size_t len = CHUNK_SIZE(self) + extra; /* Crash on double free */ if (extra & 1) a_crash(); + int e = errno; __munmap(base, len); + errno = e; } void free(void *p) From 721ca8b60ec4180ff767c317f8869a5eca13fd92 Mon Sep 17 00:00:00 2001 From: quic-nwtn <162972850+quic-nwtn@users.noreply.github.com> Date: Wed, 8 May 2024 11:47:59 +0530 Subject: [PATCH 188/189] Create quic-organization-repolinter.yml Signed-off-by: quic-nwtn <162972850+quic-nwtn@users.noreply.github.com> --- .../quic-organization-repolinter.yml | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 .github/workflows/quic-organization-repolinter.yml diff --git a/.github/workflows/quic-organization-repolinter.yml b/.github/workflows/quic-organization-repolinter.yml new file mode 100644 index 000000000..faaea08fb --- /dev/null +++ b/.github/workflows/quic-organization-repolinter.yml @@ -0,0 +1,31 @@ +name: QuIC Organization Repolinter + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + workflow_dispatch: + +jobs: + repolinter: + runs-on: ubuntu-latest + 
steps: + - name: Checkout Repo + uses: actions/checkout@v2 + - name: Verify repolinter config file is present + id: check_files + uses: andstor/file-existence-action@v1 + with: + files: "repolint.json" + - name: Run Repolinter with local repolint.json + if: steps.check_files.outputs.files_exists == 'true' + uses: todogroup/repolinter-action@v1 + with: + config_file: "repolint.json" + - name: Run Repolinter with default ruleset + if: steps.check_files.outputs.files_exists == 'false' + uses: todogroup/repolinter-action@v1 + with: + config_url: "https://raw.githubusercontent.com/quic/.github/main/repolint.json" + From f89e0a71a9f88e1c744fec24d6de4858d4735545 Mon Sep 17 00:00:00 2001 From: nwtn Date: Thu, 19 Dec 2024 21:53:55 +0530 Subject: [PATCH 189/189] update repolinter Signed-off-by: Nitish kumar --- .github/workflows/quic-organization-repolinter.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/quic-organization-repolinter.yml b/.github/workflows/quic-organization-repolinter.yml index faaea08fb..54c1da1da 100644 --- a/.github/workflows/quic-organization-repolinter.yml +++ b/.github/workflows/quic-organization-repolinter.yml @@ -2,9 +2,9 @@ name: QuIC Organization Repolinter on: push: - branches: [ "master" ] + branches: [ "hexagon" ] pull_request: - branches: [ "master" ] + branches: [ "hexagon" ] workflow_dispatch: jobs: @@ -12,10 +12,10 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Repo - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Verify repolinter config file is present id: check_files - uses: andstor/file-existence-action@v1 + uses: andstor/file-existence-action@v3 with: files: "repolint.json" - name: Run Repolinter with local repolint.json