Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Documentation/admin-guide/kernel-parameters.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1741,6 +1741,12 @@
0 -- machine default
1 -- force brightness inversion

ia32_emulation= [X86-64]
Format: <bool>
When true, allows loading 32-bit programs and executing 32-bit
syscalls, essentially overriding IA32_EMULATION_DEFAULT_DISABLED at
boot time. When false, unconditionally disables IA32 emulation.

icn= [HW,ISDN]
Format: <io>[,<membase>[,<icn_id>[,<icn_id2>]]]

Expand Down
9 changes: 9 additions & 0 deletions arch/x86/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -2944,6 +2944,15 @@ config IA32_EMULATION
64-bit kernel. You should likely turn this on, unless you're
100% sure that you don't have any 32-bit programs left.

config IA32_EMULATION_DEFAULT_DISABLED
bool "IA32 emulation disabled by default"
default n
depends on IA32_EMULATION
help
Make IA32 emulation disabled by default. This prevents loading 32-bit
processes and access to 32-bit syscalls. The default can be overridden
at boot time with the ia32_emulation= kernel parameter. If unsure,
leave it to its default value.

config IA32_AOUT
tristate "IA32 a.out support"
depends on IA32_EMULATION
Expand Down
4 changes: 4 additions & 0 deletions arch/x86/boot/compressed/sev.c
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,10 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code)
if (result != ES_OK)
goto finish;

result = vc_check_opcode_bytes(&ctxt, exit_code);
if (result != ES_OK)
goto finish;

switch (exit_code) {
case SVM_EXIT_RDTSC:
case SVM_EXIT_RDTSCP:
Expand Down
1 change: 1 addition & 0 deletions arch/x86/coco/tdx/tdx.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <asm/coco.h>
#include <asm/tdx.h>
#include <asm/vmx.h>
#include <asm/ia32.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
#include <asm/pgtable.h>
Expand Down
104 changes: 103 additions & 1 deletion arch/x86/entry/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@
#include <linux/nospec.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/init.h>

#ifdef CONFIG_XEN_PV
#include <xen/xen-ops.h>
#include <xen/events.h>
#endif

#include <asm/apic.h>
#include <asm/desc.h>
#include <asm/traps.h>
#include <asm/vdso.h>
Expand Down Expand Up @@ -96,6 +98,16 @@ static __always_inline int syscall_32_enter(struct pt_regs *regs)
return (int)regs->orig_ax;
}

#ifdef CONFIG_IA32_EMULATION
/*
* Boot-time switch for IA32 emulation. It starts out enabled unless
* CONFIG_IA32_EMULATION_DEFAULT_DISABLED is set. The flag is marked
* __ro_after_init, i.e. it is meant to be written during boot only.
*/
bool __ia32_enabled __ro_after_init = !IS_ENABLED(CONFIG_IA32_EMULATION_DEFAULT_DISABLED);

/*
* Handler for the "ia32_emulation=" early boot parameter.
*
* @arg is passed to kstrtobool(), which writes the parsed boolean straight
* into __ia32_enabled. The kstrtobool() return value is handed back to the
* early-param machinery unchanged.
*/
static int __init ia32_emulation_override_cmdline(char *arg)
{
return kstrtobool(arg, &__ia32_enabled);
}
early_param("ia32_emulation", ia32_emulation_override_cmdline);
#endif

/*
* Invoke a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL.
*/
Expand All @@ -115,7 +127,96 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
}
}

/* Handles int $0x80 */
#ifdef CONFIG_IA32_EMULATION
/*
 * Tell whether the current vector 0x80 entry was raised as an external
 * interrupt instead of by a software INT $0x80.
 *
 * Returns true when the bit for vector 0x80 is set in the local APIC
 * in-service register (ISR), false otherwise. XENPV guests always get
 * false because their local APIC is not real.
 */
static __always_inline bool int80_is_external(void)
{
	/*
	 * Select the 32-bit ISR word that holds vector 0x80: the vector
	 * number divided by 32 picks the word, words are 0x10 apart.
	 */
	const unsigned int isr_reg = APIC_ISR + (0x80 / 32) * 0x10;
	/* Bit for vector 0x80 inside that word */
	const u32 vector_mask = BIT(0x80 % 32);

	/* The local APIC on XENPV guests is fake */
	if (cpu_feature_enabled(X86_FEATURE_XENPV))
		return false;

	/*
	 * A set ISR bit for vector 0x80 means an external interrupt is in
	 * service, coming either from broken hardware or from a VMM that
	 * injected it.
	 *
	 * Note: In guest mode this is only valid for secure guests where
	 * the secure module fully controls the vAPIC exposed to the guest.
	 */
	return apic_read(isr_reg) & vector_mask;
}

/**
* int80_emulation - 32-bit legacy syscall entry
*
* This entry point can be used by 32-bit and 64-bit programs to perform
* 32-bit system calls. Instances of INT $0x80 can be found inline in
* various programs and libraries. It is also used by the vDSO's
* __kernel_vsyscall fallback for hardware that doesn't support a faster
* entry method. Restarted 32-bit system calls also fall back to INT
* $0x80 regardless of what instruction was originally used to do the
* system call.
*
* This is considered a slow path. It is not used by most libc
* implementations on modern hardware except during process startup.
*
* The arguments for the INT $0x80 based syscall are on the stack in the
* pt_regs structure:
* eax: system call number
* ebx, ecx, edx, esi, edi, ebp: arg1 - arg6
*
* The handler panics instead of dispatching a syscall when it is entered
* from kernel mode, or when int80_is_external() reports that vector 0x80
* is an external interrupt.
*
* NOTE(review): 'regs' is not declared in this body; it is presumably the
* struct pt_regs pointer supplied by DEFINE_IDTENTRY_RAW() - confirm
* against asm/idtentry.h.
*/
DEFINE_IDTENTRY_RAW(int80_emulation)
{
int nr;

/*
* Kernel does not use INT $0x80! Arriving here from kernel mode is
* therefore reported as an unexpected external interrupt and is fatal.
* irqentry_enter() and instrumentation_begin() run first, presumably so
* that panic() executes in an established, instrumentable context -
* confirm against the generic entry code.
*/
if (unlikely(!user_mode(regs))) {
irqentry_enter(regs);
instrumentation_begin();
panic("Unexpected external interrupt 0x80\n");
}

/*
* Establish kernel context for instrumentation, including for
* int80_is_external() below which calls into the APIC driver.
* Identical for soft and external interrupts.
*/
enter_from_user_mode(regs);

instrumentation_begin();
add_random_kstack_offset();

/* Validate that this is a soft interrupt to the extent possible */
if (unlikely(int80_is_external()))
panic("Unexpected external interrupt 0x80\n");

/*
* The low level idtentry code pushed -1 into regs::orig_ax
* and regs::ax contains the syscall number.
*
* User tracing code (ptrace or signal handlers) might assume
* that the regs::orig_ax contains a 32-bit number on invoking
* a 32-bit syscall.
*
* Establish the syscall convention by saving the 32bit truncated
* syscall number in regs::orig_ax and by invalidating regs::ax.
*/
regs->orig_ax = regs->ax & GENMASK(31, 0);
regs->ax = -ENOSYS;

/* syscall_32_enter() returns regs->orig_ax converted to int */
nr = syscall_32_enter(regs);

/* do_syscall_32_irqs_on() is documented as being called with IRQs on */
local_irq_enable();
/* From here on use the number handed back by the syscall entry work */
nr = syscall_enter_from_user_mode_work(regs, nr);
do_syscall_32_irqs_on(regs, nr);

/* Close the instrumentable section opened above before the exit path */
instrumentation_end();
syscall_exit_to_user_mode(regs);
}
#else /* CONFIG_IA32_EMULATION */

/* Handles int $0x80 on a 32bit kernel */
__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
{
int nr = syscall_32_enter(regs);
Expand All @@ -134,6 +235,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
instrumentation_end();
syscall_exit_to_user_mode(regs);
}
#endif /* !CONFIG_IA32_EMULATION */

static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
{
Expand Down
6 changes: 2 additions & 4 deletions arch/x86/entry/entry_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -1498,18 +1498,16 @@ nmi_restore:
iretq
SYM_CODE_END(asm_exc_nmi)

#ifndef CONFIG_IA32_EMULATION
/*
* This handles SYSCALL from 32-bit code. There is no way to program
* MSRs to fully disable 32-bit SYSCALL.
*/
SYM_CODE_START(ignore_sysret)
SYM_CODE_START(entry_SYSCALL32_ignore)
UNWIND_HINT_EMPTY
ENDBR
mov $-ENOSYS, %eax
sysretl
SYM_CODE_END(ignore_sysret)
#endif
SYM_CODE_END(entry_SYSCALL32_ignore)

.pushsection .text, "ax"
SYM_CODE_START(rewind_stack_and_make_dead)
Expand Down
77 changes: 0 additions & 77 deletions arch/x86/entry/entry_64_compat.S
Original file line number Diff line number Diff line change
Expand Up @@ -274,80 +274,3 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL)
sysretl
SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
SYM_CODE_END(entry_SYSCALL_compat)

/*
* 32-bit legacy system call entry.
*
* 32-bit x86 Linux system calls traditionally used the INT $0x80
* instruction. INT $0x80 lands here.
*
* This entry point can be used by 32-bit and 64-bit programs to perform
* 32-bit system calls. Instances of INT $0x80 can be found inline in
* various programs and libraries. It is also used by the vDSO's
* __kernel_vsyscall fallback for hardware that doesn't support a faster
* entry method. Restarted 32-bit system calls also fall back to INT
* $0x80 regardless of what instruction was originally used to do the
* system call.
*
* This is considered a slow path. It is not used by most libc
* implementations on modern hardware except during process startup.
*
* Arguments:
* eax system call number
* ebx arg1
* ecx arg2
* edx arg3
* esi arg4
* edi arg5
* ebp arg6
*/
SYM_CODE_START(entry_INT80_compat)
UNWIND_HINT_EMPTY
ENDBR
/*
* Interrupts are off on entry.
*/
ASM_CLAC /* Do this early to minimize exposure */
SWAPGS

/*
* User tracing code (ptrace or signal handlers) might assume that
* the saved RAX contains a 32-bit number when we're invoking a 32-bit
* syscall. Just in case the high bits are nonzero, zero-extend
* the syscall number. (This could almost certainly be deleted
* with no ill effects.)
*/
movl %eax, %eax

/* switch to thread stack expects orig_ax and rdi to be pushed */
pushq %rax /* pt_regs->orig_ax */

/* Need to switch before accessing the thread stack. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax

/* In the Xen PV case we already run on the thread stack. */
ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV

/*
* Remember the stack we entered on in %rax, switch %rsp to the top of
* the current thread stack, then copy the six saved words (ss down to
* orig_ax) from the old stack onto the new one.
*/
movq %rsp, %rax
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp

pushq 5*8(%rax) /* regs->ss */
pushq 4*8(%rax) /* regs->rsp */
pushq 3*8(%rax) /* regs->eflags */
pushq 2*8(%rax) /* regs->cs */
pushq 1*8(%rax) /* regs->ip */
pushq 0*8(%rax) /* regs->orig_ax */
.Lint80_keep_stack:

/*
* NOTE(review): rax=$-ENOSYS presumably makes the macro store -ENOSYS
* in the saved ax slot instead of the live %rax - confirm against the
* PUSH_AND_CLEAR_REGS definition.
*/
PUSH_AND_CLEAR_REGS rax=$-ENOSYS
UNWIND_HINT_REGS

cld

IBRS_ENTER
UNTRAIN_RET

/* Pass the current stack pointer as the first C argument */
movq %rsp, %rdi
call do_int80_syscall_32
jmp swapgs_restore_regs_and_return_to_usermode
SYM_CODE_END(entry_INT80_compat)
23 changes: 22 additions & 1 deletion arch/x86/include/asm/ia32.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,27 @@ extern void ia32_pick_mmap_layout(struct mm_struct *mm);

#endif

#endif /* !CONFIG_IA32_SUPPORT */
/* Switch for IA32 emulation; this header only declares it. */
extern bool __ia32_enabled;

/*
* Report whether IA32 emulation is currently enabled.
*
* Returns the value of __ia32_enabled as is.
*/
static inline bool ia32_enabled(void)
{
return __ia32_enabled;
}

/*
* Turn IA32 emulation off by clearing __ia32_enabled.
*
* NOTE(review): the flag is defined with __ro_after_init in
* arch/x86/entry/common.c, so this presumably may only be called during
* early boot - confirm against the callers.
*/
static inline void ia32_disable(void)
{
__ia32_enabled = false;
}

#else /* !CONFIG_IA32_EMULATION */

/*
* Variant for builds without CONFIG_IA32_EMULATION: there is no switch
* to read, so the answer is the build-time constant
* IS_ENABLED(CONFIG_X86_32).
*/
static inline bool ia32_enabled(void)
{
return IS_ENABLED(CONFIG_X86_32);
}

/* Without CONFIG_IA32_EMULATION this is a no-op stub so callers need no #ifdef. */
static inline void ia32_disable(void) {}

#endif

#endif /* _ASM_X86_IA32_H */
4 changes: 4 additions & 0 deletions arch/x86/include/asm/idtentry.h
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,10 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op);
DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3);
DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault);

#if defined(CONFIG_IA32_EMULATION)
DECLARE_IDTENTRY_RAW(IA32_SYSCALL_VECTOR, int80_emulation);
#endif

#ifdef CONFIG_X86_MCE
#ifdef CONFIG_X86_64
DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check);
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/include/asm/irq_stack.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
* the output constraints to make the compiler aware that R11 cannot be
* reused after the asm() statement.
*
* For builds with CONFIG_UNWIND_FRAME_POINTER ASM_CALL_CONSTRAINT is
* For builds with CONFIG_UNWINDER_FRAME_POINTER, ASM_CALL_CONSTRAINT is
* required as well as this prevents certain creative GCC variants from
* misplacing the ASM code.
*
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/include/asm/page_32.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ static inline void copy_page(void *to, void *from)
{
memcpy(to, from, PAGE_SIZE);
}
#endif /* CONFIG_X86_3DNOW */
#endif /* CONFIG_X86_USE_3DNOW */
#endif /* !__ASSEMBLY__ */

#endif /* _ASM_X86_PAGE_32_H */
2 changes: 1 addition & 1 deletion arch/x86/include/asm/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ static inline unsigned long cpu_kernelmode_gs_base(int cpu)

DECLARE_PER_CPU(void *, hardirq_stack_ptr);
DECLARE_PER_CPU(bool, hardirq_stack_inuse);
extern asmlinkage void ignore_sysret(void);
extern asmlinkage void entry_SYSCALL32_ignore(void);

/* Save actual FS/GS selectors and bases to current->thread */
void current_save_fsgs(void);
Expand Down
7 changes: 3 additions & 4 deletions arch/x86/include/asm/proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,9 @@ void entry_SYSCALL_compat(void);
void entry_SYSCALL_compat_safe_stack(void);
void entry_SYSRETL_compat_unsafe_stack(void);
void entry_SYSRETL_compat_end(void);
void entry_INT80_compat(void);
#ifdef CONFIG_XEN_PV
void xen_entry_INT80_compat(void);
#endif
#else /* !CONFIG_IA32_EMULATION */
#define entry_SYSCALL_compat NULL
#define entry_SYSENTER_compat NULL
#endif

void x86_configure_nx(void);
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/include/asm/uaccess.h
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ do { \
: [umem] "m" (__m(addr)), \
"0" (err))

#endif // CONFIG_CC_ASM_GOTO_OUTPUT
#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT

#ifdef CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT
#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \
Expand Down
Loading
Loading