NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: port-xen/58561 (panic: kernel diagnostic assertion, "x86_read_psl() == 0" failed: file, "/home/netbsd/10/src/sys/arch/x86/x86/pmap.c", line 3581)



The following reply was made to PR port-xen/58561; it has been noted by GNATS.

From: Manuel Bouyer <bouyer%antioche.eu.org@localhost>
To: Konrad Schroder <perseant%hhhh.org@localhost>
Cc: gnats-bugs%NetBSD.org@localhost, port-xen-maintainer%netbsd.org@localhost,
        netbsd-bugs%netbsd.org@localhost, gnats-admin%netbsd.org@localhost, riastradh%NetBSD.org@localhost,
        campbell+netbsd%mumble.net@localhost, cherry%NetBSD.org@localhost
Subject: Re: port-xen/58561 (panic: kernel diagnostic assertion,
 "x86_read_psl() == 0" failed: file,
 "/home/netbsd/10/src/sys/arch/x86/x86/pmap.c", line 3581)
Date: Mon, 12 Jan 2026 23:20:34 +0100

 --bYTERQBjABj6NPgt
 Content-Type: text/plain; charset=us-ascii
 Content-Disposition: inline
 
 On Sun, Jan 11, 2026 at 08:35:10PM -0800, Konrad Schroder wrote:
 > On 1/10/2026 2:33 PM, Manuel Bouyer wrote:
 > > And this assembly work may have allowed me to find the problem.
 > > Basically, the CLI and STI macros are not atomic on Xen PV, and if preemption
 > > happens at a bad time we may end up updating the upcall_mask of our previous
 > > CPU. The attached patch should close this race condition; hopefully it's
 > > the last one.
 > 
 > I can confirm that, at least in my case, this allows me to build
 > continuously all day without a crash.
 
 Good, it points that we're on the right track.
 Can you try the attached patch instead ? It tries to limit the extra
 instructions to only places where it's needed, and add DIAGNOSTIC code
 to check it's really not needed in other places.
 
 -- 
 Manuel Bouyer <bouyer%antioche.eu.org@localhost>
      NetBSD: 26 ans d'experience feront toujours la difference
 --
 
 --bYTERQBjABj6NPgt
 Content-Type: text/plain; charset=us-ascii
 Content-Disposition: attachment; filename=diff2
 
 Index: sys/arch/amd64/amd64/amd64_trap.S
 ===================================================================
 RCS file: /cvsroot/src/sys/arch/amd64/amd64/amd64_trap.S,v
 retrieving revision 1.55
 diff -u -p -u -r1.55 amd64_trap.S
 --- sys/arch/amd64/amd64/amd64_trap.S	27 Feb 2023 16:24:28 -0000	1.55
 +++ sys/arch/amd64/amd64/amd64_trap.S	12 Jan 2026 22:17:57 -0000
 @@ -90,7 +90,7 @@
   */
  
  #ifdef	XENPV
 -#define	PRE_TRAP	CLI(cx); movq (%rsp),%rcx ; movq 8(%rsp),%r11 ; addq $0x10,%rsp
 +#define	PRE_TRAP	CLI2(cx,r11); movq (%rsp),%rcx ; movq 8(%rsp),%r11 ; addq $0x10,%rsp
  #else
  #define	PRE_TRAP
  #endif
 @@ -671,7 +671,7 @@ calltrap:
  .Lalltraps_checkast:
  	movq	CPUVAR(CURLWP),%r14
  	/* Check for ASTs on exit to user mode. */
 -	CLI(si)
 +	CLI2(si, rdi)
  	CHECK_ASTPENDING(%r14)
  	je	3f
  	CLEAR_ASTPENDING(%r14)
 Index: sys/arch/amd64/amd64/cpufunc.S
 ===================================================================
 RCS file: /cvsroot/src/sys/arch/amd64/amd64/cpufunc.S,v
 retrieving revision 1.70
 diff -u -p -u -r1.70 cpufunc.S
 --- sys/arch/amd64/amd64/cpufunc.S	6 Sep 2025 02:53:22 -0000	1.70
 +++ sys/arch/amd64/amd64/cpufunc.S	12 Jan 2026 22:17:57 -0000
 @@ -184,7 +184,7 @@ ENTRY(wbinvd)
  END(wbinvd)
  
  ENTRY(setusergs)
 -	CLI(ax)
 +	CLI2(ax, rdi)
  	swapgs
  	movw	%di, %gs
  	swapgs
 Index: sys/arch/amd64/amd64/locore.S
 ===================================================================
 RCS file: /cvsroot/src/sys/arch/amd64/amd64/locore.S,v
 retrieving revision 1.235
 diff -u -p -u -r1.235 locore.S
 --- sys/arch/amd64/amd64/locore.S	31 Dec 2025 15:33:50 -0000	1.235
 +++ sys/arch/amd64/amd64/locore.S	12 Jan 2026 22:17:57 -0000
 @@ -1641,7 +1641,7 @@ ENTRY(handle_syscall)
  	 * to ensure we don't take an interrupt with some of the user
  	 * registers loaded.
  	 */
 -	CLI(si)
 +	CLI2(si, rax)
  	/* Check for ASTs on exit to user mode. */
  	movl	L_MD_ASTPENDING(%r14),%eax
  	orl	CPUVAR(WANT_PMAPLOAD),%eax
 @@ -1768,7 +1768,9 @@ IDTVEC(\name)
  	 * But it didn't disable events
  	 */
  	pushq	%rsi
 -	CLI(si)
 +	pushq	%rdi
 +	CLI2(si, rdi)
 +	popq	%rdi
  	popq	%rsi
  	addq	$0x10,%rsp	/* gap to match cs:rip */
  	pushq	$2		/* error code */
 @@ -1813,7 +1815,9 @@ IDTVEC_END(syscall32)
  IDTVEC(osyscall)
  #ifdef XENPV
  	pushq	%rsi
 -	CLI(si)
 +	pushq	%rdi
 +	CLI2(si, rdi)
 +	popq	%rdi
  	popq	%rsi
  	movq (%rsp),%rcx
  	movq 8(%rsp),%r11
 Index: sys/arch/amd64/amd64/machdep.c
 ===================================================================
 RCS file: /cvsroot/src/sys/arch/amd64/amd64/machdep.c,v
 retrieving revision 1.378
 diff -u -p -u -r1.378 machdep.c
 --- sys/arch/amd64/amd64/machdep.c	21 Dec 2025 07:00:26 -0000	1.378
 +++ sys/arch/amd64/amd64/machdep.c	12 Jan 2026 22:17:57 -0000
 @@ -2462,3 +2462,8 @@ idt_vec_init_cpu_md(struct idt_vec *iv, 
  		iv->iv_idt = (void *)idt_vaddr;
  	}
  }
 +
 +#if defined(DIAGNOSTIC) && defined(XENPV)
 +const char *cli_panic = "cli with preemtion";
 +const char *sti_panic = "sti with interrupts enabled";
 +#endif
 Index: sys/arch/amd64/amd64/spl.S
 ===================================================================
 RCS file: /cvsroot/src/sys/arch/amd64/amd64/spl.S,v
 retrieving revision 1.49
 diff -u -p -u -r1.49 spl.S
 --- sys/arch/amd64/amd64/spl.S	1 Mar 2023 08:38:50 -0000	1.49
 +++ sys/arch/amd64/amd64/spl.S	12 Jan 2026 22:17:57 -0000
 @@ -378,7 +378,7 @@ IDTVEC(doreti)
  	movq	%rsp,%rdi
  	KMSAN_INIT_ARG(8)
  	call	_C_LABEL(trap)
 -	CLI(si)
 +	CLI2(si, rdi)
  	jmp	.Ldoreti_checkast
  3:
  	CHECK_DEFERRED_SWITCH
 @@ -389,6 +389,6 @@ IDTVEC(doreti)
  9:
  	STI(si)
  	call	_C_LABEL(do_pmap_load)
 -	CLI(si)
 +	CLI2(si, rdi)
  	jmp	.Ldoreti_checkast		/* recheck ASTs */
  IDTVEC_END(doreti)
 Index: sys/arch/amd64/include/frameasm.h
 ===================================================================
 RCS file: /cvsroot/src/sys/arch/amd64/include/frameasm.h,v
 retrieving revision 1.55
 diff -u -p -u -r1.55 frameasm.h
 --- sys/arch/amd64/include/frameasm.h	30 Jul 2022 14:11:00 -0000	1.55
 +++ sys/arch/amd64/include/frameasm.h	12 Jan 2026 22:17:57 -0000
 @@ -23,31 +23,68 @@
  #define	XEN_ONLY2(x,y)	x,y
  #define	NOT_XEN(x)
  
 +#ifdef DIAGNOSTIC
 +/*
 + * acessing EVTCHN_UPCALL_MASK is save only if preemption is disabled, i.e.:
 + * l_nopreempt is not 0, or
 + * ci_ilevel is nit 0, or
 + * EVTCHN_UPCALL_MASK is not 0
 + * ci_idepth is not negative
 + */
  #define CLI(temp_reg) \
 - 	movq CPUVAR(VCPU),%r ## temp_reg ;			\
 + 	movq CPUVAR(CURLWP),%r ## temp_reg ;			\
 +	cmpl $0, L_NOPREEMPT(%r ## temp_reg);			\
 +	jne 199f;						\
 +	cmpb $0, CPUVAR(ILEVEL);				\
 +	jne 199f;						\
 +	movl CPUVAR(IDEPTH), %e ## temp_reg;			\
 +	test %eax, %eax;					\
 +	jns 199f;						\
 + 	movq $panicstr,%r ## temp_reg ;			 	\
 +	cmpq $0, 0(%r ## temp_reg);				\
 +	jne 199f;						\
 +	movq _C_LABEL(cli_panic), %rdi;				\
 +	call _C_LABEL(panic);					\
 +199:	movq CPUVAR(VCPU),%r ## temp_reg ;			\
  	movb $1,EVTCHN_UPCALL_MASK(%r ## temp_reg);
  
  #define STI(temp_reg) \
   	movq CPUVAR(VCPU),%r ## temp_reg ;			\
 -	movb $0,EVTCHN_UPCALL_MASK(%r ## temp_reg);
 +	cmpb $0, EVTCHN_UPCALL_MASK(%r ## temp_reg);		\
 +	jne 198f;						\
 + 	movq $panicstr,%r ## temp_reg ;				\
 +	cmpq $0, 0(%r ## temp_reg);				\
 +	jne 198f;						\
 +	movq _C_LABEL(sti_panic), %rdi;				\
 +	call _C_LABEL(panic);					\
 +198:	movb $0,EVTCHN_UPCALL_MASK(%r ## temp_reg);		\
 + 	movq CPUVAR(CURLWP),%r ## temp_reg ;
 +#else
 +
 +#define CLI(temp_reg) \
 + 	movq CPUVAR(VCPU),%r ## temp_reg ;			\
 +	movb $1,EVTCHN_UPCALL_MASK(%r ## temp_reg);
  
 -#define PUSHF(temp_reg) \
 +#define STI(temp_reg) \
   	movq CPUVAR(VCPU),%r ## temp_reg ;			\
 -	movzbl EVTCHN_UPCALL_MASK(%r ## temp_reg), %e ## temp_reg; \
 -	pushq %r ## temp_reg
 +	movb $0,EVTCHN_UPCALL_MASK(%r ## temp_reg);		\
  
 -#define POPF \
 -	popq %rdi; \
 -	call _C_LABEL(xen_write_psl)
 +#endif /* DIAGNOSTIC */
  
 +/* CLI() with preemtion disabled */
 +#define CLI2(temp_reg, temp_reg2) \
 + 	movq CPUVAR(CURLWP),% ## temp_reg2 ;			\
 +	incl L_NOPREEMPT(% ## temp_reg2);			\
 + 	movq CPUVAR(VCPU),%r ## temp_reg ;			\
 +	movb $1,EVTCHN_UPCALL_MASK(%r ## temp_reg);		\
 +	decl L_NOPREEMPT(% ## temp_reg2);
  
  #else /* XENPV */
  #define	XEN_ONLY2(x,y)
  #define	NOT_XEN(x)	x
  #define CLI(temp_reg) cli
 +#define CLI2(temp_reg, temp_reg2) cli
  #define STI(temp_reg) sti
 -#define PUSHF(temp_reg) pushf
 -#define POPL popl
  #endif	/* XENPV */
  
  #define HP_NAME_CLAC		1
 
 --bYTERQBjABj6NPgt--
 


Home | Main Index | Thread Index | Old Index