NetBSD-Bugs archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: port-xen/58561 (panic: kernel diagnostic assertion, "x86_read_psl() == 0" failed: file, "/home/netbsd/10/src/sys/arch/x86/x86/pmap.c", line 3581)
The following reply was made to PR port-xen/58561; it has been noted by GNATS.
From: Manuel Bouyer <bouyer%antioche.eu.org@localhost>
To: Konrad Schroder <perseant%hhhh.org@localhost>
Cc: gnats-bugs%NetBSD.org@localhost, port-xen-maintainer%netbsd.org@localhost,
netbsd-bugs%netbsd.org@localhost, gnats-admin%netbsd.org@localhost, riastradh%NetBSD.org@localhost,
campbell+netbsd%mumble.net@localhost, cherry%NetBSD.org@localhost
Subject: Re: port-xen/58561 (panic: kernel diagnostic assertion,
"x86_read_psl() == 0" failed: file,
"/home/netbsd/10/src/sys/arch/x86/x86/pmap.c", line 3581)
Date: Mon, 12 Jan 2026 23:20:34 +0100
--bYTERQBjABj6NPgt
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
On Sun, Jan 11, 2026 at 08:35:10PM -0800, Konrad Schroder wrote:
> On 1/10/2026 2:33 PM, Manuel Bouyer wrote:
> > And this assembly work may have allowed me to find the problem.
> > Basically, the CLI and STI macros are not atomic on Xen PV, and if preemption
> > happens at a bad time we may end up updating the upcall_mask of our previous
> > CPU. The attached patch should close this race condition; hopefully it's
> > the last one.
>
> I can confirm that, at least in my case, this allows me to build
> continuously all day without a crash.
Good, it points that we're on the right track.
Can you try the attached patch instead ? It tries to limit the extra
instructions to only places where it's needed, and add DIAGNOSTIC code
to check it's really not needed in other places.
--
Manuel Bouyer <bouyer%antioche.eu.org@localhost>
NetBSD: 26 ans d'experience feront toujours la difference
--
--bYTERQBjABj6NPgt
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=diff2
Index: sys/arch/amd64/amd64/amd64_trap.S
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/amd64_trap.S,v
retrieving revision 1.55
diff -u -p -u -r1.55 amd64_trap.S
--- sys/arch/amd64/amd64/amd64_trap.S 27 Feb 2023 16:24:28 -0000 1.55
+++ sys/arch/amd64/amd64/amd64_trap.S 12 Jan 2026 22:17:57 -0000
@@ -90,7 +90,7 @@
*/
#ifdef XENPV
-#define PRE_TRAP CLI(cx); movq (%rsp),%rcx ; movq 8(%rsp),%r11 ; addq $0x10,%rsp
+#define PRE_TRAP CLI2(cx,r11); movq (%rsp),%rcx ; movq 8(%rsp),%r11 ; addq $0x10,%rsp
#else
#define PRE_TRAP
#endif
@@ -671,7 +671,7 @@ calltrap:
.Lalltraps_checkast:
movq CPUVAR(CURLWP),%r14
/* Check for ASTs on exit to user mode. */
- CLI(si)
+ CLI2(si, rdi)
CHECK_ASTPENDING(%r14)
je 3f
CLEAR_ASTPENDING(%r14)
Index: sys/arch/amd64/amd64/cpufunc.S
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/cpufunc.S,v
retrieving revision 1.70
diff -u -p -u -r1.70 cpufunc.S
--- sys/arch/amd64/amd64/cpufunc.S 6 Sep 2025 02:53:22 -0000 1.70
+++ sys/arch/amd64/amd64/cpufunc.S 12 Jan 2026 22:17:57 -0000
@@ -184,7 +184,7 @@ ENTRY(wbinvd)
END(wbinvd)
ENTRY(setusergs)
- CLI(ax)
+ CLI2(ax, rdi)
swapgs
movw %di, %gs
swapgs
Index: sys/arch/amd64/amd64/locore.S
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/locore.S,v
retrieving revision 1.235
diff -u -p -u -r1.235 locore.S
--- sys/arch/amd64/amd64/locore.S 31 Dec 2025 15:33:50 -0000 1.235
+++ sys/arch/amd64/amd64/locore.S 12 Jan 2026 22:17:57 -0000
@@ -1641,7 +1641,7 @@ ENTRY(handle_syscall)
* to ensure we don't take an interrupt with some of the user
* registers loaded.
*/
- CLI(si)
+ CLI2(si, rax)
/* Check for ASTs on exit to user mode. */
movl L_MD_ASTPENDING(%r14),%eax
orl CPUVAR(WANT_PMAPLOAD),%eax
@@ -1768,7 +1768,9 @@ IDTVEC(\name)
* But it didn't disable events
*/
pushq %rsi
- CLI(si)
+ pushq %rdi
+ CLI2(si, rdi)
+ popq %rdi
popq %rsi
addq $0x10,%rsp /* gap to match cs:rip */
pushq $2 /* error code */
@@ -1813,7 +1815,9 @@ IDTVEC_END(syscall32)
IDTVEC(osyscall)
#ifdef XENPV
pushq %rsi
- CLI(si)
+ pushq %rdi
+ CLI2(si, rdi)
+ popq %rdi
popq %rsi
movq (%rsp),%rcx
movq 8(%rsp),%r11
Index: sys/arch/amd64/amd64/machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.378
diff -u -p -u -r1.378 machdep.c
--- sys/arch/amd64/amd64/machdep.c 21 Dec 2025 07:00:26 -0000 1.378
+++ sys/arch/amd64/amd64/machdep.c 12 Jan 2026 22:17:57 -0000
@@ -2462,3 +2462,8 @@ idt_vec_init_cpu_md(struct idt_vec *iv,
iv->iv_idt = (void *)idt_vaddr;
}
}
+
+#if defined(DIAGNOSTIC) && defined(XENPV)
+const char *cli_panic = "cli with preemtion";
+const char *sti_panic = "sti with interrupts enabled";
+#endif
Index: sys/arch/amd64/amd64/spl.S
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/spl.S,v
retrieving revision 1.49
diff -u -p -u -r1.49 spl.S
--- sys/arch/amd64/amd64/spl.S 1 Mar 2023 08:38:50 -0000 1.49
+++ sys/arch/amd64/amd64/spl.S 12 Jan 2026 22:17:57 -0000
@@ -378,7 +378,7 @@ IDTVEC(doreti)
movq %rsp,%rdi
KMSAN_INIT_ARG(8)
call _C_LABEL(trap)
- CLI(si)
+ CLI2(si, rdi)
jmp .Ldoreti_checkast
3:
CHECK_DEFERRED_SWITCH
@@ -389,6 +389,6 @@ IDTVEC(doreti)
9:
STI(si)
call _C_LABEL(do_pmap_load)
- CLI(si)
+ CLI2(si, rdi)
jmp .Ldoreti_checkast /* recheck ASTs */
IDTVEC_END(doreti)
Index: sys/arch/amd64/include/frameasm.h
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/include/frameasm.h,v
retrieving revision 1.55
diff -u -p -u -r1.55 frameasm.h
--- sys/arch/amd64/include/frameasm.h 30 Jul 2022 14:11:00 -0000 1.55
+++ sys/arch/amd64/include/frameasm.h 12 Jan 2026 22:17:57 -0000
@@ -23,31 +23,68 @@
#define XEN_ONLY2(x,y) x,y
#define NOT_XEN(x)
+#ifdef DIAGNOSTIC
+/*
+ * acessing EVTCHN_UPCALL_MASK is save only if preemption is disabled, i.e.:
+ * l_nopreempt is not 0, or
+ * ci_ilevel is nit 0, or
+ * EVTCHN_UPCALL_MASK is not 0
+ * ci_idepth is not negative
+ */
#define CLI(temp_reg) \
- movq CPUVAR(VCPU),%r ## temp_reg ; \
+ movq CPUVAR(CURLWP),%r ## temp_reg ; \
+ cmpl $0, L_NOPREEMPT(%r ## temp_reg); \
+ jne 199f; \
+ cmpb $0, CPUVAR(ILEVEL); \
+ jne 199f; \
+ movl CPUVAR(IDEPTH), %e ## temp_reg; \
+ test %eax, %eax; \
+ jns 199f; \
+ movq $panicstr,%r ## temp_reg ; \
+ cmpq $0, 0(%r ## temp_reg); \
+ jne 199f; \
+ movq _C_LABEL(cli_panic), %rdi; \
+ call _C_LABEL(panic); \
+199: movq CPUVAR(VCPU),%r ## temp_reg ; \
movb $1,EVTCHN_UPCALL_MASK(%r ## temp_reg);
#define STI(temp_reg) \
movq CPUVAR(VCPU),%r ## temp_reg ; \
- movb $0,EVTCHN_UPCALL_MASK(%r ## temp_reg);
+ cmpb $0, EVTCHN_UPCALL_MASK(%r ## temp_reg); \
+ jne 198f; \
+ movq $panicstr,%r ## temp_reg ; \
+ cmpq $0, 0(%r ## temp_reg); \
+ jne 198f; \
+ movq _C_LABEL(sti_panic), %rdi; \
+ call _C_LABEL(panic); \
+198: movb $0,EVTCHN_UPCALL_MASK(%r ## temp_reg); \
+ movq CPUVAR(CURLWP),%r ## temp_reg ;
+#else
+
+#define CLI(temp_reg) \
+ movq CPUVAR(VCPU),%r ## temp_reg ; \
+ movb $1,EVTCHN_UPCALL_MASK(%r ## temp_reg);
-#define PUSHF(temp_reg) \
+#define STI(temp_reg) \
movq CPUVAR(VCPU),%r ## temp_reg ; \
- movzbl EVTCHN_UPCALL_MASK(%r ## temp_reg), %e ## temp_reg; \
- pushq %r ## temp_reg
+ movb $0,EVTCHN_UPCALL_MASK(%r ## temp_reg); \
-#define POPF \
- popq %rdi; \
- call _C_LABEL(xen_write_psl)
+#endif /* DIAGNOSTIC */
+/* CLI() with preemtion disabled */
+#define CLI2(temp_reg, temp_reg2) \
+ movq CPUVAR(CURLWP),% ## temp_reg2 ; \
+ incl L_NOPREEMPT(% ## temp_reg2); \
+ movq CPUVAR(VCPU),%r ## temp_reg ; \
+ movb $1,EVTCHN_UPCALL_MASK(%r ## temp_reg); \
+ decl L_NOPREEMPT(% ## temp_reg2);
#else /* XENPV */
#define XEN_ONLY2(x,y)
#define NOT_XEN(x) x
#define CLI(temp_reg) cli
+#define CLI2(temp_reg, temp_reg2) cli
#define STI(temp_reg) sti
-#define PUSHF(temp_reg) pushf
-#define POPL popl
#endif /* XENPV */
#define HP_NAME_CLAC 1
--bYTERQBjABj6NPgt--
Home |
Main Index |
Thread Index |
Old Index