[parisc-linux] itlb miss handler optimizations!
Carlos O'Donell
carlos@baldric.uwo.ca
Mon, 11 Aug 2003 23:58:11 -0400
On Fri, Jul 25, 2003 at 12:46:15PM +0100, Matthew Wilcox wrote:
> can't do that. we have three sets of routines -- itlb_miss_common_11,
> itlb_miss_common_20 and itlb_miss_common_20w. we select between _20w
> or not at compile time (if it's 64-bit, it's PA 2.0 Wide), but select
> between _20 and _11 at boot time (fault_vector_20 vs fault_vector_11).
>
> shame on you, you didn't even try assembling it ;-)
The patch below assembles and boots on my C3K with a 32-bit kernel. I'm
looking for takers who want to try it in 64-bit mode. I'm running lmbench
to see if I can measure a difference between this and the original code.
I would be most appreciative if anyone would pipe up and say "Run X to
test if Y works better/faster/harder" :}
c.
--- arch/parisc/kernel/entry.S 9 Dec 2002 06:09:08 -0000 1.98
+++ arch/parisc/kernel/entry.S 12 Aug 2003 03:49:04 -0000
@@ -1469,8 +1469,7 @@ itlb_miss_20w:
mfctl %cr25,ptp /* load user pgd */
mfsp %sr7,t0 /* Get current space */
- or,*= %r0,t0,%r0 /* If kernel, nullify following test */
- cmpb,*<>,n t0,spc,itlb_fault /* forward */
+ cmpb,<>,n t0,spc,itlb_user_fault_20w /* forward */
/* First level page table lookup */
@@ -1535,8 +1534,7 @@ itlb_miss_11:
mfctl %cr25,ptp /* load user pgd */
mfsp %sr7,t0 /* Get current space */
- or,= %r0,t0,%r0 /* If kernel, nullify following test */
- cmpb,<>,n t0,spc,itlb_fault /* forward */
+ cmpb,<>,n t0,spc,itlb_user_fault_11 /* forward */
/* First level page table lookup */
@@ -1551,6 +1549,10 @@ itlb_miss_common_11:
sh2addl t0,ptp,ptp
ldi _PAGE_ACCESSED,t1
ldw 0(ptp),pte
+
+ /* Moved up from the 'zdep0' marker below so that it can run in parallel */
+ zdep spc,30,15,prot /* create prot id from space */
+
bb,>=,n pte,_PAGE_PRESENT_BIT,itlb_fault
/* Check whether the "accessed" bit was set, otherwise do so */
@@ -1559,7 +1561,7 @@ itlb_miss_common_11:
and,<> t1,pte,%r0 /* test and nullify if already set */
stw t0,0(ptp) /* write back pte */
- zdep spc,30,15,prot /* create prot id from space */
+ /* zdep0: the zdep that used to be here now runs earlier, before the bb test */
dep pte,8,7,prot /* add in prot bits from pte */
extru,= pte,_PAGE_NO_CACHE_BIT,1,r0
@@ -1602,8 +1604,7 @@ itlb_miss_20:
mfctl %cr25,ptp /* load user pgd */
mfsp %sr7,t0 /* Get current space */
- or,= %r0,t0,%r0 /* If kernel, nullify following test */
- cmpb,<>,n t0,spc,itlb_fault /* forward */
+ cmpb,<>,n t0,spc,itlb_user_fault_20 /* forward */
/* First level page table lookup */
@@ -1882,6 +1883,37 @@ kernel_bad_space:
dbit_fault:
b intr_save
ldi 20,%r8
+
+/* The following three labels relate to an optimization in the itlb handler.
+ itlb_user_fault_20w:
+ itlb_user_fault_20:
+ itlb_user_fault_11:
+ The common case keeps the CPU jumping forward and then backward; in the
+ uncommon case the cmpb fails (no jump), so branch prediction fails. */
+
+#ifdef __LP64__
+itlb_user_fault_20w:
+ /* User TLB miss for a space other than the user's own. Optimization. */
+ cmpb,= %r0,t0,itlb_miss_common_20w /* backward */
+ nop
+#else
+itlb_user_fault_20:
+ /* User TLB miss for a space other than the user's own. Optimization. */
+ cmpb,= %r0,t0,itlb_miss_common_20 /* backward */
+ nop
+
+/* FALL THROUGH - We don't care if we run the test twice. If someone
+ asks to have the "user is faulting death" path optimal
+ then they should seek help. */
+
+itlb_user_fault_11:
+ /* User TLB miss for a space other than the user's own. Optimization. */
+ cmpb,= %r0,t0,itlb_miss_common_11 /* backward */
+ nop
+#endif
+
+/* FALL THROUGH - We have a real itlb_fault from one of the above three
+ label sequences */
itlb_fault:
b intr_save