[parisc-linux-cvs] Patch for SMP support, etc.

John Marvin jsm@udlkern.fc.hp.com
Thu, 6 Sep 2001 03:48:16 -0600 (MDT)


A rather large patch that includes my current SMP support changes, plus
a variety of other fixes/changes.

John

--- arch/parisc/kernel/cache.c.old	Wed Aug 29 03:39:09 2001
+++ arch/parisc/kernel/cache.c	Wed Aug 29 04:37:37 2001
@@ -32,13 +32,28 @@ struct pdc_cache_info cache_info;
 static struct pdc_btlb_info btlb_info;
 #endif
 
+#ifdef CONFIG_SMP
+void
+flush_data_cache(void)
+{
+	smp_call_function((void (*)(void *))flush_data_cache_local, NULL, 1, 1);
+	flush_data_cache_local();
+}
+#endif
+
+void
+flush_cache_all_local(void)
+{
+	flush_instruction_cache_local();
+	flush_data_cache_local();
+}
+
 /* flushes EVERYTHING (tlb & cache) */
 
 void
 flush_all_caches(void)
 {
-	flush_instruction_cache();
-	flush_data_cache();
+	flush_cache_all();
 	flush_tlb_all();
 }
 
--- arch/parisc/kernel/smp.c.old	Wed Aug 29 03:39:09 2001
+++ arch/parisc/kernel/smp.c	Tue Sep  4 17:48:36 2001
@@ -53,7 +53,7 @@
 
 #define kDEBUG 0
 
-extern void _start(void);
+spinlock_t pa_dbit_lock = SPIN_LOCK_UNLOCKED;
 
 spinlock_t smp_lock = SPIN_LOCK_UNLOCKED;
 
@@ -139,7 +139,7 @@ halt_processor(void) 
 #else
 	/* REVISIT : redirect I/O Interrupts to another CPU? */
 	/* REVISIT : does PM *know* this CPU isn't available? */
-	clear_bit(smp_processor_id(), &cpu_callin_map);
+	clear_bit(smp_processor_id(), (void *)&cpu_callin_map);
 	__cli();
 	for (;;)
 		;
@@ -151,96 +151,110 @@ void
 ipi_interrupt(int irq, void *dev_id, struct pt_regs *regs) 
 {
 	int this_cpu = smp_processor_id();
-	unsigned long *ipis = &(cpu_data[this_cpu].pending_ipi);
+	struct cpuinfo_parisc *p = &cpu_data[this_cpu];
 	unsigned long ops;
+	unsigned long flags;
 
 	/* Count this now; we may make a call that never returns. */
-	cpu_data[this_cpu].ipi_count++;
+	p->ipi_count++;
 
 	mb();	/* Order interrupt and bit testing. */
 
-	while ((ops = xchg(ipis, 0)) != 0) {
-	  mb();	/* Order bit clearing and data access. */
-	  do {
-		unsigned long which = ffz(~ops);
+	for (;;) {
+		spin_lock_irqsave(&(p->lock),flags);
+		ops = p->pending_ipi;
+		p->pending_ipi = 0;
+		spin_unlock_irqrestore(&(p->lock),flags);
+
+		mb(); /* Order bit clearing and data access. */
 
-		switch (which) {
-		case IPI_RESCHEDULE:
+		if (!ops)
+		    break;
+
+		while (ops) {
+			unsigned long which = ffz(~ops);
+
+			switch (which) {
+			case IPI_RESCHEDULE:
 #if (kDEBUG>=100)
-			printk("CPU%d IPI_RESCHEDULE\n",this_cpu);
+				printk(KERN_DEBUG "CPU%d IPI_RESCHEDULE\n",this_cpu);
 #endif /* kDEBUG */
-			ops &= ~(1 << IPI_RESCHEDULE);
-			/* 
-			 * Reschedule callback.  Everything to be done is
-			 * done by the interrupt return path.  
-			 */
-			break;
-			
-		case IPI_CALL_FUNC: 
+				ops &= ~(1 << IPI_RESCHEDULE);
+				/*
+				 * Reschedule callback.  Everything to be
+				 * done is done by the interrupt return path.
+				 */
+				break;
+
+			case IPI_CALL_FUNC:
 #if (kDEBUG>=100)
-			printk("CPU%d IPI_CALL_FUNC\n",this_cpu);
+				printk(KERN_DEBUG "CPU%d IPI_CALL_FUNC\n",this_cpu);
 #endif /* kDEBUG */
-			ops &= ~(1 << IPI_CALL_FUNC);
-			{
-				volatile struct smp_call_struct *data;
-				void (*func)(void *info);
-				void *info;
-				int wait;
-
-				data = smp_call_function_data;
-				func = data->func;
-				info = data->info;
-				wait = data->wait;
-
-				mb();
-				atomic_dec (&data->unstarted_count);
-
-				/* At this point, *data may be gone unless wait is true.  */
-				(*func)(info);
-
-				/* Notify the sending CPU that the task is done.  */
-				mb();
-				if (wait) 
-					atomic_dec (&data->unfinished_count);
-			}
-			break;
+				ops &= ~(1 << IPI_CALL_FUNC);
+				{
+					volatile struct smp_call_struct *data;
+					void (*func)(void *info);
+					void *info;
+					int wait;
+
+					data = smp_call_function_data;
+					func = data->func;
+					info = data->info;
+					wait = data->wait;
+
+					mb();
+					atomic_dec (&data->unstarted_count);
+
+					/* At this point, *data can't
+					 * be relied upon.
+					 */
+
+					(*func)(info);
+
+					/* Notify the sending CPU that the
+					 * task is done.
+					 */
+					mb();
+					if (wait)
+						atomic_dec (&data->unfinished_count);
+				}
+				break;
 
-		case IPI_CPU_START:
+			case IPI_CPU_START:
 #if (kDEBUG>=100)
-			printk("CPU%d IPI_CPU_START\n",this_cpu);
+				printk(KERN_DEBUG "CPU%d IPI_CPU_START\n",this_cpu);
 #endif /* kDEBUG */
-			ops &= ~(1 << IPI_CPU_START);
+				ops &= ~(1 << IPI_CPU_START);
 #ifdef ENTRY_SYS_CPUS
-			cpu_data[this_cpu].state = STATE_RUNNING;
+				p->state = STATE_RUNNING;
 #endif
-			break;
+				break;
 
-		case IPI_CPU_STOP:
+			case IPI_CPU_STOP:
 #if (kDEBUG>=100)
-			printk("CPU%d IPI_CPU_STOP\n",this_cpu);
+				printk(KERN_DEBUG "CPU%d IPI_CPU_STOP\n",this_cpu);
 #endif /* kDEBUG */
-			ops &= ~(1 << IPI_CPU_STOP);
+				ops &= ~(1 << IPI_CPU_STOP);
 #ifdef ENTRY_SYS_CPUS
 #else
-			halt_processor();
+				halt_processor();
 #endif
-			break;
+				break;
 
-		case IPI_CPU_TEST:
+			case IPI_CPU_TEST:
 #if (kDEBUG>=100)
-			printk("CPU%d is alive!\n",this_cpu);
+				printk(KERN_DEBUG "CPU%d is alive!\n",this_cpu);
 #endif /* kDEBUG */
-			ops &= ~(1 << IPI_CPU_TEST);
-			break;
-
-		default:
-			printk(KERN_CRIT "Unknown IPI num on CPU%d: %lu\n", this_cpu, which);
-			ops &= ~(1 << which);
-			return;
-		} /* Switch */
-	  } while (ops);
+				ops &= ~(1 << IPI_CPU_TEST);
+				break;
 
-	  mb();	/* Order data access and bit testing. */
+			default:
+				printk(KERN_CRIT "Unknown IPI num on CPU%d: %lu\n",
+					this_cpu, which);
+				ops &= ~(1 << which);
+				return;
+			} /* Switch */
+		} /* while (ops) */
 	}
 	return;
 }
@@ -249,8 +263,13 @@ ipi_interrupt(int irq, void *dev_id, str
 static inline void
 ipi_send(int cpu, enum ipi_message_type op)
 {
-	cpu_data[cpu].pending_ipi |= 1 << op;
+	struct cpuinfo_parisc *p = &cpu_data[cpu];
+	unsigned long flags;
+
+	spin_lock_irqsave(&(p->lock),flags);
+	p->pending_ipi |= 1 << op;
 	gsc_writel(IRQ_OFFSET(IPI_IRQ), cpu_data[cpu].hpa);
+	spin_unlock_irqrestore(&(p->lock),flags);
 }
 
 
@@ -277,21 +296,6 @@ send_IPI_allbutself(enum ipi_message_typ
 	}
 }
 
-static inline void
-send_IPI_all(enum ipi_message_type op)
-{
-	int i;
-
-	for (i = 0; i < smp_num_cpus; i++)
-		send_IPI_single(__cpu_logical_map[i], op);
-}
-
-static inline void
-send_IPI_self(enum ipi_message_type op)
-{
-	send_IPI_single(smp_processor_id(), op);
-}
-
 inline void 
 smp_send_stop(void)	{ send_IPI_allbutself(IPI_CPU_STOP); }
 
@@ -328,19 +332,9 @@ smp_call_function (void (*func) (void *i
 	atomic_set(&data.unfinished_count, smp_num_cpus - 1);
 
 	if (retry) {
-		while (1) {
-			if (smp_call_function_data) {
-				schedule ();  /*  Give a mate a go  */
-				continue;
-			}
-			spin_lock (&lock);
-			if (smp_call_function_data) {
-				spin_unlock (&lock);  /*  Bad luck  */
-				continue;
-			}
-			/*  Mine, all mine!  */
-			break;
-		}
+		spin_lock (&lock);
+		while (smp_call_function_data != 0)
+			barrier();
 	}
 	else {
 		spin_lock (&lock);
@@ -362,15 +356,19 @@ smp_call_function (void (*func) (void *i
 		time_before (jiffies, timeout) )
 		barrier ();
 
+	/* We either got one or timed out. Release the lock */
+
+	mb();
+	smp_call_function_data = NULL;
 	if (atomic_read (&data.unstarted_count) > 0) {
-		smp_call_function_data = NULL;
+		printk(KERN_CRIT "SMP CALL FUNCTION TIMED OUT! (cpu=%d)\n",
+		      smp_processor_id());
 		return -ETIMEDOUT;
 	}
 
 	while (wait && atomic_read (&data.unfinished_count) > 0)
 			barrier ();
 
-	smp_call_function_data = NULL;
 	return 0;
 }
 
@@ -403,22 +401,18 @@ static int __init maxcpus(char *str)
 
 __setup("maxcpus=", maxcpus);
 
-
-void
-proxy_flush_tlb_all(void)
-{
-	flush_tlb_all();
-}
-
 /*
  * Flush all other CPU's tlb and then mine.  Do this with smp_call_function()
  * as we want to ensure all TLB's flushed before proceeding.
  */
+
+extern void flush_tlb_all_local(void);
+
 void
 smp_flush_tlb_all(void)
 {
-	smp_call_function((void (*)(void *))proxy_flush_tlb_all, NULL, 1, 1);
-	flush_tlb_all();
+	smp_call_function((void (*)(void *))flush_tlb_all_local, NULL, 1, 1);
+	flush_tlb_all_local();
 }
 
 
@@ -432,7 +426,6 @@ smp_setup_percpu_timer(int cpunum)
         cpu_data[cpunum].prof_multiplier = 1;
 }
 
-
 void 
 smp_do_timer(struct pt_regs *regs)
 {
@@ -447,7 +440,6 @@ smp_do_timer(struct pt_regs *regs)
 	}
 }
 
-
 /*
  * Called by secondaries to update state and initialize CPU registers.
  */
@@ -460,6 +452,8 @@ smp_cpu_init(int cpunum)
 	/* Set modes and Enable floating point coprocessor */
 	(void) init_per_cpu(cpunum);
 
+	disable_sr_hashing();
+
 	mb();
 
 	/* Well, support 2.4 linux scheme as well. */
@@ -467,7 +461,7 @@ smp_cpu_init(int cpunum)
 	{
 		extern void machine_halt(void); /* arch/parisc.../process.c */
 
-		printk("CPU#%d already initialized!\n", cpunum);
+		printk(KERN_CRIT "CPU#%d already initialized!\n", cpunum);
 		machine_halt();
 	}  
 
@@ -490,9 +484,21 @@ void __init smp_callin(void)
 {
 	extern void cpu_idle(void);	/* arch/parisc/kernel/process.c */
 	int slave_id = cpu_now_booting;
+	void *istack;
 
 	smp_cpu_init(slave_id);
 
+	istack = (void *)__get_free_pages(GFP_KERNEL,ISTACK_ORDER);
+	if (istack == NULL) {
+	    printk(KERN_CRIT "Failed to allocate interrupt stack for cpu %d\n",slave_id);
+	    BUG();
+	}
+
+	mtctl(istack,31);
+
+	flush_cache_all_local(); /* start with known state */
+	flush_tlb_all_local();
+
 	local_irq_enable();  /* Interrupts have been off until now */
 
 	/* Slaves wait here until Big Poppa daddy say "jump" */
@@ -505,9 +511,6 @@ void __init smp_callin(void)
 panic("smp_callin() AAAAaaaaahhhh....\n");
 }
 
-
-
-
 /*
  * Create the idle task for a new Slave CPU.  DO NOT use kernel_thread()
  * because that could end up calling schedule(). If it did, the new idle
@@ -602,7 +605,7 @@ static int smp_boot_one_cpu(int cpuid, i
 	init_tasks[cpunum] = NULL;
 	free_task_struct(idle);
 
-	printk("SMP: CPU:%d is stuck.\n", cpuid);
+	printk(KERN_CRIT "SMP: CPU:%d is stuck.\n", cpuid);
 	return -1;
 
 alive:
@@ -610,7 +613,7 @@ alive:
 	__cpu_number_map[cpuid] = cpunum;
 	/* Remember the Slave data */
 #if (kDEBUG>=100)
-	printk("SMP: CPU:%d (num %d) came alive after %ld _us\n",
+	printk(KERN_DEBUG "SMP: CPU:%d (num %d) came alive after %ld _us\n",
 		cpuid,  cpunum, timeout * 100);
 #endif /* kDEBUG */
 #ifdef ENTRY_SYS_CPUS
@@ -648,7 +651,7 @@ void __init smp_boot_cpus(void)
 	/* Setup BSP mappings */
 	__cpu_number_map[bootstrap_processor] = 0;
 	__cpu_logical_map[0] = bootstrap_processor;
-	printk("SMP: bootstrap CPU ID is %d\n",bootstrap_processor);
+	printk(KERN_DEBUG "SMP: bootstrap CPU ID is %d\n",bootstrap_processor);
 	init_task.processor = 0; 
 	current->processor = 0;	/*These are set already*/
 	current->active_mm = &init_mm;
@@ -666,7 +669,7 @@ void __init smp_boot_cpus(void)
 	}
 
 	if (max_cpus != -1) 
-		printk("Limiting CPUs to %d\n", max_cpus);
+		printk(KERN_INFO "Limiting CPUs to %d\n", max_cpus);
 
 	/* We found more than one CPU.... */
 	if (boot_cpu_data.cpu_count > 1) {
@@ -688,7 +691,7 @@ void __init smp_boot_cpus(void)
 		}
 	}
 	if (cpu_count == 1) {
-		printk("SMP: Bootstrap processor only.\n");
+		printk(KERN_INFO "SMP: Bootstrap processor only.\n");
 	}
 
 	printk(KERN_INFO "SMP: Total %d of %d processors activated "
@@ -811,50 +814,6 @@ int sys_cpus(int argc, char **argv)
 	return 0;
 }
 #endif /* ENTRY_SYS_CPUS */
-
-
-int sys_get_cpu(void)
-{
-	return(smp_processor_id());
-}
-
-
-int sys_ipi_send(int cpu,int op)
-{
-	if (cpu != NO_PROC_ID) {
-		if (cpu<0 || cpu > NR_CPUS)
-			goto sys_ipi_error;
-		if (__cpu_number_map[cpu] == NO_PROC_ID)
-			goto sys_ipi_error;
-	}
-	switch(op) {
-		case IPI_CALL_FUNC:
-			if(cpu != NO_PROC_ID) {
-				printk("Note:Ignoring cpuid:%d and calling function on all CPUS\n",cpu);
-			}
-			smp_flush_tlb_all();
-			break;
-		case IPI_RESCHEDULE: 
-		case IPI_CPU_TEST:
-		case IPI_CPU_START:
-		case IPI_CPU_STOP:
-		default:
-			if(cpu != NO_PROC_ID){
-				send_IPI_single(cpu,op);break;
-			} else {	
-				if(op == IPI_CPU_STOP){ /* we don't want to down this cpu also */
-					send_IPI_allbutself(op);
-				} else {
-					send_IPI_all(op);
-				}
-			}
-			break;
-	}
-	return 0;
-sys_ipi_error:
-	printk("CPU:%d cann't send IPI to CPU:%d\n",smp_processor_id(),cpu);
-	return -1;
-}
 
 #ifdef CONFIG_PROC_FS
 int __init
--- arch/parisc/kernel/entry.S.old	Wed Aug 29 03:39:09 2001
+++ arch/parisc/kernel/entry.S	Wed Sep  5 07:05:34 2001
@@ -25,11 +25,6 @@
 #include <linux/config.h>
 #include <asm/offset.h>
 
-/* the following is the setup i think we should follow:
- * whenever the CPU is interruptible, the following has to be true:
- *  CR30 is the kernel sp or 0 if we currently use the kernel stack
- *  CR31 is the kernel gp */ 
-
 /* we have the following possibilities to act on an interruption:
  *  - handle in assembly and use shadowed registers only
  *  - save registers to kernel stack and handle in assembly or C */
@@ -41,32 +36,40 @@
 #include <asm/signal.h>
 #include <asm/unistd.h>
 
-/* FIXME! asm_get_current macro has hardcoded dependency on kernel stack size */
-
 #ifdef __LP64__
 #define FRAME_SIZE	128
 #define CMPIB           cmpib,*
-
-	.macro  asm_get_current reg
-	depdi   0,63,14,\reg
-	.endm
+#define CMPB            cmpb,*
 
 	.level 2.0w
 #else
 #define FRAME_SIZE	64
 #define CMPIB           cmpib,
-	.macro  asm_get_current reg
-	depi    0,31,14,\reg
-	.endm
+#define CMPB            cmpb,
 
 	.level 2.0
 #endif
 
+	.import         pa_dbit_lock,data
+
+	/* space_to_prot macro creates a prot id from a space id */
+
+#if (SPACEID_SHIFT) == 0
+	.macro  space_to_prot spc prot
+	depd,z  \spc,62,31,\prot
+	.endm
+#else
+	.macro  space_to_prot spc prot
+	extrd,u \spc,(64 - (SPACEID_SHIFT)),32,\prot
+	.endm
+#endif
+
 	/* Switch to virtual mapping, trashing only %r1 */
 	.macro  virt_map
 	rsm     PSW_SM_Q,%r0
 	tovirt_r1 %r29
 	mfsp	%sr7, %r1
+	or,=    %r0,%r1,%r0 /* Only save sr7 in sr3 if sr7 != 0 */
 	mtsp	%r1, %sr3
 	mtsp	%r0, %sr4
 	mtsp	%r0, %sr5
@@ -88,12 +91,40 @@
 	.endm
 
 	/*
-	 * The get_stack macro is responsible for determining the
-	 * kernel stack value. If cr30 is zero then we are already
-	 * on the kernel stack, so we just use the existing sp.
-	 * Otherwise, a non-zero value in cr30 indicates we just
-	 * faulted in userland, and cr30 contains the value to use
-	 * for the kernel stack pointer.
+	 * The "get_stack" macros are responsible for determining the
+	 * kernel stack value.
+	 *
+	 * For Faults:
+	 *      If sr7 == 0
+	 *          Already using a kernel stack, so call the
+	 *          get_stack_use_r30 macro to push a pt_regs structure
+	 *          on the stack, and store registers there.
+	 *      else
+	 *          Need to set up a kernel stack, so call the
+	 *          get_stack_use_cr30 macro to set up a pointer
+	 *          to the pt_regs structure contained within the
+	 *          task pointer pointed to by cr30. Set the stack
+	 *          pointer to point to the end of the task structure.
+	 *
+	 * For Interrupts:
+	 *      If sr7 == 0
+	 *          Already using a kernel stack, check to see if r30
+	 *          is already pointing to the per processor interrupt
+	 *          stack. If it is, call the get_stack_use_r30 macro
+	 *          to push a pt_regs structure on the stack, and store
+	 *          registers there. Otherwise, call get_stack_use_cr31
+	 *          to get a pointer to the base of the interrupt stack
+	 *          and push a pt_regs structure on that stack.
+	 *      else
+	 *          Need to set up a kernel stack, so call the
+	 *          get_stack_use_cr30 macro to set up a pointer
+	 *          to the pt_regs structure contained within the
+	 *          task pointer pointed to by cr30. Set the stack
+	 *          pointer to point to the end of the task structure.
+	 *          N.B: We don't use the interrupt stack for the
+	 *          first interrupt from userland, because signals/
+	 *          resched's are processed when returning to userland,
+	 *          and we can sleep in those cases.
 	 *
 	 * Note that we use shadowed registers for temps until
 	 * we can save %r26 and %r29. %r26 is used to preserve
@@ -105,29 +136,48 @@
 	 * or handle_interruption. %r29 is used to hold a pointer
 	 * the register save area, and once again, it needs to
 	 * be a non-shadowed register so that it survives the rfir.
+	 *
+	 * N.B. TASK_SZ_ALGN and PT_SZ_ALGN include space for a stack frame.
 	 */
 
-	.macro	get_stack
-	/* TASK_SZ_ALGN and PT_SZ_ALGN include space for a stack frame */
-	mfctl	%cr30, %r1 
-	CMPIB=,n 0, %r1, 0f   /* forward so predicted not taken */
+	.macro  get_stack_use_cr30
 
 	/* we save the registers in the task struct */
 
+	mfctl   %cr30, %r1
 	tophys  %r1,%r9
 	ldo     TASK_REGS(%r9),%r9
 	STREG   %r30, PT_GR30(%r9)
-	STREG   %r1,  PT_CR30(%r9)
 	ldo	TASK_SZ_ALGN(%r1), %r30
-	b	1f		    /* unconditional so predicted taken */	
-	mtctl	%r0,%cr30
-0:
+	STREG   %r29,PT_GR29(%r9)
+	STREG   %r26,PT_GR26(%r9)
+	copy    %r9,%r29
+	copy    %r8,%r26
+	.endm
+
+	.macro  get_stack_use_cr31
+
+	/* We put a struct pt_regs on the per processor interrupt stack
+	 * pointed to by %cr31, and save the registers there.
+	 * N.B: Caller puts value of cr31 in %r1!
+	 */
+
+	tophys  %r1,%r9
+	STREG   %r30, PT_GR30(%r9)
+	ldo     PT_SZ_ALGN(%r1),%r30
+	STREG   %r29,PT_GR29(%r9)
+	STREG   %r26,PT_GR26(%r9)
+	copy    %r9,%r29
+	copy    %r8,%r26
+	.endm
+
+	.macro  get_stack_use_r30
+
 	/* we put a struct pt_regs on the stack and save the registers there */
+
 	tophys  %r30,%r9
-	ldo	PT_SZ_ALGN(%r30),%r30
 	STREG   %r30,PT_GR30(%r9)
-	STREG   %r0,PT_CR30(%r9)
-1:
+	ldo	PT_SZ_ALGN(%r30),%r30
 	STREG   %r29,PT_GR29(%r9)
 	STREG   %r26,PT_GR26(%r9)
 	copy    %r9,%r29
@@ -135,21 +185,9 @@
 	.endm
 
 	.macro  rest_stack
-	LDREG   PT_CR30(%r29), %r1
-	CMPIB=,n 0, %r1, 2f/* forward so predicted not taken */
-
-	/* we restore the registers out of the task struct */
-	mtctl	%r1, %cr30
 	LDREG   PT_GR1(%r29), %r1
 	LDREG   PT_GR30(%r29),%r30
-	b	3f
 	LDREG   PT_GR29(%r29),%r29
-2:
-	/* we take a struct pt_regs off the stack */
-	LDREG   PT_GR1(%r29),  %r1
-	LDREG   PT_GR29(%r29), %r29
-	ldo	-PT_SZ_ALGN(%r30), %r30
-3:
 	.endm
 
 	/* default interruption handler
@@ -164,6 +202,8 @@
 	 * (calls irq.c:do_cpu_irq_mask) */
 	.macro	extint code
 	mfctl   %cr23, %r8
+	mfctl   %cr15, %r9
+	and     %r8,%r9,%r8 /* Only process non masked interrupts */
 	b	intr_extint
 	mtctl   %r8, %cr23
 	.align	32
@@ -215,7 +255,6 @@
 	 */
 
 	.macro	itlb_20 code
-
 	mfctl	%pcsq, spc
 #ifdef __LP64__
 	b       itlb_miss_20w
@@ -374,11 +413,10 @@
 	 * fault_vector_11 and fault_vector_20 are on the
 	 * same page. This is only necessary as long as we
 	 * write protect the kernel text, which we may stop
-	 * doing once we use large parge translations to cover
+	 * doing once we use large page translations to cover
 	 * the static part of the kernel address space.
 	 */
 
-
 	.export fault_vector_20
 
 	.text
@@ -558,7 +596,6 @@ ret_from_kernel_thread:
 	.export	__execve, code
 __execve:
 	copy	%r2, %r15
-	copy	%r23, %r17
 	copy	%r30, %r16
 	ldo	PT_SZ_ALGN(%r30), %r30
 	STREG	%r26, PT_GR26(%r16)
@@ -570,12 +607,8 @@ __execve:
 	bl	sys_execve, %r2
 	copy	%r16, %r26
 
-	cmpib,<>,n 0,%r28,__execve_failed
-
-	b	intr_return
-	STREG	%r17, PT_CR30(%r16)
+	cmpib,=,n 0,%r28,intr_return    /* forward */
 
-__execve_failed:
 	/* yes, this will trap and die. */
 	copy	%r15, %r2
 	bv	%r0(%r2)
@@ -601,8 +634,9 @@ _switch_to:
 	LDREG	TASK_PT_KPC(%r25), %r2
 
 	STREG	%r30, TASK_PT_KSP(%r26)
-	bv	%r0(%r2)
 	LDREG	TASK_PT_KSP(%r25), %r30
+	bv	%r0(%r2)
+	mtctl   %r25,%cr30
 
 _switch_to_ret:
 	mtctl	%r0, %cr0		/* Needed for single stepping */
@@ -633,12 +667,10 @@ _switch_to_ret:
 
 	.export	syscall_exit_rfi
 syscall_exit_rfi:
-	copy    %r30,%r16
-	asm_get_current %r16
+	mfctl   %cr30,%r16
 	ldo	TASK_REGS(%r16),%r16
 	/* Force iaoq to userspace, as the user has had access to our current
-	 * context via sigcontext.
-	 * XXX do we need any other protection here?
+	 * context via sigcontext. Also Filter the PSW for the same reason.
 	 */
 	LDREG	PT_IAOQ0(%r16),%r19
 	depi	3,31,2,%r19
@@ -646,6 +678,19 @@ syscall_exit_rfi:
 	LDREG	PT_IAOQ1(%r16),%r19
 	depi	3,31,2,%r19
 	STREG	%r19,PT_IAOQ1(%r16)
+	LDREG   PT_PSW(%r16),%r19
+	ldil    L%USER_PSW_MASK,%r1
+	ldo     R%USER_PSW_MASK(%r1),%r1
+#ifdef __LP64__
+	ldil    L%USER_PSW_HI_MASK,%r20
+	ldo     R%USER_PSW_HI_MASK(%r20),%r20
+	depd    %r20,31,32,%r1
+#endif
+	and     %r19,%r1,%r19 /* Mask out bits that user shouldn't play with */
+	ldil    L%USER_PSW,%r1
+	ldo     R%USER_PSW(%r1),%r1
+	or      %r19,%r1,%r19 /* Make sure default USER_PSW bits are set */
+	STREG   %r19,PT_PSW(%r16)
 
 	/*
 	 * If we aren't being traced, we never saved space registers
@@ -669,16 +714,16 @@ syscall_exit_rfi:
 
 intr_return:
 
+	ssm     PSW_SM_I, %r0
+
 	/* Check for software interrupts */
 
 	.import irq_stat,data
 
 	ldil	L%irq_stat,%r19
 	ldo	R%irq_stat(%r19),%r19
-
 #ifdef CONFIG_SMP
-	copy	%r30,%r1
-	asm_get_current %r1
+	mfctl   %cr30,%r1
 	ldw	TASK_PROCESSOR(%r1),%r1 /* get cpu # - int */
 	/* shift left ____cacheline_aligned (aka L1_CACHE_BYTES) amount
 	** irq_stat[] is defined using ____cacheline_aligned.
@@ -697,15 +742,13 @@ intr_return:
 intr_check_resched:
 
 	/* check for reschedule */
-	copy    %r30,%r1
-	asm_get_current %r1
+	mfctl   %cr30,%r1
 	LDREG     TASK_NEED_RESCHED(%r1),%r19	/* sched.h: long need_resched */
 	CMPIB<>,n 0,%r19,intr_do_resched /* forward */
 
 intr_check_sig:
 	/* As above */
-	copy    %r30,%r1
-	asm_get_current %r1
+	mfctl   %cr30,%r1
 	ldw	TASK_SIGPENDING(%r1),%r19	/* sched.h: int sigpending */
 	cmpib,<>,n 0,%r19,intr_do_signal /* forward */
 
@@ -750,17 +793,20 @@ intr_do_softirq:
 	.import schedule,code
 intr_do_resched:
 	/* Only do reschedule if we are returning to user space */
-	LDREG     PT_SR7(%r16), %r20
+	LDREG   PT_IASQ0(%r16), %r20
+	CMPIB= 0,%r20,intr_restore /* backward */
+	nop
+	LDREG   PT_IASQ1(%r16), %r20
 	CMPIB= 0,%r20,intr_restore /* backward */
 	nop
 
+	bl      schedule,%r2
 #ifdef __LP64__
 	ldo	-16(%r30),%r29		/* Reference param save area */
+#else
+	nop
 #endif
-	bl      schedule,%r2
-	ssm     PSW_SM_I, %r0
 
-	/* It's OK to leave I bit on */
 	b       intr_return /* start over if we got a resched */
 	nop
 
@@ -788,11 +834,33 @@ intr_do_signal:
 
 	/*
 	 * External interrupts. r8 contains argument for do_cpu_irq_mask.
-	 * get_stack moves value of r8 to r26.
+	 * "get_stack" macros move the value of r8 to r26.
 	 */
 
 intr_extint:
-	get_stack
+	mfsp    %sr7,%r16
+	CMPIB=,n 0,%r16,1f
+	get_stack_use_cr30
+	b,n 3f
+
+1:
+#if 0  /* Interrupt Stack support not working yet! */
+	mfctl   %cr31,%r1
+	copy    %r30,%r17
+	/* FIXME! depi below has hardcoded idea of interrupt stack size (32k)*/
+#ifdef __LP64__
+	depdi   0,63,15,%r17
+#else
+	depi    0,31,15,%r17
+#endif
+	CMPB=,n     %r1,%r17,2f
+	get_stack_use_cr31
+	b,n 3f
+#endif
+2:
+	get_stack_use_r30
+
+3:
 	save_specials	%r29
 	virt_map
 	save_general	%r29
@@ -809,9 +877,9 @@ intr_extint:
 #endif
 
 	/*
-	** We need to either load the CPU's ID or IRQ region.
-	** Until we have a "per CPU" IRQ regions, this is easy.
-	*/
+	 * We need to either load the CPU's ID or IRQ region.
+	 * Until we have "per CPU" IRQ regions, this is easy.
+	 */
 	ldil		L%cpu_irq_region, %r25
 	ldo		R%cpu_irq_region(%r25), %r25
 
@@ -830,7 +898,15 @@ intr_extint:
 	.export         intr_save, code /* for os_hpmc */
 
 intr_save:
-	get_stack
+	mfsp    %sr7,%r16
+	CMPIB=,n 0,%r16,1f
+	get_stack_use_cr30
+	b,n 2f
+
+1:
+	get_stack_use_r30
+
+2:
 	save_specials	%r29
 
 	/* If this trap is a itlb miss, skip saving/adjusting isr/ior */
@@ -873,6 +949,7 @@ intr_save:
 	STREG           %r16, PT_ISR(%r29)
 	STREG           %r17, PT_IOR(%r29)
 
+
 skip_save_ior:
 	virt_map
 	save_general	%r29
@@ -893,8 +970,7 @@ skip_save_ior:
 	bl		handle_interruption,%r2
 	copy		%r25, %r16	/* save pt_regs */
 
-	b		intr_return
-	nop
+	b,n             intr_return
 
 	/*
 	 * Note for all tlb miss handlers:
@@ -926,7 +1002,6 @@ skip_save_ior:
 #ifdef __LP64__
 
 dtlb_miss_20w:
-
 	extrd,u         spc,63,7,t1     /* adjust va */
 	depd            t1,31,7,va      /* adjust va */
 	depdi           0,63,7,spc      /* adjust space */
@@ -966,7 +1041,7 @@ dtlb_miss_20w:
 	and,*<>         t1,pte,%r0      /* test and nullify if already set */
 	std             t0,0(ptp)       /* write back pte */
 
-	depd,z          spc,62,31,prot  /* create prot id from space */
+	space_to_prot   spc prot        /* create prot id from space */
 	depd            pte,8,7,prot    /* add in prot bits from pte */
 
 	extrd,u,*=      pte,_PAGE_USER_BIT+32,1,r0
@@ -1010,7 +1085,6 @@ dtlb_check_alias_20w:
 	nop
 
 nadtlb_miss_20w:
-
 	extrd,u         spc,63,7,t1     /* adjust va */
 	depd            t1,31,7,va      /* adjust va */
 	depdi           0,63,7,spc      /* adjust space */
@@ -1042,15 +1116,9 @@ nadtlb_miss_20w:
 	shladd           t0,3,ptp,ptp
 	ldi		_PAGE_ACCESSED,t1
 	ldd              0(ptp),pte
-	bb,>=,n          pte,_PAGE_PRESENT_BIT,nadtlb_emulate
-
-	/* Check whether the "accessed" bit was set, otherwise do so */
+	bb,>=,n          pte,_PAGE_PRESENT_BIT,nadtlb_check_flush_20w
 
-	or		t1,pte,t0	/* t0 has R bit set */
-	and,*<>         t1,pte,%r0      /* test and nullify if already set */
-	std             t0,0(ptp)       /* write back pte */
-
-	depd,z          spc,62,31,prot  /* create prot id from space */
+	space_to_prot   spc prot        /* create prot id from space */
 	depd            pte,8,7,prot    /* add in prot bits from pte */
 
 	extrd,u,*=      pte,_PAGE_USER_BIT+32,1,r0
@@ -1067,6 +1135,23 @@ nadtlb_miss_20w:
 	rfir
 	nop
 
+nadtlb_check_flush_20w:
+	bb,>=,n          pte,_PAGE_FLUSH_BIT,nadtlb_emulate
+
+	/* Insert a "flush only" translation */
+
+	depdi,z         7,7,3,prot
+	depdi           1,10,1,prot
+
+	/* Get rid of prot bits and convert to page addr for idtlbt */
+
+	depdi		0,63,12,pte
+	extrd,u         pte,56,32,pte
+	idtlbt          pte,prot
+
+	rfir
+	nop
+
 #else
 
 dtlb_miss_11:
@@ -1174,13 +1259,7 @@ nadtlb_miss_11:
 	sh2addl 	 t0,ptp,ptp
 	ldi		_PAGE_ACCESSED,t1
 	ldw		 0(ptp),pte
-	bb,>=,n          pte,_PAGE_PRESENT_BIT,nadtlb_emulate
-
-	/* Check whether the "accessed" bit was set, otherwise do so */
-
-	or		t1,pte,t0	/* t0 has R bit set */
-	and,<>		t1,pte,%r0	/* test and nullify if already set */
-	stw		t0,0(ptp)	/* write back pte */
+	bb,>=,n          pte,_PAGE_PRESENT_BIT,nadtlb_check_flush_11
 
 	zdep            spc,30,15,prot  /* create prot id from space */
 	dep             pte,8,7,prot    /* add in prot bits from pte */
@@ -1208,6 +1287,30 @@ nadtlb_miss_11:
 	rfir
 	nop
 
+nadtlb_check_flush_11:
+	bb,>=,n          pte,_PAGE_FLUSH_BIT,nadtlb_emulate
+
+	/* Insert a "flush only" translation */
+
+	zdepi           7,7,3,prot
+	depi            1,10,1,prot
+
+	/* Get rid of prot bits and convert to page addr for idtlba */
+
+	depi		0,31,12,pte
+	extru		pte,24,25,pte
+
+	mfsp		%sr1,t0  /* Save sr1 so we can use it in tlb inserts */
+	mtsp		spc,%sr1
+
+	idtlba		pte,(%sr1,va)
+	idtlbp		prot,(%sr1,va)
+
+	mtsp		t0, %sr1	/* Restore sr1 */
+
+	rfir
+	nop
+
 dtlb_miss_20:
 	mfctl           %cr25,ptp	/* Assume user space miss */
 	or,<>           %r0,spc,%r0	/* If it is user space, nullify */
@@ -1238,7 +1341,7 @@ dtlb_miss_20:
 	and,<>		t1,pte,%r0	/* test and nullify if already set */
 	stw		t0,0(ptp)	/* write back pte */
 
-	depd,z          spc,62,31,prot  /* create prot id from space */
+	space_to_prot   spc prot        /* create prot id from space */
 	depd            pte,8,7,prot    /* add in prot bits from pte */
 
 	extrd,u,*=      pte,_PAGE_USER_BIT+32,1,r0
@@ -1303,15 +1406,9 @@ nadtlb_miss_20:
 	sh2addl 	 t0,ptp,ptp
 	ldi		_PAGE_ACCESSED,t1
 	ldw		 0(ptp),pte
-	bb,>=,n          pte,_PAGE_PRESENT_BIT,nadtlb_emulate
-
-	/* Check whether the "accessed" bit was set, otherwise do so */
+	bb,>=,n          pte,_PAGE_PRESENT_BIT,nadtlb_check_flush_20
 
-	or		t1,pte,t0	/* t0 has R bit set */
-	and,<>		t1,pte,%r0	/* test and nullify if already set */
-	stw		t0,0(ptp)	/* write back pte */
-
-	depd,z          spc,62,31,prot  /* create prot id from space */
+	space_to_prot   spc prot        /* create prot id from space */
 	depd            pte,8,7,prot    /* add in prot bits from pte */
 
 	extrd,u,*=      pte,_PAGE_USER_BIT+32,1,r0
@@ -1327,6 +1424,23 @@ nadtlb_miss_20:
 
 	rfir
 	nop
+
+nadtlb_check_flush_20:
+	bb,>=,n          pte,_PAGE_FLUSH_BIT,nadtlb_emulate
+
+	/* Insert a "flush only" translation */
+
+	depdi,z         7,7,3,prot
+	depdi           1,10,1,prot
+
+	/* Get rid of prot bits and convert to page addr for idtlbt */
+
+	depdi		0,63,12,pte
+	extrd,u         pte,56,32,pte
+	idtlbt          pte,prot
+
+	rfir
+	nop
 #endif
 
 nadtlb_emulate:
@@ -1418,7 +1532,7 @@ itlb_miss_common_20w:
 	and,*<>         t1,pte,%r0      /* test and nullify if already set */
 	std             t0,0(ptp)       /* write back pte */
 
-	depd,z          spc,62,31,prot  /* create prot id from space */
+	space_to_prot   spc prot        /* create prot id from space */
 	depd            pte,8,7,prot    /* add in prot bits from pte */
 
 	extrd,u,*=      pte,_PAGE_USER_BIT+32,1,r0
@@ -1544,7 +1658,7 @@ itlb_miss_common_20:
 	and,<>		t1,pte,%r0	/* test and nullify if already set */
 	stw		t0,0(ptp)	/* write back pte */
 
-	depd,z          spc,62,31,prot  /* create prot id from space */
+	space_to_prot   spc prot        /* create prot id from space */
 	depd            pte,8,7,prot    /* add in prot bits from pte */
 
 	extrd,u,*=      pte,_PAGE_USER_BIT+32,1,r0
@@ -1570,7 +1684,6 @@ itlb_miss_kernel_20:
 #ifdef __LP64__
 
 dbit_trap_20w:
-
 	extrd,u         spc,63,7,t1     /* adjust va */
 	depd            t1,31,7,va      /* adjust va */
 	depdi           0,1,2,va        /* adjust va */
@@ -1601,6 +1714,18 @@ dbit_trap_20w:
 	/* Third level page table lookup */
 
 	shladd           t0,3,ptp,ptp
+#ifdef CONFIG_SMP
+	CMPIB=,n        0,spc,dbit_nolock_20w
+	ldil            L%PA(pa_dbit_lock),t0
+	ldo             R%PA(pa_dbit_lock)(t0),t0
+
+dbit_spin_20w:
+	ldcw            0(t0),t1
+	cmpib,=         0,t1,dbit_spin_20w
+	nop
+
+dbit_nolock_20w:
+#endif
 	ldi		(_PAGE_ACCESSED|_PAGE_DIRTY),t1
 	ldd              0(ptp),pte
 	bb,>=,n          pte,_PAGE_PRESENT_BIT,dbit_fault
@@ -1610,7 +1735,7 @@ dbit_trap_20w:
 	or		t1,pte,pte
 	std             pte,0(ptp)      /* write back pte */
 
-	depd,z          spc,62,31,prot  /* create prot id from space */
+	space_to_prot   spc prot        /* create prot id from space */
 	depd            pte,8,7,prot    /* add in prot bits from pte */
 
 	extrd,u,*=      pte,_PAGE_USER_BIT+32,1,r0
@@ -1623,6 +1748,13 @@ dbit_trap_20w:
 	depdi		0,63,12,pte
 	extrd,u         pte,56,32,pte
 	idtlbt          pte,prot
+#ifdef CONFIG_SMP
+	CMPIB=,n        0,spc,dbit_nounlock_20w
+	ldi             1,t1
+	stw             t1,0(t0)
+
+dbit_nounlock_20w:
+#endif
 
 	rfir
 	nop
@@ -1648,6 +1780,18 @@ dbit_trap_11:
 	/* Second level page table lookup */
 
 	sh2addl 	 t0,ptp,ptp
+#ifdef CONFIG_SMP
+	CMPIB=,n        0,spc,dbit_nolock_11
+	ldil            L%PA(pa_dbit_lock),t0
+	ldo             R%PA(pa_dbit_lock)(t0),t0
+
+dbit_spin_11:
+	ldcw            0(t0),t1
+	cmpib,=         0,t1,dbit_spin_11
+	nop
+
+dbit_nolock_11:
+#endif
 	ldi		(_PAGE_ACCESSED|_PAGE_DIRTY),t1
 	ldw		 0(ptp),pte
 	bb,>=,n 	 pte,_PAGE_PRESENT_BIT,dbit_fault
@@ -1672,13 +1816,20 @@ dbit_trap_11:
 	depi		0,31,12,pte
 	extru		pte,24,25,pte
 
-	mfsp		%sr1,t0  /* Save sr1 so we can use it in tlb inserts */
+	mfsp            %sr1,t1  /* Save sr1 so we can use it in tlb inserts */
 	mtsp		spc,%sr1
 
 	idtlba		pte,(%sr1,va)
 	idtlbp		prot,(%sr1,va)
 
-	mtsp		t0, %sr1     /* Restore sr1 */
+	mtsp            t1, %sr1     /* Restore sr1 */
+#ifdef CONFIG_SMP
+	CMPIB=,n        0,spc,dbit_nounlock_11
+	ldi             1,t1
+	stw             t1,0(t0)
+
+dbit_nounlock_11:
+#endif
 
 	rfir
 	nop
@@ -1703,6 +1854,18 @@ dbit_trap_20:
 	/* Second level page table lookup */
 
 	sh2addl 	 t0,ptp,ptp
+#ifdef CONFIG_SMP
+	CMPIB=,n        0,spc,dbit_nolock_20
+	ldil            L%PA(pa_dbit_lock),t0
+	ldo             R%PA(pa_dbit_lock)(t0),t0
+
+dbit_spin_20:
+	ldcw            0(t0),t1
+	cmpib,=         0,t1,dbit_spin_20
+	nop
+
+dbit_nolock_20:
+#endif
 	ldi		(_PAGE_ACCESSED|_PAGE_DIRTY),t1
 	ldw		 0(ptp),pte
 	bb,>=,n 	 pte,_PAGE_PRESENT_BIT,dbit_fault
@@ -1712,7 +1875,7 @@ dbit_trap_20:
 	or		t1,pte,pte
 	stw		pte,0(ptp)	/* write back pte */
 
-	depd,z          spc,62,31,prot  /* create prot id from space */
+	space_to_prot   spc prot        /* create prot id from space */
 	depd            pte,8,7,prot    /* add in prot bits from pte */
 
 	extrd,u,*=      pte,_PAGE_USER_BIT+32,1,r0
@@ -1725,6 +1888,13 @@ dbit_trap_20:
 	depdi		0,63,12,pte
 	extrd,u		pte,56,25,pte
 	idtlbt          pte,prot
+#ifdef CONFIG_SMP
+	CMPIB=,n        0,spc,dbit_nounlock_20
+	ldi             1,t1
+	stw             t1,0(t0)
+
+dbit_nounlock_20:
+#endif
 
 	rfir
 	nop
@@ -1875,7 +2045,6 @@ sys_clone_wrapper:
 
 	STREG	%r2,PT_GR19(%r1)	/* save for child */
 	STREG	%r30,PT_GR21(%r1)
-
 	bl	sys_clone,%r2
 	copy	%r1,%r24
 
@@ -2098,15 +2267,18 @@ syscall_check_sig:
 	cmpib,<>,n 0,%r19,syscall_do_signal  /* forward */
 
 syscall_restore:
-	/* disable interrupts while dicking with the kernel stack, */
-	/* or life can become unpleasant */
-	rsm	PSW_SM_I, %r20
 	LDREG	TASK_PTRACE(%r1), %r19		/* Are we being ptraced? */
 	bb,<,n	%r19,31,syscall_restore_rfi
-	LDREG	TASK_PT_GR20(%r1),%r19
-	mtctl	%r19, %cr31
+
+	ldo	TASK_PT_FR31(%r1),%r19		   /* reload fpregs */
+	rest_fp	%r19
+
+	LDREG	TASK_PT_SAR(%r1),%r19		   /* restore SAR */
+	mtsar	%r19
 
 	LDREG	TASK_PT_GR2(%r1),%r2		   /* restore user rp */
+	LDREG	TASK_PT_GR19(%r1),%r19
+	LDREG   TASK_PT_GR20(%r1),%r20
 	LDREG	TASK_PT_GR21(%r1),%r21
 	LDREG	TASK_PT_GR22(%r1),%r22
 	LDREG	TASK_PT_GR23(%r1),%r23
@@ -2116,30 +2288,19 @@ syscall_restore:
 	LDREG	TASK_PT_GR27(%r1),%r27	   /* restore user dp */
 	LDREG	TASK_PT_GR28(%r1),%r28	   /* syscall return value */
 	LDREG	TASK_PT_GR29(%r1),%r29
-	LDREG	TASK_PT_GR30(%r1),%r30	   /* restore user sp */
 	LDREG	TASK_PT_GR31(%r1),%r31	   /* restore syscall rp */
-	ldo	TASK_PT_FR31(%r1),%r19		   /* reload fpregs */
-	rest_fp	%r19
-	LDREG	TASK_PT_SAR(%r1),%r19		   /* restore SAR */
-	mtsar	%r19
-	LDREG	TASK_PT_GR19(%r1),%r19
 
-	mtctl	%r1,%cr30			   /* intrhandler okay. */
+	rsm     PSW_SM_I, %r0
+	LDREG   TASK_PT_GR30(%r1),%r30             /* restore user sp */
 	mfsp	%sr3,%r1			   /* Get users space id */
+	mtsp    %r1,%sr7                           /* Restore sr7 */
+	ssm     PSW_SM_I, %r0
 	mtsp	%r1,%sr4			   /* Restore sr4 */
 	mtsp	%r1,%sr5			   /* Restore sr5 */
 	mtsp	%r1,%sr6			   /* Restore sr6 */
 
 	depi	3,31,2,%r31			   /* ensure return to user mode. */
 
-	mtsm	%r20				   /* restore irq state  */
-	mfctl	%cr31,%r20
-	
-	/*
-	 * Due to a dependency in the tlb miss handlers on sr7, it
-	 * is essential that sr7 get set in the delay slot.
-	 */
-
 #ifdef __LP64__
 	/* Since we are returning to a 32 bit user process, we always
 	 * clear the W bit. This means that the be (and mtsp) gets
@@ -2148,10 +2309,11 @@ syscall_restore:
 	 * we won't clear the W bit, so the be will run in wide mode.
 	 */
 
+	be	0(%sr3,%r31)			   /* return to user space */
 	rsm	PSW_SM_W, %r0
+#else
+	be,n    0(%sr3,%r31)                       /* return to user space */
 #endif
-	be	0(%sr3,%r31)			   /* return to user space */
-	mtsp	%r1,%sr7			   /* Restore sr7 */
 
 	/* We have to return via an RFI, so that PSW T and R bits can be set
 	 * appropriately.
@@ -2195,7 +2357,6 @@ psw_setup:
 	bb,<	%r2,30,pt_regs_ok		   /* Branch if D set */
 	ldo	TASK_REGS(%r1),%r25
 	reg_save %r25				   /* Save r3 to r18 */
-	STREG	%r1,TASK_PT_CR30(%r1)
 	mfsp	%sr0,%r2
 	STREG	%r2,TASK_PT_SR0(%r1)
 	mfsp	%sr1,%r2
--- arch/parisc/kernel/pacache.S.old	Wed Aug 29 03:39:09 2001
+++ arch/parisc/kernel/pacache.S	Wed Aug 29 03:40:14 2001
@@ -46,9 +46,9 @@
 	.text
 	.align 128
 
-	.export __flush_tlb_all,code
+	.export flush_tlb_all_local,code
 
-__flush_tlb_all:
+flush_tlb_all_local:
 	.proc
 	.callinfo NO_CALLS
 	.entry
@@ -201,10 +201,10 @@ fdtdone:
 
 	.procend
 
-	.export flush_instruction_cache,code
+	.export flush_instruction_cache_local,code
 	.import cache_info,data
 
-flush_instruction_cache:
+flush_instruction_cache_local:
 	.proc
 	.callinfo NO_CALLS
 	.entry
@@ -241,10 +241,10 @@ fisync:
 
 	.procend
 
-	.export flush_data_cache,code
+	.export flush_data_cache_local,code
 	.import cache_info,data
 
-flush_data_cache:
+flush_data_cache_local:
 	.proc
 	.callinfo NO_CALLS
 	.entry
--- arch/parisc/kernel/time.c.old	Wed Aug 29 03:39:10 2001
+++ arch/parisc/kernel/time.c	Tue Sep  4 05:32:55 2001
@@ -38,13 +38,21 @@ extern rwlock_t xtime_lock;
 static long clocktick;	/* timer cycles per tick */
 static long halftick;
 
+#ifdef CONFIG_SMP
+extern void smp_do_timer(struct pt_regs *regs);
+#endif
+
 void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
 	long now = mfctl(16);
 	long next_tick;
+	int nticks;
+	int cpu = smp_processor_id();
 
 	/* initialize next_tick to time at last clocktick */
-	next_tick = cpu_data[smp_processor_id()].it_value;
+
+	next_tick = cpu_data[cpu].it_value;
+
 	/* since time passes between the interrupt and the mfctl()
 	 * above, it is never true that last_tick + clocktick == now.  If we
 	 * never miss a clocktick, we could set next_tick = last_tick + clocktick
@@ -52,28 +60,29 @@ void timer_interrupt(int irq, void *dev_
 	 *
 	 * Variables are *signed*.
 	 */
+
+	nticks = 0;
 	while((next_tick - now) < halftick) {
 		next_tick += clocktick;
-		/* supposedly (wall_jiffies - jiffies) = "lost" ticks */
-		/* but haven't checked it works for parisc */
-		/* lost++; */
+		nticks++;
 	}
 	mtctl(next_tick, 16);
-	cpu_data[smp_processor_id()].it_value = next_tick;
-#if 0
-	unsigned long now = mfctl(16);
-	/* this code will lose time by delaying the next clocktick by the */
-	/* amount of time between the interrupt and the mfctl(16) above */
-	cpu_data[smp_processor_id()].it_value = now;
+	cpu_data[cpu].it_value = next_tick;
 
-        now += clocktick;
-	mtctl(now ,16);
+	while (nticks--) {
+#ifdef CONFIG_SMP
+		smp_do_timer(regs);
 #endif
-
-	do_timer(regs);
+		if (cpu == 0) {
+			write_lock(&xtime_lock);
+			do_timer(regs);
+			write_unlock(&xtime_lock);
+		}
+	}
     
 #ifdef CONFIG_CHASSIS_LCD_LED
-	tasklet_schedule(&led_tasklet);
+	if (cpu == 0)
+		tasklet_schedule(&led_tasklet);
 #endif
 }
 
@@ -87,8 +96,11 @@ static inline unsigned long
 gettimeoffset (void)
 {
 #ifndef CONFIG_SMP
-	/* this might work for SMP but probably not -- should be ok on */
-	/* all CPUs running timer interrupts, which may only be monarch */
+	/*
+	 * this won't work on smp, because jiffies are updated by cpu 0,
+	 * so we can't use the cr16 value on another processor to get a
+	 * valid offset.
+	 */
 	long last_tick;
 	long elapsed_cycles;
 
@@ -167,11 +179,11 @@ void __init time_init(void)
 	clocktick = (100 * PAGE0->mem_10msec) / HZ;
 	halftick = clocktick / 2;
 
-	/* make the first timer interrupt go off in one second
-	** Used for bogomips calculation.
-	*/
-	cpu_data[smp_processor_id()].it_value = next_tick = mfctl(16);
-	next_tick += HZ * clocktick;
+	/* Setup clock interrupt timing */
+
+	next_tick = mfctl(16);
+	next_tick += clocktick;
+	cpu_data[smp_processor_id()].it_value = next_tick;
 
 	/* kick off Itimer (CR16) */
 	mtctl(next_tick, 16);
--- arch/parisc/kernel/init_task.c.old	Wed Aug 29 03:39:10 2001
+++ arch/parisc/kernel/init_task.c	Wed Aug 29 04:40:44 2001
@@ -19,6 +19,7 @@ struct mm_struct init_mm = INIT_MM(init_
  * way process stacks are handled. This is done by having a special
  * "init_task" linker map entry..
  */
+unsigned char interrupt_stack[ISTACK_SIZE] __attribute__ ((section("init_istack"), aligned(4096)));
 union task_union init_task_union 
 	__attribute__((section("init_task"), aligned(4096))) = { INIT_TASK(init_task_union.task) };
 
--- arch/parisc/kernel/head64.S.old	Wed Aug 29 03:39:10 2001
+++ arch/parisc/kernel/head64.S	Wed Aug 29 03:40:14 2001
@@ -127,6 +127,16 @@ $pgt_fill_loop:
 	/* And the stack pointer too */
 	load32		PA(init_task_union+TASK_SZ_ALGN),%sp
 
+	/* And the initial task pointer */
+
+	load32          init_task_union,%r6
+	mtctl           %r6,%cr30
+
+	/* And the interrupt stack */
+
+	load32          interrupt_stack,%r6
+	mtctl           %r6,%cr31
+
 	/* Act like PDC just called us - that's how slave CPUs enter */
 #define MEM_PDC_LO 0x388
 #define MEM_PDC_HI 0x35C
@@ -216,12 +226,6 @@ stext_pdc_ret:
 	/* Initialize the global data pointer */
 	load32		__gp,%dp
 
-	/* Clear the current task register.
-	** We need this to take interruptions directly after the rfi below
-	** (required for PA2.0 boxes)
-	*/
-	mtctl		%r0, %cr30
-
 	/* Set up our interrupt table.  HPMCs might not work after this! */
 	ldil		L%PA(fault_vector_20),%r10
 	ldo		R%PA(fault_vector_20)(%r10),%r10
@@ -305,6 +309,7 @@ smp_slave_stext:
 	/*  Initialize the SP - monarch sets up smp_init_current_idle_task */
 	load32		PA(smp_init_current_idle_task),%sp
 	ldd		0(%sp),%sp	/* load task address */
+	mtctl           %sp,%cr30       /* store in cr30 */
 	ldo             TASK_SZ_ALGN(%sp),%sp
 	tophys_r1       %sp
 
--- arch/parisc/kernel/head.S.old	Wed Aug 29 03:39:10 2001
+++ arch/parisc/kernel/head.S	Fri Aug 31 05:23:46 2001
@@ -117,9 +117,21 @@ $pgt_fill_loop:
 	ldil            L%start_kernel,%r11
 	ldo             R%start_kernel(%r11),%r11
 
+	/* And the initial task pointer */
+
+	ldil            L%init_task_union,%r6
+	ldo             R%init_task_union(%r6),%r6
+	mtctl           %r6,%cr30
+
 	/* And the stack pointer too */
-	ldil		L%init_task_union+TASK_SZ_ALGN,%sp
-	ldo		R%init_task_union+TASK_SZ_ALGN(%sp),%sp
+
+	ldo             TASK_SZ_ALGN(%r6),%sp
+
+	/* And the interrupt stack */
+
+	ldil            L%interrupt_stack,%r6
+	ldo             R%interrupt_stack(%r6),%r6
+	mtctl           %r6,%cr31
 
 #ifdef CONFIG_SMP
 	/* Set the smp rendevous address into page zero.
@@ -169,12 +181,6 @@ common_stext:
 	ldil		L%$global$,%dp
 	ldo		R%$global$(%dp),%dp
 
-	/* Clear the current task register.
-	** We need this to take interruptions directly after the rfi below
-	** (required for PA2.0 boxes)
-	*/
-	mtctl		%r0, %cr30
-
 	/*
 	 * Set up our interrupt table.  HPMCs might not work after this! 
 	 *
@@ -269,11 +275,8 @@ smp_slave_stext:
 	/*  Initialize the SP - monarch sets up smp_init_current_idle_task */
 	ldil		L%PA(smp_init_current_idle_task),%sp
 	ldo		R%PA(smp_init_current_idle_task)(%sp),%sp
-#ifdef __LP64__
-	ldd		0(%sp),%sp	/* load task address */
-#else
 	ldw		0(%sp),%sp	/* load task address */
-#endif
+	mtctl           %sp,%cr30       /* store in cr30 */
 	addil		L%TASK_SZ_ALGN,%sp	/* stack is above task */
 	ldo		R%TASK_SZ_ALGN(%r1),%sp
 
--- arch/parisc/kernel/superio.c.old	Wed Aug 29 03:39:10 2001
+++ arch/parisc/kernel/superio.c	Wed Aug 29 04:43:14 2001
@@ -103,7 +103,10 @@ superio_interrupt(int irq, void *devp, s
 	results = inb(IC_PIC1+0);
 
 	if ((results & 0x80) == 0) {
+#ifndef CONFIG_SMP
+		/* HACK: need to investigate why this happens if SMP enabled */
 		BUG(); /* This shouldn't happen */
+#endif
 		return;
 	}
 
--- arch/parisc/kernel/traps.c.old	Thu Sep  6 02:19:08 2001
+++ arch/parisc/kernel/traps.c	Wed Sep  5 07:06:33 2001
@@ -71,6 +71,8 @@ void show_regs(struct pt_regs *regs)
 	int i;
 	char buf[128], *p;
 	char *level;
+	unsigned long cr30;
+	unsigned long cr31;
 
 	level = user_mode(regs) ? KERN_DEBUG : KERN_CRIT;
 
@@ -106,11 +108,15 @@ void show_regs(struct pt_regs *regs)
 				regs->fr[i], i+1, regs->fr[i+1]);
 #endif
 
+	cr30 = mfctl(30);
+	cr31 = mfctl(31);
 	printk("%s\n", level);
 	printk("%sIASQ: " RFMT " " RFMT " IAOQ: " RFMT " " RFMT "\n",
 	       level, regs->iasq[0], regs->iasq[1], regs->iaoq[0], regs->iaoq[1]);
 	printk("%s IIR: %08lx    ISR: " RFMT "  IOR: " RFMT "\n",
 	       level, regs->iir, regs->isr, regs->ior);
+	printk("%s CPU: %8d   CR30: " RFMT " CR31: " RFMT "\n",
+	       level, ((struct task_struct *)cr30)->processor, cr30, cr31);
 	printk("%s ORIG_R28: " RFMT "\n", level, regs->orig_r28);
 }
 
@@ -179,7 +185,6 @@ void handle_break(unsigned iir, struct p
 	switch(iir) {
 	case 0x00:
 #ifdef PRINT_USER_FAULTS
-		cli();
 		printk(KERN_DEBUG "break 0,0: pid=%d command='%s'\n",
 		       current->pid, current->comm);
 #endif
@@ -219,7 +224,6 @@ void handle_break(unsigned iir, struct p
 #endif /* CONFIG_KWDB */
 
 	default:
-		set_eiem(0);
 #ifdef PRINT_USER_FAULTS
 		printk(KERN_DEBUG "break %#08x: pid=%d command='%s'\n",
 		       iir, current->pid, current->comm);
@@ -233,6 +237,7 @@ void handle_break(unsigned iir, struct p
 	}
 }
 
+
 int handle_toc(void)
 {
 	return 0;
@@ -368,6 +373,37 @@ void handle_interruption(int code, struc
 		return;
 
 	case  7:  /* I access rights or I protection Id trap */
+
+		/*
+		 * This could be caused by either: 1) a process attempting
+		 * to execute within a vma that does not have execute
+		 * permission, or 2) an access rights violation caused by a
+		 * flush only translation set up by ptep_get_and_clear().
+		 * So we check the vma permissions to differentiate the two.
+		 * If the vma indicates we have execute permission, then
+		 * the cause is the latter one. In this case, we need to
+		 * call do_page_fault() to fix the problem.
+		 */
+
+		if (user_mode(regs)) {
+			struct vm_area_struct *vma;
+
+			down_read(&current->mm->mmap_sem);
+			vma = find_vma(current->mm,regs->iaoq[0]);
+			if (vma && (regs->iaoq[0] >= vma->vm_start)
+				&& (vma->vm_flags & VM_EXEC)) {
+
+				fault_address = regs->iaoq[0];
+				fault_space = regs->iasq[0];
+
+				up_read(&current->mm->mmap_sem);
+				break; /* call do_page_fault() */
+			}
+			up_read(&current->mm->mmap_sem);
+		}
+
+		/* Fall Through */
+
 	case 27:  /* D protection Id trap */
 		die_if_kernel("Protection Id Trap", regs, code);
 		si.si_code = SEGV_MAPERR;
@@ -448,39 +484,89 @@ void handle_interruption(int code, struc
 	return;
 }
 
-void show_stack(struct pt_regs *regs)
+void dump_stack(unsigned long from, unsigned long to,int istackflag)
 {
+	unsigned int *fromptr;
+	unsigned int *toptr;
 
-#if 1
-	/* If regs->cr30 == 0, a kernel stack pointer is installed in sp */
+	fromptr = (unsigned int *)from;
+	toptr = (unsigned int *)to;
+
+	if (istackflag)
+		printk("\nDumping Interrupt Stack from %p to %p:\n",fromptr,toptr);
+	else
+		printk("\nDumping Stack from %p to %p:\n",fromptr,toptr);
+	while (fromptr < toptr) {
+		printk("%04lx %08x %08x %08x %08x %08x %08x %08x %08x\n",
+		    ((unsigned long)fromptr) & 0xffff,
+		    fromptr[0], fromptr[1], fromptr[2], fromptr[3],
+		    fromptr[4], fromptr[5], fromptr[6], fromptr[7]);
+		fromptr += 8;
+	}
+	return;
+}
 
-	if (regs->cr30 == 0) {
+void show_stack(struct pt_regs *regs)
+{
+#if 1
+	/* If regs->sr[7] == 0, we are on a kernel stack */
 
-	    unsigned long sp = regs->gr[30];
-	    unsigned int *stackptr;
-	    unsigned int *dumpptr;
+	if (regs->sr[7] == 0) {
 
-	    /* Stack Dump! */
+		unsigned long sp = regs->gr[30];
+		unsigned long cr30;
+		unsigned long cr31;
+		unsigned long stack_start;
+		struct pt_regs *int_regs;
+
+		cr30 = mfctl(30);
+		cr31 = mfctl(31);
+		stack_start = sp & ~(ISTACK_SIZE - 1);
+		if (stack_start == cr31) {
+
+		    /*
+		     * We are on the interrupt stack, get the stack
+		     * pointer from the first pt_regs structure on
+		     * the interrupt stack, so we can dump the task
+		     * stack first.
+		     */
+
+		    int_regs = (struct pt_regs *)cr31;
+		    sp = int_regs->gr[30];
+		    stack_start = sp & ~(INIT_TASK_SIZE - 1);
+		    if (stack_start != cr30)
+			printk("WARNING! Stack pointer and cr30 do not correspond!\n");
+		    dump_stack(stack_start,sp,0);
+
+		    printk("\n\nRegisters at Interrupt:\n");
+		    show_regs(int_regs);
+
+		    /* Now dump the interrupt stack */
+
+		    sp = regs->gr[30];
+		    stack_start = sp & ~(ISTACK_SIZE - 1);
+		    dump_stack(stack_start,sp,1);
+		}
+		else {
+		    /* Stack Dump! */
 
-	    stackptr = (unsigned int *)sp;
-	    dumpptr  = (unsigned int *)(sp & ~(INIT_TASK_SIZE - 1));
-	    printk("\nDumping Stack from %p to %p:\n",dumpptr,stackptr);
-	    while (dumpptr < stackptr) {
-		printk("%04lx %08x %08x %08x %08x %08x %08x %08x %08x\n",
-		    ((unsigned long)dumpptr) & 0xffff,
-		    dumpptr[0], dumpptr[1], dumpptr[2], dumpptr[3],
-		    dumpptr[4], dumpptr[5], dumpptr[6], dumpptr[7]);
-		dumpptr += 8;
-	    }
+		    stack_start = sp & ~(INIT_TASK_SIZE - 1);
+		    if (stack_start != cr30)
+			printk("WARNING! Stack pointer and cr30 do not correspond!\n");
+		    dump_stack(stack_start,sp,0);
+		}
 	}
 #endif
 }
 
+static spinlock_t terminate_lock = SPIN_LOCK_UNLOCKED;
 
 void parisc_terminate(char *msg, struct pt_regs *regs, int code, unsigned long offset)
 {
 	set_eiem(0);
-	cli();
+	__cli();
+
+	spin_lock(&terminate_lock);
 
 	/* restart pdc console if necessary */
 	if (!console_drivers)
@@ -494,6 +580,8 @@ void parisc_terminate(char *msg, struct 
 	printk("\n%s: Code=%d regs=%p (Addr=" RFMT ")\n",msg,code,regs,offset);
 	show_regs(regs);
 
+	spin_unlock(&terminate_lock);
+
 	for(;;)
 	    ;
 }
@@ -532,7 +620,6 @@ void transfer_pim_to_trap_frame(struct p
 	regs->iaoq[0] = pim_wide->cr[18];
 	regs->iaoq[1] = pim_wide->iaoq_back;
 
-	regs->cr30 = pim_wide->cr[30];
 	regs->sar  = pim_wide->cr[11];
 	regs->iir  = pim_wide->cr[19];
 	regs->isr  = pim_wide->cr[20];
@@ -557,7 +644,6 @@ void transfer_pim_to_trap_frame(struct p
 	regs->iaoq[0] = pim_narrow->cr[18];
 	regs->iaoq[1] = pim_narrow->iaoq_back;
 
-	regs->cr30 = pim_narrow->cr[30];
 	regs->sar  = pim_narrow->cr[11];
 	regs->iir  = pim_narrow->cr[19];
 	regs->isr  = pim_narrow->cr[20];
--- arch/parisc/kernel/syscall.S.old	Wed Aug 29 03:39:10 2001
+++ arch/parisc/kernel/syscall.S	Wed Sep  5 07:06:08 2001
@@ -61,13 +61,13 @@ set_thread_pointer:
 /* This address must remain fixed, or user binaries go splat. */
 	.align 256
 linux_gateway_entry:
-	mfsp	%sr7,%r1			/* we must set sr3 to the space */
-	mtsp	%r1,%sr3			/* of the user before the gate */
 	gate	.+8, %r0			/* become privileged */
 	mtsp	%r0,%sr4			/* get kernel space into sr4 */
 	mtsp	%r0,%sr5			/* get kernel space into sr5 */
 	mtsp	%r0,%sr6			/* get kernel space into sr6 */
-	mtsp	%r0,%sr7			/* get kernel space into sr7 */
+	mfsp    %sr7,%r1                        /* save user sr7 */
+	mtsp    %r1,%sr3                        /* and store it in sr3 */
+
 #ifdef __LP64__
 	/* for now we can *always* set the W bit on entry to the syscall
 	 * since we don't support wide userland processes.  We could
@@ -86,10 +86,21 @@ linux_gateway_entry:
 	depdi	0, 31, 32, %r22
 	depdi	0, 31, 32, %r21
 #endif
-	mtctl	%r28,%cr31
-	rsm	PSW_I, %r28			/* no ints for a bit  */
-	mfctl	%cr30,%r1			/* get the kernel task ptr */
-	mtctl	%r0,%cr30			/* zero it (flag) */
+	mfctl   %cr30,%r1
+	xor     %r1,%r30,%r30                   /* ye olde xor trick */
+	xor     %r1,%r30,%r1
+	xor     %r1,%r30,%r30
+	ldo     TASK_SZ_ALGN+FRAME_SIZE(%r30),%r30  /* set up kernel stack */
+
+	/* N.B.: It is critical that we don't set sr7 to 0 until r30
+	 *       contains a valid kernel stack pointer. It is also
+	 *       critical that we don't start using the kernel stack
+	 *       until after sr7 has been set to 0.
+	 */
+
+	mtsp	%r0,%sr7			/* get kernel space into sr7 */
+	STREG   %r1,TASK_PT_GR30-TASK_SZ_ALGN-FRAME_SIZE(%r30) /* save usp */
+	ldo     -TASK_SZ_ALGN-FRAME_SIZE(%r30),%r1   /* get task ptr in %r1 */
 
 	/* Save some registers for sigcontext and potential task
 	   switch (see entry.S for the details of which ones are
@@ -107,11 +118,9 @@ linux_gateway_entry:
 	STREG	%r25, TASK_PT_GR25(%r1)		/* 2nd argument */
 	STREG	%r26, TASK_PT_GR26(%r1)	 	/* 1st argument */
 	STREG	%r27, TASK_PT_GR27(%r1)		/* user dp */
-	mfctl	%cr31,%r19
-	STREG	%r19, TASK_PT_GR28(%r1)		/* return value 0 */
-	STREG	%r19, TASK_PT_ORIG_R28(%r1)	/* return value 0 (saved for signals) */
+	STREG   %r28, TASK_PT_GR28(%r1)         /* return value 0 */
+	STREG   %r28, TASK_PT_ORIG_R28(%r1)     /* return value 0 (saved for signals) */
 	STREG	%r29, TASK_PT_GR29(%r1)		/* return value 1 */
-	STREG	%r30, TASK_PT_GR30(%r1)		/* preserve userspace sp */
 	STREG	%r31, TASK_PT_GR31(%r1)		/* preserve syscall return ptr */
 	
 	ldo	TASK_PT_FR0(%r1), %r27		/* save fpregs from the kernel */
@@ -122,7 +131,6 @@ linux_gateway_entry:
 
 	loadgp
 
-	ldo	TASK_SZ_ALGN+FRAME_SIZE(%r1),%r30	/* set up kernel stack */
 #ifdef __LP64__
 	ldo	-16(%r30),%r29			/* Reference param save area */
 #else
@@ -134,7 +142,6 @@ linux_gateway_entry:
 
 	/* for some unknown reason, task_struct.ptrace is an unsigned long so use LDREG */
 	LDREG	TASK_PTRACE(%r1), %r19		/* Are we being ptraced? */
-	mtsm	%r28				/* irqs back  */
 
 	bb,<,n	%r19, 31, .Ltracesys		/* must match PT_PTRACE bit */
 	
@@ -197,7 +204,6 @@ tracesys:
 	ldo     -TASK_SZ_ALGN-FRAME_SIZE(%r30),%r1      /* get task ptr */
 	ssm	0,%r2
 	STREG	%r2,TASK_PT_PSW(%r1)		/* Lower 8 bits only!! */
-	STREG	%r1,TASK_PT_CR30(%r1)
 	mfsp	%sr0,%r2
 	STREG	%r2,TASK_PT_SR0(%r1)
 	mfsp	%sr1,%r2
--- arch/parisc/kernel/hpmc.S.old	Wed Aug 29 03:39:10 2001
+++ arch/parisc/kernel/hpmc.S	Wed Aug 29 03:40:14 2001
@@ -265,7 +265,6 @@ os_hpmc_5:
 
 	rsm 8,%r0           /* Clear Q bit */
 	ldi     1,%r8       /* Set trap code to "1" for HPMC */
-	mtctl   %r0,%cr30   /* Force interruptions to use hpmc stack */
 	ldil    L%PA(intr_save), %r1
 	ldo     R%PA(intr_save)(%r1), %r1
 	be      0(%sr7,%r1)
--- arch/parisc/kernel/binfmt_elf32.c.old	Wed Aug 29 03:39:10 2001
+++ arch/parisc/kernel/binfmt_elf32.c	Wed Aug 29 03:40:14 2001
@@ -14,6 +14,7 @@
 
 typedef unsigned int elf_greg_t;
 
+#include <linux/spinlock.h>
 #include <asm/processor.h>
 #include <linux/module.h>
 #include <linux/config.h>
--- arch/parisc/kernel/irq.c.old	Wed Aug 29 03:39:10 2001
+++ arch/parisc/kernel/irq.c	Wed Aug 29 05:02:49 2001
@@ -390,6 +390,7 @@ void do_irq(struct irqaction *action, in
 void do_cpu_irq_mask(unsigned long mask, struct irq_region *region, struct pt_regs *regs)
 {
 	unsigned long bit;
+	unsigned long orig_eiem;
 	int irq;
 
 #ifdef DEBUG_IRQ
@@ -397,6 +398,15 @@ void do_cpu_irq_mask(unsigned long mask,
 	    printk(KERN_DEBUG "do_irq_mask %08lx %p %p\n", mask, region, regs);
 #endif
 
+	/*
+	 * do_cpu_irq_mask is called with the PSW_I bit off. we don't
+	 * enable it until we've masked (cleared the bits in the eiem)
+	 * the current set of interrupts we are processing.
+	 */
+
+	orig_eiem = get_eiem();
+	set_eiem(orig_eiem & ~mask);
+	local_irq_enable();
 	for(bit=(1L<<MAX_CPU_IRQ), irq = 0; mask && bit; bit>>=1, irq++) {
 		int irq_num;
 		if(!(bit&mask))
@@ -404,11 +414,11 @@ void do_cpu_irq_mask(unsigned long mask,
 
 		mask &= ~bit;	/* clear bit in mask - can exit loop sooner */
 		irq_num = region->data.irqbase + irq;
-
-		CLEAR_EIEM_BIT(bit);	/* mask_cpu_irq(NULL, irq) */
 		do_irq(&region->action[irq], irq_num, regs);
-		SET_EIEM_BIT(bit);	/* unmask_cpu_irq(NULL, irq) */
 	}
+	set_eiem(orig_eiem);
+
+	/* Leaving with PSW_I bit set */
 }
 
 
--- arch/parisc/kernel/setup.c.old	Wed Aug 29 03:39:10 2001
+++ arch/parisc/kernel/setup.c	Wed Aug 29 06:27:08 2001
@@ -169,6 +169,9 @@ cpu_driver_callback(struct parisc_device
 	p->hpa = dev->hpa;	/* save CPU hpa */
 	p->cpuid = cpuid;	/* save CPU id */
 	p->txn_addr = txn_addr;	/* save CPU hpa */
+#ifdef CONFIG_SMP
+	p->lock = SPIN_LOCK_UNLOCKED;
+#endif
 
 	/*
 	** CONFIG_SMP: init_smp_config() will attempt to get CPU's into
@@ -267,7 +270,7 @@ void __init collect_boot_cpu_data(void)
 ** NEWS FLASH: wide kernels need FP coprocessor enabled to handle
 ** formatted printing of %lx for example (double divides I think)
 */
-static int __init
+int __init
 init_per_cpu(int cpuid)
 {
 	int ret;
--- arch/parisc/lib/lusercopy.S.old	Wed Aug 29 03:39:10 2001
+++ arch/parisc/lib/lusercopy.S	Wed Aug 29 03:40:14 2001
@@ -41,15 +41,8 @@
 	 * on the flag stored in the task structure.
 	 */
 
-	/* FIXME! depi below has hardcoded idea of kernel stack size */
-
 	.macro  get_sr
-	copy        %r30,%r1        ;! Get task structure
-#ifdef __LP64__
-	depdi       0,63,14,%r1     ;! into r1
-#else
-	depi        0,31,14,%r1     ;! into r1
-#endif
+	mfctl       %cr30,%r1
 	ldw         TASK_SEGMENT(%r1),%r22
 	mfsp        %sr3,%r1
 	or,<>       %r22,%r0,%r0
--- arch/parisc/lib/bitops.c.old	Wed Aug 29 03:39:10 2001
+++ arch/parisc/lib/bitops.c	Wed Aug 29 03:40:14 2001
@@ -60,25 +60,27 @@ unsigned long __xchg8(char x, char *ptr)
 
 
 #ifdef __LP64__
-unsigned long __cmpxchg_u64(volatile long *ptr, unsigned long old, unsigned long new)
+unsigned long __cmpxchg_u64(volatile unsigned long *ptr, unsigned long old, unsigned long new)
 {
 	unsigned long flags;
+	unsigned long prev;
 
 	SPIN_LOCK_IRQSAVE(ATOMIC_HASH(ptr), flags);
-	old = *ptr;
-	*ptr = new;
+	if ((prev = *ptr) == old)
+		*ptr = new;
 	SPIN_UNLOCK_IRQRESTORE(ATOMIC_HASH(ptr), flags);
-	return (old == new);
+	return prev;
 }
 #endif
 
-unsigned long __cmpxchg_u32(volatile int *ptr, int old, int new)  
+unsigned long __cmpxchg_u32(volatile unsigned int *ptr, unsigned int old, unsigned int new)
 {
 	unsigned long flags;
+	unsigned int prev;
 
 	SPIN_LOCK_IRQSAVE(ATOMIC_HASH(ptr), flags);
-	old = *ptr;
-	*ptr = new;
+	if ((prev = *ptr) == old)
+		*ptr = new;
 	SPIN_UNLOCK_IRQRESTORE(ATOMIC_HASH(ptr), flags);
-	return (old == new);
+	return (unsigned long)prev;
 }
--- arch/parisc/mm/init.c.old	Wed Aug 29 03:39:10 2001
+++ arch/parisc/mm/init.c	Wed Sep  5 07:07:17 2001
@@ -472,11 +472,9 @@ static void __init pagetable_init(void)
 	unsigned long ro_end;
 	unsigned long fv_addr;
 	int range;
-	extern  const int stext;
-	extern  int data_start;
 	extern  const unsigned long fault_vector_20;
 
-	ro_start = __pa((unsigned long)&stext);
+	ro_start = __pa((unsigned long)&_text);
 	ro_end   = __pa((unsigned long)&data_start);
 	fv_addr  = __pa((unsigned long)&fault_vector_20) & PAGE_MASK;
 
@@ -619,6 +617,8 @@ static void __init gateway_init(void)
 	return;
 }
 
+extern void flush_tlb_all_local(void);
+
 void __init paging_init(void)
 {
 	int i;
@@ -626,7 +626,8 @@ void __init paging_init(void)
 	setup_bootmem();
 	pagetable_init();
 	gateway_init();
-	flush_all_caches(); /* start with a known state */
+	flush_cache_all_local(); /* start with known state */
+	flush_tlb_all_local();
 
 	for (i = 0; i < npmem_ranges; i++) {
 		unsigned long zones_size[MAX_NR_ZONES] = { 0, 0, 0, };
@@ -686,15 +687,30 @@ badchunkmap2:
 #endif
 }
 
+#ifdef CONFIG_PA20
+
+/*
+ * Currently, all PA20 chips have 18 bit protection id's, which is the
+ * limiting factor (space ids are 32 bits).
+ */
+
+#define NR_SPACE_IDS 262144
+
+#else
+
 /*
  * Currently we have a one-to-one relationship between space id's and
  * protection id's. Older parisc chips (PCXS, PCXT, PCXL, PCXL2) only
  * support 15 bit protection id's, so that is the limiting factor.
- * We could do some type of run time initialization here, but let's
- * make sure there is a need first.
+ * PCXT' has 18 bit protection id's, but only 16 bit spaceids, so it's
+ * probably not worth the effort for a special case here.
  */
 
-#define NR_SPACE_IDS    32768
+#define NR_SPACE_IDS 32768
+
+#endif  /* !CONFIG_PA20 */
+
+#define RECYCLE_THRESHOLD (NR_SPACE_IDS / 2)
 #define SID_ARRAY_SIZE  (NR_SPACE_IDS / (8 * sizeof(long)))
 
 static unsigned long space_id[SID_ARRAY_SIZE] = { 1 }; /* disallow space 0 */
@@ -705,11 +721,10 @@ static unsigned long dirty_space_ids = 0
 
 static spinlock_t sid_lock = SPIN_LOCK_UNLOCKED;
 
-#define SPACEID_SHIFT (PAGE_SHIFT + (PT_NLEVELS)*(PAGE_SHIFT - PT_NLEVELS) - 32)
-
 unsigned long alloc_sid(void)
 {
 	unsigned long index;
+
 	spin_lock(&sid_lock);
 
 	if (free_space_ids == 0) {
@@ -752,11 +767,43 @@ void free_sid(unsigned long spaceid)
 	spin_unlock(&sid_lock);
 }
 
-/*
- * flush_tlb_all() calls recycle_sids(), since whenever the entire tlb is
- * purged, we can safely reuse the space ids that were released but
- * not flushed from the tlb.
- */
+
+#ifdef CONFIG_SMP
+static void get_dirty_sids(unsigned long *ndirtyptr,unsigned long *dirty_array)
+{
+	int i;
+
+	/* NOTE: sid_lock must be held upon entry */
+
+	*ndirtyptr = dirty_space_ids;
+	if (dirty_space_ids != 0) {
+	    for (i = 0; i < SID_ARRAY_SIZE; i++) {
+		dirty_array[i] = dirty_space_id[i];
+		dirty_space_id[i] = 0;
+	    }
+	    dirty_space_ids = 0;
+	}
+
+	return;
+}
+
+static void recycle_sids(unsigned long ndirty,unsigned long *dirty_array)
+{
+	int i;
+
+	/* NOTE: sid_lock must be held upon entry */
+
+	if (ndirty != 0) {
+		for (i = 0; i < SID_ARRAY_SIZE; i++) {
+			space_id[i] ^= dirty_array[i];
+		}
+
+		free_space_ids += ndirty;
+		space_id_index = 0;
+	}
+}
+
+#else /* CONFIG_SMP */
 
 static void recycle_sids(void)
 {
@@ -775,16 +822,53 @@ static void recycle_sids(void)
 		space_id_index = 0;
 	}
 }
+#endif
 
-extern void __flush_tlb_all(void);
+/*
+ * flush_tlb_all() calls recycle_sids(), since whenever the entire tlb is
+ * purged, we can safely reuse the space ids that were released but
+ * not flushed from the tlb.
+ */
+
+#ifdef CONFIG_SMP
+
+static unsigned long recycle_ndirty;
+static unsigned long recycle_dirty_array[SID_ARRAY_SIZE];
+static unsigned int recycle_inuse = 0;
+
+void flush_tlb_all(void)
+{
+	int do_recycle;
 
+	do_recycle = 0;
+	spin_lock(&sid_lock);
+	if (dirty_space_ids > RECYCLE_THRESHOLD) {
+	    if (recycle_inuse) {
+		BUG();  /* FIXME: Use a semaphore/wait queue here */
+	    }
+	    get_dirty_sids(&recycle_ndirty,recycle_dirty_array);
+	    recycle_inuse++;
+	    do_recycle++;
+	}
+	spin_unlock(&sid_lock);
+	smp_call_function((void (*)(void *))flush_tlb_all_local, NULL, 1, 1);
+	flush_tlb_all_local();
+	if (do_recycle) {
+	    spin_lock(&sid_lock);
+	    recycle_sids(recycle_ndirty,recycle_dirty_array);
+	    recycle_inuse = 0;
+	    spin_unlock(&sid_lock);
+	}
+}
+#else
 void flush_tlb_all(void)
 {
 	spin_lock(&sid_lock);
-	__flush_tlb_all();
+	flush_tlb_all_local();
 	recycle_sids();
 	spin_unlock(&sid_lock);
 }
+#endif
 
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
--- arch/parisc/tools/offset.c.old	Wed Aug 29 03:39:10 2001
+++ arch/parisc/tools/offset.c	Wed Aug 29 05:23:17 2001
@@ -121,7 +121,6 @@ void output_task_ptreg_defines(void)
 	offset("#define TASK_PT_IAOQ0   ", struct task_struct, thread.regs.iaoq[0]);
 	offset("#define TASK_PT_IAOQ1   ", struct task_struct, thread.regs.iaoq[1]);
 	offset("#define TASK_PT_CR27    ", struct task_struct, thread.regs.cr27);
-	offset("#define TASK_PT_CR30    ", struct task_struct, thread.regs.cr30);
 	offset("#define TASK_PT_ORIG_R28 ", struct task_struct, thread.regs.orig_r28);
 	offset("#define TASK_PT_KSP     ", struct task_struct, thread.regs.ksp);
 	offset("#define TASK_PT_KPC     ", struct task_struct, thread.regs.kpc);
@@ -214,7 +213,6 @@ void output_ptreg_defines(void)
 	offset("#define PT_IAOQ0   ", struct pt_regs, iaoq[0]);
 	offset("#define PT_IAOQ1   ", struct pt_regs, iaoq[1]);
 	offset("#define PT_CR27    ", struct pt_regs, cr27);
-	offset("#define PT_CR30    ", struct pt_regs, cr30);
 	offset("#define PT_ORIG_R28 ", struct pt_regs, orig_r28);
 	offset("#define PT_KSP     ", struct pt_regs, ksp);
 	offset("#define PT_KPC     ", struct pt_regs, kpc);
--- arch/parisc/vmlinux.lds.old	Wed Aug 29 03:39:10 2001
+++ arch/parisc/vmlinux.lds	Wed Aug 29 05:24:41 2001
@@ -50,6 +50,7 @@ SECTIONS
 
 
   init_task BLOCK(16384) : { *(init_task) }  /* The initial task and kernel stack */
+  init_istack BLOCK(32768) : { *(init_istack) }  /* The initial interrupt stack */
 
   _edata = .;			/* End of data section */
 
--- arch/parisc/vmlinux64.lds.old	Wed Aug 29 03:39:10 2001
+++ arch/parisc/vmlinux64.lds	Wed Aug 29 03:40:14 2001
@@ -53,6 +53,7 @@ SECTIONS
 
 
   init_task BLOCK(16384) : { *(init_task) }  /* The initial task and kernel stack */
+  init_istack BLOCK(32768) : { *(init_istack) }  /* The initial interrupt stack */
 
   _edata = .;			/* End of data section */
 
--- include/asm-parisc/assembly.h.old	Wed Aug 29 03:39:10 2001
+++ include/asm-parisc/assembly.h	Thu Sep  6 02:55:01 2001
@@ -158,11 +158,9 @@
 	/* r29 is saved in get_stack and used to point to saved registers */
 	/* r30 stack pointer saved in get_stack */
 	STREG %r31, PT_GR31(\regs)
-	SAVE_CR (%cr31, PT_CR27(\regs)) /* XXX: we shouldn't have to store cr31 at all, and this is the wrong place for it. */
 	.endm
 
 	.macro	rest_general	regs
-	REST_CR (%cr31, PT_CR27(\regs)) /* XXX: we shouldn't have to store cr31 at all, and this is the wrong place for it. */
 	/* r1 used as a temp in rest_stack and is restored there */
 	LDREG PT_GR2 (\regs), %r2
 	LDREG PT_GR3 (\regs), %r3
--- include/asm-parisc/cache.h.old	Wed Aug 29 03:39:10 2001
+++ include/asm-parisc/cache.h	Thu Sep  6 02:49:48 2001
@@ -28,9 +28,16 @@
 
 #define __cacheline_aligned __attribute__((__aligned__(L1_CACHE_BYTES)))
 
+extern void flush_data_cache_local(void);  /* flushes local data-cache only */
+extern void flush_instruction_cache_local(void); /* flushes local code-cache only */
+#ifdef CONFIG_SMP
+extern void flush_data_cache(void); /* flushes data-cache only (all processors) */
+#else
+#define flush_data_cache flush_data_cache_local
+#define flush_instruction_cache flush_instruction_cache_local
+#endif
+
 extern void cache_init(void);		/* initializes cache-flushing */
-extern void flush_data_cache(void);	/* flushes data-cache only */
-extern void flush_instruction_cache(void);/* flushes code-cache only */
 extern void flush_all_caches(void);     /* flush everything (tlb & cache) */
 extern int get_cache_info(char *);
 extern void flush_user_icache_range_asm(unsigned long, unsigned long);
--- include/asm-parisc/current.h.old	Wed Aug 29 03:39:10 2001
+++ include/asm-parisc/current.h	Thu Sep  6 02:49:48 2001
@@ -7,8 +7,10 @@ struct task_struct;
 
 static inline struct task_struct * get_current(void)
 {
-	register unsigned long sp asm ("%r30");
-	return (struct task_struct *)(sp & ~(THREAD_SIZE-1));
+	register unsigned long cr;
+
+	__asm__ __volatile__("mfctl %%cr30,%0" : "=r" (cr) );
+	return (struct task_struct *)cr;
 }
  
 #define current get_current()
--- include/asm-parisc/mmu_context.h.old	Wed Aug 29 03:39:10 2001
+++ include/asm-parisc/mmu_context.h	Thu Sep  6 02:49:48 2001
@@ -1,6 +1,8 @@
 #ifndef __PARISC_MMU_CONTEXT_H
 #define __PARISC_MMU_CONTEXT_H
 
+#include <asm/pgalloc.h>
+
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu)
 {
 }
@@ -33,8 +35,7 @@ static inline void switch_mm(struct mm_s
 
 	if (prev != next) {
 		mtctl(__pa(next->pgd), 25);
-		mtsp(next->context,3);
-		mtctl(next->context << 1,8);
+		load_context(next->context);
 	}
 }
 
--- include/asm-parisc/pgalloc.h.old	Wed Aug 29 03:39:10 2001
+++ include/asm-parisc/pgalloc.h	Thu Sep  6 02:49:48 2001
@@ -19,13 +19,23 @@ flush_page_to_ram(struct page *page)
 {
 }
 
-#define flush_cache_mm(mm)                     flush_cache_all()
+extern void flush_cache_all_local(void);
 
+#ifdef CONFIG_SMP
 static inline void flush_cache_all(void)
 {
-	flush_instruction_cache();
-	flush_data_cache();
+	smp_call_function((void (*)(void *))flush_cache_all_local, NULL, 1, 1);
+	flush_cache_all_local();
 }
+#else
+#define flush_cache_all flush_cache_all_local
+#endif
+
+#ifdef CONFIG_SMP
+#define flush_cache_mm(mm) flush_cache_all()
+#else
+#define flush_cache_mm(mm) flush_cache_all_local()
+#endif
 
 /* The following value needs to be tuned and probably scaled with the
  * cache size.
@@ -36,19 +46,27 @@ static inline void flush_cache_all(void)
 static inline void
 flush_user_dcache_range(unsigned long start, unsigned long end)
 {
+#ifdef CONFIG_SMP
+	flush_user_dcache_range_asm(start,end);
+#else
 	if ((end - start) < FLUSH_THRESHOLD)
 		flush_user_dcache_range_asm(start,end);
 	else
 		flush_data_cache();
+#endif
 }
 
 static inline void
 flush_user_icache_range(unsigned long start, unsigned long end)
 {
+#ifdef CONFIG_SMP
+	flush_user_icache_range_asm(start,end);
+#else
 	if ((end - start) < FLUSH_THRESHOLD)
 		flush_user_icache_range_asm(start,end);
 	else
 		flush_instruction_cache();
+#endif
 }
 
 static inline void
@@ -66,8 +84,7 @@ flush_cache_range(struct mm_struct *mm, 
 		flush_user_dcache_range(start,end);
 		flush_user_icache_range(start,end);
 	} else {
-		flush_data_cache();
-		flush_instruction_cache();
+		flush_cache_all();
 	}
 }
 
@@ -87,9 +104,10 @@ flush_cache_page(struct vm_area_struct *
 		if (vma->vm_flags & VM_EXEC)
 			flush_user_icache_range(vmaddr,vmaddr + PAGE_SIZE);
 	} else {
-		flush_data_cache();
 		if (vma->vm_flags & VM_EXEC)
-			flush_instruction_cache();
+			flush_cache_all();
+		else
+			flush_data_cache();
 	}
 }
 
@@ -111,6 +129,16 @@ static inline void flush_dcache_page(str
 
 extern void flush_tlb_all(void);
 
+static inline void load_context(mm_context_t context)
+{
+	mtsp(context, 3);
+#if SPACEID_SHIFT == 0
+	mtctl(context << 1,8);
+#else
+	mtctl(context >> (SPACEID_SHIFT - 1),8);
+#endif
+}
+
 /*
  * flush_tlb_mm()
  *
@@ -131,18 +159,16 @@ static inline void flush_tlb_mm(struct m
 {
 	if (mm == &init_mm) BUG(); /* Should never happen */
 
-#if 1
+#ifdef CONFIG_SMP
+	flush_tlb_all();
+#else
 	if (mm) {
 		if (mm->context != 0)
 			free_sid(mm->context);
 		mm->context = alloc_sid();
-		if (mm == current->active_mm) {
-			mtsp(mm->context, 3);
-			mtctl(mm->context << 1,8);
-		}
+		if (mm == current->active_mm)
+			load_context(mm->context);
 	}
-#else
-	flush_tlb_all();
 #endif
 }
 
--- include/asm-parisc/processor.h.old	Wed Aug 29 03:39:10 2001
+++ include/asm-parisc/processor.h	Thu Sep  6 02:49:48 2001
@@ -16,6 +16,7 @@
 #include <asm/pdc.h>
 #include <asm/ptrace.h>
 #include <asm/types.h>
+#include <asm/system.h>
 #endif /* __ASSEMBLY__ */
 
 /*
@@ -67,18 +68,20 @@ struct system_cpuinfo_parisc {
 struct cpuinfo_parisc {
 
 	struct irq_region *region;
-	unsigned long it_value; /* Interval Timer value at last timer Intr */
-	unsigned long it_delta; /* Interval Timer delta (tic_10ms / HZ * 100) */
-	unsigned long irq_count;	/* number of IRQ's since boot */
-	unsigned long irq_max_cr16;	/* longest time to handle a single IRQ */
-	unsigned long bh_count;		/* number of times bh was invoked */
-
-	unsigned long cpuid;	/* aka slot_number or set to NO_PROC_ID */
-	unsigned long hpa;	/* Host Physical address */
-	unsigned long txn_addr;	/* MMIO addr of EIR or id_eid */
-	unsigned long pending_ipi;      /* bitmap of type ipi_message_type */
-	unsigned long ipi_count;	/* number ipi Interrupts */
-	unsigned long prof_counter;	/* per CPU profiling support */
+	unsigned long it_value;     /* Interval Timer value at last timer Intr */
+	unsigned long it_delta;     /* Interval Timer delta (tic_10ms / HZ * 100) */
+	unsigned long irq_count;    /* number of IRQ's since boot */
+	unsigned long irq_max_cr16; /* longest time to handle a single IRQ */
+	unsigned long cpuid;        /* aka slot_number or set to NO_PROC_ID */
+	unsigned long hpa;          /* Host Physical address */
+	unsigned long txn_addr;     /* MMIO addr of EIR or id_eid */
+#ifdef CONFIG_SMP
+	spinlock_t lock;            /* synchronization for ipi's */
+	unsigned long pending_ipi;  /* bitmap of type ipi_message_type */
+	unsigned long ipi_count;    /* number ipi Interrupts */
+#endif
+	unsigned long bh_count;     /* number of times bh was invoked */
+	unsigned long prof_counter; /* per CPU profiling support */
 	unsigned long prof_multiplier;	/* per CPU profiling support */
 	unsigned long fp_rev;
 	unsigned long fp_model;
@@ -92,8 +95,6 @@ extern struct cpuinfo_parisc cpu_data[NR
 
 #define CPU_HVERSION ((boot_cpu_data.hversion >> 4) & 0x0FFF)
 
-extern void identify_cpu(struct cpuinfo_parisc *);
-
 extern int EISA_bus;
 
 #define MCA_bus 0
@@ -180,8 +181,6 @@ static inline unsigned long thread_saved
 	get_user(regs->gr[25],&sp[-1]); 		\
 	get_user(regs->gr[24],&sp[-2]); 		\
 	get_user(regs->gr[23],&sp[-3]); 		\
-							\
-	regs->cr30 = (u32) current;			\
 } while(0)
 
 /* The ELF abi wants things done a "wee bit" differently than
@@ -278,8 +277,6 @@ on downward growing arches, it looks lik
 	get_user(regs->gr[25], (argv - 1));		\
 	regs->gr[24] = argv;				\
 	regs->gr[23] = 0;				\
-							\
-	regs->cr30 = (u32) current;			\
 } while(0)
 
 struct task_struct;
--- include/asm-parisc/psw.h.old	Wed Aug 29 03:39:10 2001
+++ include/asm-parisc/psw.h	Wed Aug 29 03:40:14 2001
@@ -30,6 +30,10 @@
 #define	PSW_Z	0x40000000	/* PA1.x only */
 #define	PSW_Y	0x80000000	/* PA1.x only */
 
+#ifdef __LP64__
+#define PSW_HI_CB 0x000000ff    /* PA2.0 only */
+#endif
+
 /* PSW bits to be used with ssm/rsm */
 #define PSW_SM_I        0x1
 #define PSW_SM_D        0x2
@@ -46,10 +50,13 @@
 #  define USER_PSW      (PSW_C | PSW_Q | PSW_P | PSW_D | PSW_I)
 #  define KERNEL_PSW    (PSW_W | PSW_C | PSW_Q | PSW_P | PSW_D)
 #  define REAL_MODE_PSW (PSW_W | PSW_Q)
+#  define USER_PSW_MASK (PSW_W | PSW_T | PSW_N | PSW_X | PSW_B | PSW_V | PSW_CB)
+#  define USER_PSW_HI_MASK (PSW_HI_CB)
 #else
 #  define USER_PSW      (PSW_C | PSW_Q | PSW_P | PSW_D | PSW_I)
 #  define KERNEL_PSW    (PSW_C | PSW_Q | PSW_P | PSW_D)
 #  define REAL_MODE_PSW (PSW_Q)
+#  define USER_PSW_MASK (PSW_T | PSW_N | PSW_X | PSW_B | PSW_V | PSW_CB)
 #endif
 
 #endif
--- include/asm-parisc/ptrace.h.old	Wed Aug 29 03:39:10 2001
+++ include/asm-parisc/ptrace.h	Thu Sep  6 02:49:47 2001
@@ -8,7 +8,12 @@
 #include <linux/types.h>
 
 /* This struct defines the way the registers are stored on the 
-   stack during a system call. */
+ * stack during a system call.
+ *
+ * N.B. gdb/strace care about the size and offsets within this
+ * structure. If you change things, you may break object compatibility
+ * for those applications.
+ */
 
 struct pt_regs {
 	unsigned long gr[32];	/* PSW is in gr[0] */
@@ -17,7 +22,7 @@ struct pt_regs {
 	unsigned long iasq[2];
 	unsigned long iaoq[2];
 	unsigned long cr27;
-	unsigned long cr30;
+	unsigned long pad0;     /* available for other uses */
 	unsigned long orig_r28;
 	unsigned long ksp;
 	unsigned long kpc;
--- include/asm-parisc/bitops.h.old	Wed Aug 29 03:39:10 2001
+++ include/asm-parisc/bitops.h	Thu Sep  6 02:49:48 2001
@@ -28,50 +28,70 @@ static __inline__ int test_and_set_bit(i
 	unsigned long flags;
 
 	addr += (nr >> SHIFT_PER_LONG);
-	SPIN_LOCK_IRQSAVE(ATOMIC_HASH(addr), flags);
-
 	mask = 1L << CHOP_SHIFTCOUNT(nr);
+	SPIN_LOCK_IRQSAVE(ATOMIC_HASH(addr), flags);
 	oldbit = (*addr & mask) ? 1 : 0;
 	*addr |= mask;
-
 	SPIN_UNLOCK_IRQRESTORE(ATOMIC_HASH(addr), flags);
 
 	return oldbit;
 }
 
-static __inline__ int test_and_clear_bit(int nr, void * address)
+static __inline__ void set_bit(int nr, void * address)
 {
 	unsigned long mask;
 	unsigned long *addr = (unsigned long *) address;
-	int oldbit;
 	unsigned long flags;
 
 	addr += (nr >> SHIFT_PER_LONG);
+	mask = 1L << CHOP_SHIFTCOUNT(nr);
 	SPIN_LOCK_IRQSAVE(ATOMIC_HASH(addr), flags);
+	*addr |= mask;
+	SPIN_UNLOCK_IRQRESTORE(ATOMIC_HASH(addr), flags);
+}
 
+static __inline__ int test_and_clear_bit(int nr, void * address)
+{
+	unsigned long mask;
+	unsigned long *addr = (unsigned long *) address;
+	int oldbit;
+	unsigned long flags;
+
+	addr += (nr >> SHIFT_PER_LONG);
 	mask = 1L << CHOP_SHIFTCOUNT(nr);
+	SPIN_LOCK_IRQSAVE(ATOMIC_HASH(addr), flags);
 	oldbit = (*addr & mask) ? 1 : 0;
 	*addr &= ~mask;
-
 	SPIN_UNLOCK_IRQRESTORE(ATOMIC_HASH(addr), flags);
 
 	return oldbit;
 }
 
-static __inline__ int test_and_change_bit(int nr, void * address)
+static __inline__ void clear_bit(int nr, void * address)
 {
 	unsigned long mask;
 	unsigned long *addr = (unsigned long *) address;
-	int oldbit;
 	unsigned long flags;
 
 	addr += (nr >> SHIFT_PER_LONG);
+	mask = 1L << CHOP_SHIFTCOUNT(nr);
 	SPIN_LOCK_IRQSAVE(ATOMIC_HASH(addr), flags);
+	*addr &= ~mask;
+	SPIN_UNLOCK_IRQRESTORE(ATOMIC_HASH(addr), flags);
+}
+
+static __inline__ int test_and_change_bit(int nr, void * address)
+{
+	unsigned long mask;
+	unsigned long *addr = (unsigned long *) address;
+	int oldbit;
+	unsigned long flags;
 
+	addr += (nr >> SHIFT_PER_LONG);
 	mask = 1L << CHOP_SHIFTCOUNT(nr);
+	SPIN_LOCK_IRQSAVE(ATOMIC_HASH(addr), flags);
 	oldbit = (*addr & mask) ? 1 : 0;
 	*addr ^= mask;
-
 	SPIN_UNLOCK_IRQRESTORE(ATOMIC_HASH(addr), flags);
 
 	return oldbit;
@@ -92,6 +112,19 @@ static __inline__ int __test_and_change_
 	return oldbit;
 }
 
+static __inline__ void change_bit(int nr, void * address)
+{
+	unsigned long mask;
+	unsigned long *addr = (unsigned long *) address;
+	unsigned long flags;
+
+	addr += (nr >> SHIFT_PER_LONG);
+	mask = 1L << CHOP_SHIFTCOUNT(nr);
+	SPIN_LOCK_IRQSAVE(ATOMIC_HASH(addr), flags);
+	*addr ^= mask;
+	SPIN_UNLOCK_IRQRESTORE(ATOMIC_HASH(addr), flags);
+}
+
 /* see asm-i386/bitops.h */
 static __inline__ void __change_bit(int nr, void * address)
 {
@@ -99,7 +132,6 @@ static __inline__ void __change_bit(int 
 	unsigned long *addr = (unsigned long *) address;
 
 	addr += (nr >> SHIFT_PER_LONG);
-
 	mask = 1L << CHOP_SHIFTCOUNT(nr);
 	*addr ^= mask;
 }
@@ -117,12 +149,8 @@ static __inline__ int test_bit(int nr, c
 	return !!(*addr & mask);
 }
 
-/* sparc does this, other arch's don't -- what's the right answer? XXX */
-#define smp_mb__before_clear_bit()	do { } while(0)
-#define smp_mb__after_clear_bit()	do { } while(0)
-#define set_bit(nr,addr)	((void)test_and_set_bit(nr,addr))
-#define clear_bit(nr,addr)	((void)test_and_clear_bit(nr,addr))
-#define change_bit(nr,addr)	((void)test_and_change_bit(nr,addr))
+#define smp_mb__before_clear_bit()      smp_mb()
+#define smp_mb__after_clear_bit()       smp_mb()
 
 /* XXX We'd need some binary search here */
 
--- include/asm-parisc/atomic.h.old	Wed Aug 29 03:39:10 2001
+++ include/asm-parisc/atomic.h	Thu Sep  6 02:49:48 2001
@@ -114,8 +114,8 @@ static __inline__ unsigned long __xchg(u
 extern void __cmpxchg_called_with_bad_pointer(void);
 
 /* __cmpxchg_u32/u64 defined in arch/parisc/lib/bitops.c */
-extern unsigned long __cmpxchg_u32(volatile int *m, int old, int new);
-extern unsigned long __cmpxchg_u64(volatile long *ptr, unsigned long old, unsigned long new);
+extern unsigned long __cmpxchg_u32(volatile unsigned int *m, unsigned int old, unsigned int new);
+extern unsigned long __cmpxchg_u64(volatile unsigned long *ptr, unsigned long old, unsigned long new);
 
 /* don't worry...optimizer will get rid of most of this */
 static __inline__ unsigned long
@@ -123,9 +123,9 @@ __cmpxchg(volatile void *ptr, unsigned l
 {
 	switch(size) {
 #ifdef __LP64__
-	case 8: return __cmpxchg_u64(ptr, old, new);
+	case 8: return __cmpxchg_u64((unsigned long *)ptr, old, new);
 #endif
-	case 4: return __cmpxchg_u32((int *)ptr, (int) old, (int) new);
+	case 4: return __cmpxchg_u32((unsigned int *)ptr, (unsigned int) old, (unsigned int) new);
 	}
 	__cmpxchg_called_with_bad_pointer();
 	return old;
--- include/asm-parisc/pgtable.h.old	Wed Aug 29 03:39:10 2001
+++ include/asm-parisc/pgtable.h	Thu Sep  6 02:49:48 2001
@@ -8,8 +8,10 @@
  * we simulate an x86-style page table for the linux mm code
  */
 
+#include <linux/spinlock.h>
 #include <asm/processor.h>
 #include <asm/cache.h>
+#include <asm/bitops.h>
 
 #define ARCH_STACK_GROWSUP
 
@@ -46,9 +48,13 @@
 #define pgd_ERROR(e) \
 	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
 
-/*
- * pgd entries used up by user/kernel:
- */
+ /* Note: If you change ISTACK_SIZE, you need to change the corresponding
+  * values in vmlinux.lds and vmlinux64.lds (init_istack section). Also,
+  * the "order" and size need to agree.
+  */
+
+#define  ISTACK_SIZE  32768 /* Interrupt Stack Size */
+#define  ISTACK_ORDER 3
 
 /*
  * NOTE: Many of the below macros use PT_NLEVELS because
@@ -68,6 +74,8 @@
 #define MAX_ADDRBITS (PAGE_SHIFT + (PT_NLEVELS)*(PAGE_SHIFT - PT_NLEVELS))
 #define MAX_ADDRESS (1UL << MAX_ADDRBITS)
 
+#define SPACEID_SHIFT (MAX_ADDRBITS - 32)
+
 /* Definitions for 1st level */
 
 #define PGDIR_SHIFT  (PAGE_SHIFT + (PT_NLEVELS - 1)*(PAGE_SHIFT - PT_NLEVELS))
@@ -91,6 +99,10 @@
 
 #define PTRS_PER_PTE    (1UL << (PAGE_SHIFT - PT_NLEVELS))
 
+/*
+ * pgd entries used up by user/kernel:
+ */
+
 #define FIRST_USER_PGD_NR	0
 
 #ifndef __ASSEMBLY__
@@ -101,36 +113,43 @@ extern  void *vmalloc_start;
 #define VMALLOC_END	(FIXADDR_START)
 #endif
 
-#define _PAGE_READ	0x001	/* read access allowed */
-#define _PAGE_WRITE	0x002	/* write access allowed */
-#define _PAGE_RW (_PAGE_READ | _PAGE_WRITE)
-#define _PAGE_EXEC	0x004	/* execute access allowed */
-#define _PAGE_GATEWAY	0x008	/* privilege promotion allowed */
-#define _PAGE_GATEWAY_BIT 28	/* _PAGE_GATEWAY & _PAGE_GATEWAY_BIT need */
-				/* to agree. One could be defined in relation */
-				/* to the other, but that's kind of ugly. */
-
-				/* 0x010 reserved (B bit) */
-#define _PAGE_DIRTY	0x020	/* D: dirty */
-				/* 0x040 reserved (T bit) */
-#define _PAGE_NO_CACHE  0x080   /* Software: Uncacheable */
-#define _PAGE_NO_CACHE_BIT 24   /* Needs to agree with _PAGE_NO_CACHE above */
-#define _PAGE_ACCESSED	0x100	/* R: page cache referenced */
-#define _PAGE_PRESENT   0x200   /* Software: pte contains a translation */
-#define _PAGE_PRESENT_BIT  22   /* Needs to agree with _PAGE_PRESENT above */
-#define _PAGE_USER      0x400   /* Software: User accessable page */
-#define _PAGE_USER_BIT     21   /* Needs to agree with _PAGE_USER above */
-				/* 0x800 still available */
-
-#ifdef __ASSEMBLY__
-#define _PGB_(x)	(1 << (63 - (x)))
-#define __PAGE_O	_PGB_(13)
-#define __PAGE_U	_PGB_(12)
-#define __PAGE_T	_PGB_(2)
-#define __PAGE_D	_PGB_(3)
-#define __PAGE_B	_PGB_(4)
-#define __PAGE_P	_PGB_(14)
-#endif
+/* NB: The tlb miss handlers make certain assumptions about the order */
+/*     of the following bits, so be careful (One example, bits 25-31  */
+/*     are moved together in one instruction).                        */
+
+#define _PAGE_READ_BIT     31   /* (0x001) read access allowed */
+#define _PAGE_WRITE_BIT    30   /* (0x002) write access allowed */
+#define _PAGE_EXEC_BIT     29   /* (0x004) execute access allowed */
+#define _PAGE_GATEWAY_BIT  28   /* (0x008) privilege promotion allowed */
+#define _PAGE_DMB_BIT      27   /* (0x010) Data Memory Break enable (B bit) */
+#define _PAGE_DIRTY_BIT    26   /* (0x020) Page Dirty (D bit) */
+#define _PAGE_REFTRAP_BIT  25   /* (0x040) Page Ref. Trap enable (T bit) */
+#define _PAGE_NO_CACHE_BIT 24   /* (0x080) Uncached Page (U bit) */
+#define _PAGE_ACCESSED_BIT 23   /* (0x100) Software: Page Accessed */
+#define _PAGE_PRESENT_BIT  22   /* (0x200) Software: translation valid */
+#define _PAGE_FLUSH_BIT    21   /* (0x400) Software: translation valid */
+				/*             for cache flushing only */
+#define _PAGE_USER_BIT     20   /* (0x800) Software: User accessible page */
+
+/* N.B. The bits are defined in terms of a 32 bit word above, so the */
+/*      following macro is ok for both 32 and 64 bit.                */
+
+#define xlate_pabit(x) (31 - (x))
+
+#define _PAGE_READ     (1 << xlate_pabit(_PAGE_READ_BIT))
+#define _PAGE_WRITE    (1 << xlate_pabit(_PAGE_WRITE_BIT))
+#define _PAGE_RW       (_PAGE_READ | _PAGE_WRITE)
+#define _PAGE_EXEC     (1 << xlate_pabit(_PAGE_EXEC_BIT))
+#define _PAGE_GATEWAY  (1 << xlate_pabit(_PAGE_GATEWAY_BIT))
+#define _PAGE_DMB      (1 << xlate_pabit(_PAGE_DMB_BIT))
+#define _PAGE_DIRTY    (1 << xlate_pabit(_PAGE_DIRTY_BIT))
+#define _PAGE_REFTRAP  (1 << xlate_pabit(_PAGE_REFTRAP_BIT))
+#define _PAGE_NO_CACHE (1 << xlate_pabit(_PAGE_NO_CACHE_BIT))
+#define _PAGE_ACCESSED (1 << xlate_pabit(_PAGE_ACCESSED_BIT))
+#define _PAGE_PRESENT  (1 << xlate_pabit(_PAGE_PRESENT_BIT))
+#define _PAGE_FLUSH    (1 << xlate_pabit(_PAGE_FLUSH_BIT))
+#define _PAGE_USER     (1 << xlate_pabit(_PAGE_USER_BIT))
+
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_ACCESSED)
 #define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _PAGE_KERNEL	(_PAGE_PRESENT | _PAGE_EXEC | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED)
@@ -151,6 +170,7 @@ extern  void *vmalloc_start;
 #define PAGE_KERNEL_RO	__pgprot(_PAGE_PRESENT | _PAGE_EXEC | _PAGE_READ | _PAGE_DIRTY | _PAGE_ACCESSED)
 #define PAGE_KERNEL_UNC	__pgprot(_PAGE_KERNEL | _PAGE_NO_CACHE)
 #define PAGE_GATEWAY    __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_GATEWAY| _PAGE_READ)
+#define PAGE_FLUSH      __pgprot(_PAGE_FLUSH)
 
 
 /*
@@ -197,7 +217,7 @@ extern unsigned long *empty_zero_page;
 
 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
 
-#define pte_none(x)	(!pte_val(x))
+#define pte_none(x)     ((pte_val(x) == 0) || (pte_val(x) & _PAGE_FLUSH))
 #define pte_present(x)	(pte_val(x) & _PAGE_PRESENT)
 #define pte_clear(xp)	do { pte_val(*(xp)) = 0; } while (0)
 
@@ -331,16 +351,89 @@ extern void update_mmu_cache(struct vm_a
 
 /* Encode and de-code a swap entry */
 
-#define SWP_TYPE(x)                     ((x).val & 0x3f)
-#define SWP_OFFSET(x)                   ( (((x).val >> 6) &  0x7) | \
-					  (((x).val >> 7) & ~0x7) )
+#define SWP_TYPE(x)                     ((x).val & 0x1f)
+#define SWP_OFFSET(x)                   ( (((x).val >> 5) &  0xf) | \
+					  (((x).val >> 7) & ~0xf) )
 #define SWP_ENTRY(type, offset)         ((swp_entry_t) { (type) | \
-					    ((offset &  0x7) << 6) | \
-					    ((offset & ~0x7) << 7) })
+					    ((offset &  0xf) << 5) | \
+					    ((offset & ~0xf) << 7) })
 #define pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
 #define swp_entry_to_pte(x)		((pte_t) { (x).val })
 
-#include <asm-generic/pgtable.h>
+static inline int ptep_test_and_clear_young(pte_t *ptep)
+{
+#ifdef CONFIG_SMP
+	return test_and_clear_bit(xlate_pabit(_PAGE_ACCESSED_BIT), ptep);
+#else
+	pte_t pte = *ptep;
+	if (!pte_young(pte))
+		return 0;
+	set_pte(ptep, pte_mkold(pte));
+	return 1;
+#endif
+}
+
+static inline int ptep_test_and_clear_dirty(pte_t *ptep)
+{
+#ifdef CONFIG_SMP
+	return test_and_clear_bit(xlate_pabit(_PAGE_DIRTY_BIT), ptep);
+#else
+	pte_t pte = *ptep;
+	if (!pte_dirty(pte))
+		return 0;
+	set_pte(ptep, pte_mkclean(pte));
+	return 1;
+#endif
+}
+
+#ifdef CONFIG_SMP
+extern spinlock_t pa_dbit_lock;
+#else
+static int pa_dbit_lock; /* dummy to keep the compilers happy */
+#endif
+
+static inline pte_t ptep_get_and_clear(pte_t *ptep)
+{
+	pte_t old_pte;
+	pte_t pte;
+
+	spin_lock(&pa_dbit_lock);
+	pte = old_pte = *ptep;
+	pte_val(pte) &= ~_PAGE_PRESENT;
+	pte_val(pte) |= _PAGE_FLUSH;
+	set_pte(ptep,pte);
+	spin_unlock(&pa_dbit_lock);
+
+	return old_pte;
+}
+
+static inline void ptep_set_wrprotect(pte_t *ptep)
+{
+#ifdef CONFIG_SMP
+	unsigned long new, old;
+
+	do {
+		old = pte_val(*ptep);
+		new = pte_val(pte_wrprotect(__pte (old)));
+	} while (cmpxchg((unsigned long *) ptep, old, new) != old);
+#else
+	pte_t old_pte = *ptep;
+	set_pte(ptep, pte_wrprotect(old_pte));
+#endif
+}
+
+static inline void ptep_mkdirty(pte_t *ptep)
+{
+#ifdef CONFIG_SMP
+	set_bit(xlate_pabit(_PAGE_DIRTY_BIT), ptep);
+#else
+	pte_t old_pte = *ptep;
+	set_pte(ptep, pte_mkdirty(old_pte));
+#endif
+}
+
+#define pte_same(A,B)	(pte_val(A) == pte_val(B))
+
 
 #endif /* !__ASSEMBLY__ */
 
--- include/asm-parisc/smp.h.old	Tue Sep  4 04:57:18 2001
+++ include/asm-parisc/smp.h	Thu Sep  6 02:49:48 2001
@@ -18,28 +18,12 @@ typedef unsigned long address_t;
 /*
  *	Private routines/data
  */
- 
-extern int smp_found_config;
-extern unsigned long smp_alloc_memory(unsigned long mem_base);
-extern unsigned long smp_pdc_entry(unsigned long cpuid);
-extern void smp_pdc_setup(int cpuid,unsigned long entry_point);
-extern unsigned char boot_cpu_id;
-extern unsigned long cpu_present_map;
-extern unsigned long cpu_online_map;
 extern volatile int __cpu_number_map[NR_CPUS];
 static inline int cpu_number_map(int cpu)
 {
 	return __cpu_number_map[cpu];
 }
-extern volatile unsigned long smp_invalidate_needed;
-extern void smp_flush_tlb(void);
-extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
 extern void smp_send_reschedule(int cpu);
-extern unsigned long ipi_count;
-extern void smp_invalidate_rcv(void);		/* Process an NMI */
-extern void smp_local_timer_interrupt(struct pt_regs * regs);
-extern void (*mtrr_hook) (void);
-extern void setup_APIC_clock (void);
 extern volatile int __cpu_logical_map[NR_CPUS];
 static inline int cpu_logical_map(int cpu)
 {
@@ -48,15 +32,11 @@ static inline int cpu_logical_map(int cp
 
 #endif /* !ASSEMBLY */
 
-
 /*
  *	This magic constant controls our willingness to transfer
- *	a process across CPUs. Such a transfer incurs misses on the L1
- *	cache, and on a P6 or P5 with multiple L2 caches L2 hits. My
- *	gut feeling is this will vary by board in value. For a board
- *	with separate L2 cache it probably depends also on the RSS, and
- *	for a board with shared L2 cache it ought to decay fast as other
- *	processes are run.
+ *      a process across CPUs. Such a transfer incurs cache and tlb
+ *      misses. The current value is inherited from i386. Still needs
+ *      to be tuned for parisc.
  */
  
 #define PROC_CHANGE_PENALTY	15		/* Schedule penalty */
@@ -68,11 +48,6 @@ static inline int cpu_logical_map(int cp
 #define STATE_RUNNING				2
 #define STATE_HALTED				3
 #endif
-
-
-#define IPI_FUNC_FLUSHTLB       0 
-#define IPI_FUNC_PROBE 	        1
-#define IPI_FUNC_PRINTREGS      2
 
 #define smp_processor_id() (current->processor)
 
--- mm/vmscan.c.old	Wed Aug 29 03:39:10 2001
+++ mm/vmscan.c	Wed Aug 29 04:33:22 2001
@@ -111,11 +111,7 @@ static void try_to_swap_out(struct mm_st
 	 * is needed on CPUs which update the accessed and dirty
 	 * bits in hardware.
 	 */
-#ifdef __hppa__
-	pte = *page_table; /* Add ptep_get() if we stick with this fix */
-#else
 	pte = ptep_get_and_clear(page_table);
-#endif
 	flush_tlb_page(vma, address);
 
 	/*
@@ -154,7 +150,6 @@ drop_pte:
 	 * some real work in the future in "refill_inactive()".
 	 */
 	flush_cache_page(vma, address);
-	pte_clear(page_table);
 	if (!pte_dirty(pte))
 		goto drop_pte;
 
--- Documentation/parisc/registers.old	Tue Apr 17 15:37:26 2001
+++ Documentation/parisc/registers	Thu Sep  6 03:31:04 2001
@@ -25,8 +25,8 @@ CR26 (TR 2)			not used
 CR27 (TR 3)			Thread descriptor pointer
 CR28 (TR 4)			not used
 CR29 (TR 5)			not used
-CR30 (TR 6)			current / 0
-CR31 (TR 7)			Temporary register, used in various places
+CR30 (TR 6)			current
+CR31 (TR 7)			interrupt stack base
 
 	Space Registers (kernel mode)
 
--- Makefile.old	Wed Sep  5 07:09:45 2001
+++ Makefile	Wed Sep  5 07:09:53 2001
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 4
 SUBLEVEL = 9
-EXTRAVERSION = -pa15
+EXTRAVERSION = -pa16
 
 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)