[parisc-linux-cvs] How to get an SMP machine stable [HACK]

Matthew Wilcox willy@ldl.fc.hp.com
Thu, 22 Feb 2001 02:00:14 -0700


This is a diff against my tree.  This appears stable on a 4-way SMP
system.  Note that this is a hack: processes are currently constrained to
never move off the CPU they started on [jsm's idea, my implementation].
Clearly something is still wrong (presumably in how processes migrate
between CPUs) and it needs to be fixed properly.
But if anyone wants to boot an SMP machine today, apply this diff and
I claim it should work.

Index: arch/parisc/kernel/smp.c
===================================================================
RCS file: /home/cvs/parisc/linux/arch/parisc/kernel/smp.c,v
retrieving revision 1.7
diff -u -p -r1.7 smp.c
--- smp.c	2001/02/13 22:04:41	1.7
+++ smp.c	2001/02/22 08:51:18
@@ -154,8 +154,8 @@ ipi_interrupt(int irq, void *dev_id, str
 	cpu_data[this_cpu].ipi_count++;
 
 	mb();	/* Order interrupt and bit testing. */
-printk("ipiIRQ() CPU%d  %lx\n", this_cpu, *ipis);
-mdelay(200);
+//printk("ipiIRQ() CPU%d  %lx\n", this_cpu, *ipis);
+//mdelay(200);
 
 	while ((ops = xchg(ipis, 0)) != 0) {
 	  mb();	/* Order bit clearing and data access. */
@@ -165,7 +165,7 @@ mdelay(200);
 		switch (which) {
 		case IPI_RESCHEDULE:
 #if (kDEBUG>=100)
-			printk("CPU%d IPI_RESCHEDULE\n",this_cpu);
+//			printk("CPU%d IPI_RESCHEDULE\n",this_cpu);
 #endif /* kDEBUG */
 			ops &= ~(1 << IPI_RESCHEDULE);
 			/* 
@@ -176,7 +176,7 @@ mdelay(200);
 			
 		case IPI_CALL_FUNC: 
 #if (kDEBUG>=100)
-			printk("CPU%d IPI_CALL_FUNC\n",this_cpu);
+//			printk("CPU%d IPI_CALL_FUNC\n",this_cpu);
 #endif /* kDEBUG */
 			ops &= ~(1 << IPI_CALL_FUNC);
 			{
@@ -483,6 +483,13 @@ smp_cpu_init(int cpunum)
 		machine_halt();
 	}  
 
+	/* Initialise the idle task for this CPU */
+	atomic_inc(&init_mm.mm_count);
+	current->active_mm = &init_mm;
+	if(current->mm)
+		BUG();
+//	enter_lazy_tlb(&init_mm, current, nr);
+
 	init_IRQ();   /* make sure no IRQ's are enabled or pending */
 }
 
@@ -497,11 +504,10 @@ void __init smp_callin(unsigned long pdc
 	int slave_id = cpu_now_booting;
 
 	smp_cpu_init(slave_id);
-	current->active_mm = &init_mm;
 
 	local_irq_enable();  /* Interrupts have been off until now */
 
-	/* Slave's wait here until Big Poppa daddy say "jump" */
+	/* Slaves wait here until Big Poppa daddy say "jump" */
 	mb();	/* PARANOID */
 	while (!smp_commenced) ;
 	mb();	/* PARANOID */
@@ -670,10 +676,10 @@ void __init smp_boot_cpus(void)
 	/* Setup BSP mappings */
 	__cpu_number_map[bootstrap_processor] = 0;
 	__cpu_logical_map[0] = bootstrap_processor;
-        printk("SMP: bootstrap CPU ID is %d\n",bootstrap_processor);
+	printk("SMP: bootstrap CPU ID is %d\n",bootstrap_processor);
 	init_task.processor = 0; 
 	current->processor = 0;	/*These are set already*/
-        current->active_mm = &init_mm;
+	current->active_mm = &init_mm;
 	cpu_callin_map = 1; /* Mark Boostrap processor as present */
 
 #ifdef ENTRY_SYS_CPUS
Index: include/asm-parisc/mmu_context.h
===================================================================
RCS file: /home/cvs/parisc/linux/include/asm-parisc/mmu_context.h,v
retrieving revision 1.11
diff -u -p -r1.11 mmu_context.h
--- mmu_context.h	2001/02/07 14:19:50	1.11
+++ mmu_context.h	2001/02/22 08:51:19
@@ -18,6 +18,7 @@ init_new_context(struct task_struct *tsk
 	    BUG();
 
 	mm->context = alloc_sid();
+	printk("init_new_context: pid %d got sid %ld\n", tsk->pid, mm->context);
 	return 0;
 }
 
@@ -52,6 +53,7 @@ static inline void activate_mm(struct mm
 
 	if (next->context == 0)
 	    next->context = alloc_sid();
+	printk("process %d has sid %ld\n", current->pid, next->context);
 
 	switch_mm(prev,next,current,0);
 }
Index: include/asm-parisc/pgalloc.h
===================================================================
RCS file: /home/cvs/parisc/linux/include/asm-parisc/pgalloc.h,v
retrieving revision 1.26
diff -u -p -r1.26 pgalloc.h
--- pgalloc.h	2001/02/13 11:46:14	1.26
+++ pgalloc.h	2001/02/22 08:51:19
@@ -136,6 +136,7 @@ static inline void flush_tlb_mm(struct m
 		if (mm->context != 0)
 			free_sid(mm->context);
 		mm->context = alloc_sid();
+		printk("process %d has sid %ld\n", current->pid, mm->context);
 		if (mm == current->active_mm) {
 			mtsp(mm->context, 3);
 			mtctl(mm->context << 1,8);
Index: kernel/sched.c
===================================================================
RCS file: /home/cvs/parisc/linux/kernel/sched.c,v
retrieving revision 1.13
diff -u -p -r1.13 sched.c
--- sched.c	2001/02/02 03:37:17	1.13
+++ sched.c	2001/02/22 08:51:20
@@ -516,6 +516,8 @@ asmlinkage void schedule(void)
 need_resched_back:
 	prev = current;
 	this_cpu = prev->processor;
+	if (current->cpus_allowed == -1)
+		current->cpus_allowed = (1 << this_cpu);
 
 	if (in_interrupt())
 		goto scheduling_in_interrupt;
@@ -568,6 +570,7 @@ repeat_schedule:
 still_running_back:
 	list_for_each(tmp, &runqueue_head) {
 		p = list_entry(tmp, struct task_struct, run_list);
+//		printk("cpu %d: process %d (has_cpu: %d) on run queue\n", this_cpu, p->pid, p->has_cpu);
 		if (can_schedule(p, this_cpu)) {
 			int weight = goodness(p, this_cpu, prev->active_mm);
 			if (weight > c)
@@ -583,6 +586,7 @@ still_running_back:
 	 * switching to the next task, save this fact in
 	 * sched_data.
 	 */
+//	printk("cpu %d: switching to process %d\n", this_cpu, next->pid);
 	sched_data->curr = next;
 #ifdef CONFIG_SMP
  	next->has_cpu = 1;