[parisc-linux-cvs] linux grundler

Grant Grundler grundler@dsl2.external.hp.com
Sat, 8 Feb 2003 15:27:46 -0700


On Sat, Feb 08, 2003 at 03:22:42PM -0700, Grant Grundler wrote:
> Log message:
> 2.4.20-pa24 PA20 memory ordering
> Kudos to John David Anglin and Carlos O'Donnell for realizing
> PA 2.0 is not strongly ordered like PA1.x is.
> Read Appendix G of PA-RISC 2.0 Architecture (Gerry Kane) for
> details on the "Memory Ordering Model".
> 
> o use ldcw,co and stw,o to enforce memory transaction ordering.
> (Could also use ",o", but ",co" can operate on cached data and
> that's all we need for spinlocks.) I.e. make sure all protected
> data is visible to the other CPUs before a spinlock is released.
> (A rough sketch of the lock/unlock sequence is included below,
> after the log message.)
> 
> o recently released PA20 errata clarifies that using "ldcw,co"
> also relaxes the alignment requirement to the "native size"
> (i.e. 4 bytes for word operations). "ldcw" otherwise requires
> 16-byte aligned data.
> 
> o moved disable_sr_hashing() from the SMP-only path to the common
> code path so all CPUs (including the monarch) have this disabled.
> 
> o We don't strictly need ",o" for readl/writel on current machines,
> but I don't want to debug memory-ordering problems on N-class/L3000
> and similar machines should we someday get a chance to work on those.
> Unfortunately, "as" (binutils) doesn't recognize "ldwa,o". It's been
> added upstream, but we don't have a Debian package with that change
> yet. I've uploaded palinux-20030208.tgz to dsl2 and will try to build
> a binutils.deb with this change as well.
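
For anyone reading the log without the spinlock_t.h patch in front of
them, here is a rough, self-contained sketch of the lock/unlock
sequence the PA20 path is aiming for. It is illustrative only, not the
kernel code: the names pa20_lock_t, pa20_ldcw, pa20_spin_lock and
pa20_spin_unlock are made up for the example, and it assumes a PA 2.0
toolchain whose assembler accepts the ",co" and ",o" completers (see
the binutils note above).

/* Illustrative sketch only -- hypothetical names, not the kernel's.
 * Assumes a PA 2.0 assembler that accepts the ",co"/",o" completers.
 */
typedef struct {
        /* With ldcw,co the lock word only needs its natural (4-byte)
         * alignment; plain ldcw would still need 16-byte alignment. */
        volatile unsigned int lock;     /* 1 = unlocked, 0 = locked */
} pa20_lock_t;

static __inline__ unsigned int pa20_ldcw(volatile unsigned int *a)
{
        unsigned int ret;

        /* Atomically load the word and leave zero behind.  ",co"
         * (coherent operation) lets it work on cached data and acts
         * as an ordering point. */
        __asm__ __volatile__("ldcw,co 0(%1),%0"
                             : "=r" (ret) : "r" (a) : "memory");
        return ret;
}

static __inline__ void pa20_spin_lock(pa20_lock_t *x)
{
        /* ldcw returning 0 means someone else holds the lock; spin on
         * the cached value until it looks free, then retry the ldcw. */
        while (pa20_ldcw(&x->lock) == 0)
                while (x->lock == 0)
                        /* spin */ ;
}

static __inline__ void pa20_spin_unlock(pa20_lock_t *x)
{
        /* Ordered store: ",o" makes earlier stores to the protected
         * data visible before the lock word goes non-zero.  Any
         * non-zero value unlocks; the patch below stores %sp because
         * it is conveniently non-zero. */
        __asm__ __volatile__("stw,o %1,0(%0)"
                             : : "r" (&x->lock), "r" (1) : "memory");
}

The real patch keeps the 16-byte aligned lock word and the plain
ldcw/stw sequence for the !CONFIG_PA20 case, since PA 1.x is strongly
ordered and plain ldcw still has the old alignment requirement there.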



Index: Makefile
===================================================================
RCS file: /var/cvs/linux/Makefile,v
retrieving revision 1.383
diff -u -p -r1.383 Makefile
--- Makefile	28 Jan 2003 07:49:07 -0000	1.383
+++ Makefile	8 Feb 2003 06:21:32 -0000
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 4
 SUBLEVEL = 20
-EXTRAVERSION = -pa23
+EXTRAVERSION = -pa24
 
 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
 
Index: arch/parisc/Makefile
===================================================================
RCS file: /var/cvs/linux/arch/parisc/Makefile,v
retrieving revision 1.31
diff -u -p -r1.31 Makefile
--- arch/parisc/Makefile	16 Nov 2002 07:00:31 -0000	1.31
+++ arch/parisc/Makefile	8 Feb 2003 06:21:32 -0000
@@ -32,6 +32,10 @@ CROSS_COMPILE := hppa-linux-
 endif
 endif
 
+ifdef CONFIG_PA20
+CFLAGS += -mpa-risc-2-0
+endif
+
 OFFSET_H := $(TOPDIR)/include/asm-parisc/offset.h
 FINAL_LD=$(CROSS_COMPILE)ld --warn-common --warn-section-align 
 
Index: arch/parisc/kernel/cache.c
===================================================================
RCS file: /var/cvs/linux/arch/parisc/kernel/cache.c,v
retrieving revision 1.22
diff -u -p -r1.22 cache.c
--- arch/parisc/kernel/cache.c	13 Apr 2002 22:12:27 -0000	1.22
+++ arch/parisc/kernel/cache.c	8 Feb 2003 06:21:32 -0000
@@ -221,4 +221,6 @@ void disable_sr_hashing(void)
     }
 
     disable_sr_hashing_asm(srhash_type);
+
+    mb();
 }
Index: arch/parisc/kernel/processor.c
===================================================================
RCS file: /var/cvs/linux/arch/parisc/kernel/processor.c,v
retrieving revision 1.15
diff -u -p -r1.15 processor.c
--- arch/parisc/kernel/processor.c	17 Nov 2002 20:44:11 -0000	1.15
+++ arch/parisc/kernel/processor.c	8 Feb 2003 06:21:32 -0000
@@ -273,6 +273,7 @@ int __init init_per_cpu(int cpunum)
 {
 	int ret;
 	struct pdc_coproc_cfg coproc_cfg;
+	extern void disable_sr_hashing(void);	/* from cache.c */
 
 	ret = pdc_coproc_cfg(&coproc_cfg);
 
@@ -309,6 +310,8 @@ int __init init_per_cpu(int cpunum)
 
 	/* FUTURE: Enable Performance Monitor : ccr bit 0x20 */
 	init_percpu_prof(cpunum);
+
+        disable_sr_hashing();
 
 	return ret;
 }
Index: arch/parisc/kernel/smp.c
===================================================================
RCS file: /var/cvs/linux/arch/parisc/kernel/smp.c,v
retrieving revision 1.20
diff -u -p -r1.20 smp.c
--- arch/parisc/kernel/smp.c	7 Jul 2002 06:23:38 -0000	1.20
+++ arch/parisc/kernel/smp.c	8 Feb 2003 06:21:33 -0000
@@ -436,10 +436,6 @@ smp_cpu_init(int cpunum)
 	/* Set modes and Enable floating point coprocessor */
 	(void) init_per_cpu(cpunum);
 
-	disable_sr_hashing();
-
-	mb();
-
 	/* Well, support 2.4 linux scheme as well. */
 	if (test_and_set_bit(cpunum, (unsigned long *) (&cpu_online_map)))
 	{
Index: include/asm-parisc/io.h
===================================================================
RCS file: /var/cvs/linux/include/asm-parisc/io.h,v
retrieving revision 1.28
diff -u -p -r1.28 io.h
--- include/asm-parisc/io.h	5 Apr 2002 08:02:02 -0000	1.28
+++ include/asm-parisc/io.h	8 Feb 2003 06:21:34 -0000
@@ -51,7 +51,11 @@ extern __inline__ unsigned char __raw_re
 
 	__asm__ __volatile__(
 	"	rsm	2,%0\n"
+#ifdef CONFIG_PA20
+	"	ldb,o	0(%2),%1\n"
+#else
 	"	ldbx	0(%2),%1\n"
+#endif
 	"	mtsm	%0\n"
 	: "=&r" (flags), "=r" (ret) : "r" (addr) );
 
@@ -65,7 +69,11 @@ extern __inline__ unsigned short __raw_r
 
 	__asm__ __volatile__(
 	"	rsm	2,%0\n"
+#ifdef CONFIG_PA20
+	"	ldh,o	0(%2),%1\n"
+#else
 	"	ldhx	0(%2),%1\n"
+#endif
 	"	mtsm	%0\n"
 	: "=&r" (flags), "=r" (ret) : "r" (addr) );
 
@@ -77,7 +85,11 @@ extern __inline__ unsigned int __raw_rea
 	u32 ret;
 
 	__asm__ __volatile__(
+#ifdef CONFIG_PA20
+	"	ldwa,o	0(%1),%0\n"
+#else
 	"	ldwax	0(%1),%0\n"
+#endif
 	: "=r" (ret) : "r" (addr) );
 
 	return ret;
@@ -88,7 +100,7 @@ extern __inline__ unsigned long long __r
 	unsigned long long ret;
 #ifdef __LP64__
 	__asm__ __volatile__(
-	"	ldda	0(%1),%0\n"
+	"	ldda,o	0(%1),%0\n"
 	:  "=r" (ret) : "r" (addr) );
 #else
 	/* two reads may have side effects.. */
@@ -103,7 +115,11 @@ extern __inline__ void __raw_writeb(unsi
 	long flags;
 	__asm__ __volatile__(
 	"	rsm	2,%0\n"
+#ifdef CONFIG_PA20
+	"	stb,o	%1,0(%2)\n"
+#else
 	"	stbs	%1,0(%2)\n"
+#endif
 	"	mtsm	%0\n"
 	: "=&r" (flags) :  "r" (val), "r" (addr) );
 }
@@ -113,7 +129,11 @@ extern __inline__ void __raw_writew(unsi
 	long flags;
 	__asm__ __volatile__(
 	"	rsm	2,%0\n"
+#ifdef CONFIG_PA20
+	"	sth,o	%1,0(%2)\n"
+#else
 	"	sths	%1,0(%2)\n"
+#endif
 	"	mtsm	%0\n"
 	: "=&r" (flags) :  "r" (val), "r" (addr) );
 }
@@ -121,7 +141,11 @@ extern __inline__ void __raw_writew(unsi
 extern __inline__ void __raw_writel(unsigned int val, unsigned long addr)
 {
 	__asm__ __volatile__(
+#ifdef CONFIG_PA20
+	"	stwa,o	%0,0(%1)\n"
+#else
 	"	stwas	%0,0(%1)\n"
+#endif
 	: :  "r" (val), "r" (addr) );
 }
 
@@ -129,7 +153,7 @@ extern __inline__ void __raw_writeq(unsi
 {
 #ifdef __LP64__
 	__asm__ __volatile__(
-	"	stda	%0,0(%1)\n"
+	"	stda,o	%0,0(%1)\n"
 	: :  "r" (val), "r" (addr) );
 #else
 	/* two writes may have side effects.. */
Index: include/asm-parisc/spinlock_t.h
===================================================================
RCS file: /var/cvs/linux/include/asm-parisc/spinlock_t.h,v
retrieving revision 1.2
diff -u -p -r1.2 spinlock_t.h
--- include/asm-parisc/spinlock_t.h	16 Nov 2002 06:39:42 -0000	1.2
+++ include/asm-parisc/spinlock_t.h	8 Feb 2003 06:21:34 -0000
@@ -6,18 +6,40 @@
  * Note that PA-RISC has to use `1' to mean unlocked and `0' to mean locked
  * since it only has load-and-zero.
  */
+#ifdef CONFIG_PA20
+/* 
+> From: "Jim Hull" <jim.hull of hp.com>
+> Delivery-date: Wed, 29 Jan 2003 13:57:05 -0500
+> I've attached a summary of the change, but basically, for PA 2.0, as
+> long as the ",CO" (coherent operation) completer is specified, then the
+> 16-byte alignment requirement for ldcw and ldcd is relaxed, and instead
+> they only require "natural" alignment (4-byte for ldcw, 8-byte for
+> ldcd).
+*/
+
+#define __ldcw(a) ({ \
+	unsigned __ret; \
+	__asm__ __volatile__("ldcw,co 0(%1),%0" : "=r" (__ret) : "r" (a)); \
+	__ret; \
+})
+#else
 #define __ldcw(a) ({ \
 	unsigned __ret; \
 	__asm__ __volatile__("ldcw 0(%1),%0" : "=r" (__ret) : "r" (a)); \
 	__ret; \
 })
+#endif
 
 /*
  * Your basic SMP spinlocks, allowing only a single CPU anywhere
  */
 
 typedef struct {
+#ifdef CONFIG_PA20
+	volatile unsigned int lock;
+#else
 	volatile unsigned int __attribute__((aligned(16))) lock;
+#endif
 #ifdef CONFIG_DEBUG_SPINLOCK
 	volatile unsigned long owner_pc;
 	volatile unsigned long owner_cpu;
@@ -32,7 +54,26 @@ typedef struct {
 #define spin_lock_init(x)       do { (x)->lock = 1; } while(0)
 #define spin_is_locked(x)       ((x)->lock == 0)
 #define spin_trylock(x)		(__ldcw(&(x)->lock) != 0)
+#ifdef CONFIG_PA20
+/* PA2.0 is not strongly ordered. ldcw enforces ordering
+ * and we need to make sure ordering is enforced on the unlock too.
+ */
+#define spin_unlock(x) \
+		__asm__ __volatile__ ("stw,o  %%sp,0(%0)" : : "r" (x) : "memory" )
+
+/* 
+ * Alternative in case ,o doesn't work.
+ * Using "sync" messes with cache flush ordering.
+ *
+ * #define spin_unlock(x)	__asm__ __volatile__ ("sync\n" \
+ *				"\tstw	1,0(%0)" : : "r" (x) : "memory" )
+ */
+#else
+
+/* PA1.1 is strongly ordered. No issues here. */
 #define spin_unlock(x)		do { (x)->lock = 1; } while(0)
+
+#endif
 
 #define spin_unlock_wait(x)     do { barrier(); } while(((volatile spinlock_t *)(x))->lock == 0)