[parisc-linux-cvs] linux grundler
Grant Grundler
grundler@dsl2.external.hp.com
Sat, 8 Feb 2003 15:27:46 -0700
On Sat, Feb 08, 2003 at 03:22:42PM -0700, Grant Grundler wrote:
> Log message:
> 2.4.20-pa24 PA20 memory ordering
> Kudos to John David Anglin and Carlos O'Donnell for realizing
> PA 2.0 is not strongly ordered like PA1.x is.
> Read appendix G of PA-RISC 2.0 Architecture (Gerry Kane) for
> details on the "Memory Ordering Model".
>
> o use ldcw,co and stw,o to enforce memory transaction ordering.
>   (Could also use ",o", but ",co" can operate on cached data and
>   that's all we need for spinlocks.)  I.e., make sure all protected
>   data is visible to other CPUs before releasing spinlocks.
>
> o recently released PA20 errata clarifies that using "ldcw,co"
>   also relaxes the alignment requirement to the "native size"
>   (i.e. 4 bytes for word operations). "ldcw" otherwise needs
>   16-byte aligned data.
>
> o moved disable_sr_hashing() from the SMP-only path to the common
>   code path so all CPUs (including the monarch) have this disabled.
>
> o We don't strictly need ",o" for readl/writel on current machines,
>   but I don't want to debug problems on N-class/L3000 and similar
>   machines should we someday get a chance to work on those.
>   Unfortunately, "as" (binutils) doesn't recognize "ldwa,o". It's been
>   added upstream, but we don't have a Debian package with this change.
>   I've uploaded palinux-20030208.tgz to dsl2 and will try to build
>   a binutils.deb with this change as well.
Index: Makefile
===================================================================
RCS file: /var/cvs/linux/Makefile,v
retrieving revision 1.383
diff -u -p -r1.383 Makefile
--- Makefile 28 Jan 2003 07:49:07 -0000 1.383
+++ Makefile 8 Feb 2003 06:21:32 -0000
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 4
SUBLEVEL = 20
-EXTRAVERSION = -pa23
+EXTRAVERSION = -pa24
KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
Index: arch/parisc/Makefile
===================================================================
RCS file: /var/cvs/linux/arch/parisc/Makefile,v
retrieving revision 1.31
diff -u -p -r1.31 Makefile
--- arch/parisc/Makefile 16 Nov 2002 07:00:31 -0000 1.31
+++ arch/parisc/Makefile 8 Feb 2003 06:21:32 -0000
@@ -32,6 +32,10 @@ CROSS_COMPILE := hppa-linux-
endif
endif
+ifdef CONFIG_PA20
+CFLAGS += -mpa-risc-2-0
+endif
+
OFFSET_H := $(TOPDIR)/include/asm-parisc/offset.h
FINAL_LD=$(CROSS_COMPILE)ld --warn-common --warn-section-align
Index: arch/parisc/kernel/cache.c
===================================================================
RCS file: /var/cvs/linux/arch/parisc/kernel/cache.c,v
retrieving revision 1.22
diff -u -p -r1.22 cache.c
--- arch/parisc/kernel/cache.c 13 Apr 2002 22:12:27 -0000 1.22
+++ arch/parisc/kernel/cache.c 8 Feb 2003 06:21:32 -0000
@@ -221,4 +221,6 @@ void disable_sr_hashing(void)
}
disable_sr_hashing_asm(srhash_type);
+
+ mb();
}
Index: arch/parisc/kernel/processor.c
===================================================================
RCS file: /var/cvs/linux/arch/parisc/kernel/processor.c,v
retrieving revision 1.15
diff -u -p -r1.15 processor.c
--- arch/parisc/kernel/processor.c 17 Nov 2002 20:44:11 -0000 1.15
+++ arch/parisc/kernel/processor.c 8 Feb 2003 06:21:32 -0000
@@ -273,6 +273,7 @@ int __init init_per_cpu(int cpunum)
{
int ret;
struct pdc_coproc_cfg coproc_cfg;
+ extern void disable_sr_hashing(void); /* from cache.c */
ret = pdc_coproc_cfg(&coproc_cfg);
@@ -309,6 +310,8 @@ int __init init_per_cpu(int cpunum)
/* FUTURE: Enable Performance Monitor : ccr bit 0x20 */
init_percpu_prof(cpunum);
+
+ disable_sr_hashing();
return ret;
}
Index: arch/parisc/kernel/smp.c
===================================================================
RCS file: /var/cvs/linux/arch/parisc/kernel/smp.c,v
retrieving revision 1.20
diff -u -p -r1.20 smp.c
--- arch/parisc/kernel/smp.c 7 Jul 2002 06:23:38 -0000 1.20
+++ arch/parisc/kernel/smp.c 8 Feb 2003 06:21:33 -0000
@@ -436,10 +436,6 @@ smp_cpu_init(int cpunum)
/* Set modes and Enable floating point coprocessor */
(void) init_per_cpu(cpunum);
- disable_sr_hashing();
-
- mb();
-
/* Well, support 2.4 linux scheme as well. */
if (test_and_set_bit(cpunum, (unsigned long *) (&cpu_online_map)))
{
Index: include/asm-parisc/io.h
===================================================================
RCS file: /var/cvs/linux/include/asm-parisc/io.h,v
retrieving revision 1.28
diff -u -p -r1.28 io.h
--- include/asm-parisc/io.h 5 Apr 2002 08:02:02 -0000 1.28
+++ include/asm-parisc/io.h 8 Feb 2003 06:21:34 -0000
@@ -51,7 +51,11 @@ extern __inline__ unsigned char __raw_re
__asm__ __volatile__(
" rsm 2,%0\n"
+#ifdef CONFIG_PA20
+ " ldb,o 0(%2),%1\n"
+#else
" ldbx 0(%2),%1\n"
+#endif
" mtsm %0\n"
: "=&r" (flags), "=r" (ret) : "r" (addr) );
@@ -65,7 +69,11 @@ extern __inline__ unsigned short __raw_r
__asm__ __volatile__(
" rsm 2,%0\n"
+#ifdef CONFIG_PA20
+ " ldh,o 0(%2),%1\n"
+#else
" ldhx 0(%2),%1\n"
+#endif
" mtsm %0\n"
: "=&r" (flags), "=r" (ret) : "r" (addr) );
@@ -77,7 +85,11 @@ extern __inline__ unsigned int __raw_rea
u32 ret;
__asm__ __volatile__(
+#ifdef CONFIG_PA20
+ " ldwa,o 0(%1),%0\n"
+#else
" ldwax 0(%1),%0\n"
+#endif
: "=r" (ret) : "r" (addr) );
return ret;
@@ -88,7 +100,7 @@ extern __inline__ unsigned long long __r
unsigned long long ret;
#ifdef __LP64__
__asm__ __volatile__(
- " ldda 0(%1),%0\n"
+ " ldda,o 0(%1),%0\n"
: "=r" (ret) : "r" (addr) );
#else
/* two reads may have side effects.. */
@@ -103,7 +115,11 @@ extern __inline__ void __raw_writeb(unsi
long flags;
__asm__ __volatile__(
" rsm 2,%0\n"
+#ifdef CONFIG_PA20
+ " stb,o %1,0(%2)\n"
+#else
" stbs %1,0(%2)\n"
+#endif
" mtsm %0\n"
: "=&r" (flags) : "r" (val), "r" (addr) );
}
@@ -113,7 +129,11 @@ extern __inline__ void __raw_writew(unsi
long flags;
__asm__ __volatile__(
" rsm 2,%0\n"
+#ifdef CONFIG_PA20
+ " sth,o %1,0(%2)\n"
+#else
" sths %1,0(%2)\n"
+#endif
" mtsm %0\n"
: "=&r" (flags) : "r" (val), "r" (addr) );
}
@@ -121,7 +141,11 @@ extern __inline__ void __raw_writew(unsi
extern __inline__ void __raw_writel(unsigned int val, unsigned long addr)
{
__asm__ __volatile__(
+#ifdef CONFIG_PA20
+ " stwa,o %0,0(%1)\n"
+#else
" stwas %0,0(%1)\n"
+#endif
: : "r" (val), "r" (addr) );
}
@@ -129,7 +153,7 @@ extern __inline__ void __raw_writeq(unsi
{
#ifdef __LP64__
__asm__ __volatile__(
- " stda %0,0(%1)\n"
+ " stda,o %0,0(%1)\n"
: : "r" (val), "r" (addr) );
#else
/* two writes may have side effects.. */
Index: include/asm-parisc/spinlock_t.h
===================================================================
RCS file: /var/cvs/linux/include/asm-parisc/spinlock_t.h,v
retrieving revision 1.2
diff -u -p -r1.2 spinlock_t.h
--- include/asm-parisc/spinlock_t.h 16 Nov 2002 06:39:42 -0000 1.2
+++ include/asm-parisc/spinlock_t.h 8 Feb 2003 06:21:34 -0000
@@ -6,18 +6,40 @@
* Note that PA-RISC has to use `1' to mean unlocked and `0' to mean locked
* since it only has load-and-zero.
*/
+#ifdef CONFIG_PA20
+/*
+> From: "Jim Hull" <jim.hull of hp.com>
+> Delivery-date: Wed, 29 Jan 2003 13:57:05 -0500
+> I've attached a summary of the change, but basically, for PA 2.0, as
+> long as the ",CO" (coherent operation) completer is specified, then the
+> 16-byte alignment requirement for ldcw and ldcd is relaxed, and instead
+> they only require "natural" alignment (4-byte for ldcw, 8-byte for
+> ldcd).
+*/
+
+#define __ldcw(a) ({ \
+ unsigned __ret; \
+ __asm__ __volatile__("ldcw,co 0(%1),%0" : "=r" (__ret) : "r" (a)); \
+ __ret; \
+})
+#else
#define __ldcw(a) ({ \
unsigned __ret; \
__asm__ __volatile__("ldcw 0(%1),%0" : "=r" (__ret) : "r" (a)); \
__ret; \
})
+#endif
/*
* Your basic SMP spinlocks, allowing only a single CPU anywhere
*/
typedef struct {
+#ifdef CONFIG_PA20
+ volatile unsigned int lock;
+#else
volatile unsigned int __attribute__((aligned(16))) lock;
+#endif
#ifdef CONFIG_DEBUG_SPINLOCK
volatile unsigned long owner_pc;
volatile unsigned long owner_cpu;
@@ -32,7 +54,26 @@ typedef struct {
#define spin_lock_init(x) do { (x)->lock = 1; } while(0)
#define spin_is_locked(x) ((x)->lock == 0)
#define spin_trylock(x) (__ldcw(&(x)->lock) != 0)
+#ifdef CONFIG_PA20
+/* PA2.0 is not strongly ordered. ldcw enforces ordering
+ * and we need to make sure ordering is enforced on the unlock too.
+ */
+#define spin_unlock(x) \
+ __asm__ __volatile__ ("stw,o %%sp,0(%0)" : : "r" (x) : "memory" )
+
+/*
+ * Alternative in case ,o doesn't work.
+ * Using "sync" messes with cache flush ordering.
+ *
+ * #define spin_unlock(x) __asm__ __volatile__ ("sync\n" \
+ * "\tstw 1,0(%0)" : : "r" (x) : "memory" )
+ */
+#else
+
+/* PA1.1 is strongly ordered. No issues here. */
#define spin_unlock(x) do { (x)->lock = 1; } while(0)
+
+#endif
#define spin_unlock_wait(x) do { barrier(); } while(((volatile spinlock_t *)(x))->lock == 0)
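To make the ordering hazard concrete, here is a hypothetical writer/reader pair
built on the demo_lock sketch earlier in this mail (shared_data, writer and
reader are made-up names).  On PA 2.0, if spin_unlock were a plain store, the
store to shared_data could become visible to another CPU *after* the store
that releases the lock, so the reader could acquire the lock and still read
stale data.  The ",o" completer on the unlock store (paired with ldcw,co on
the acquire) rules that out.

static demo_lock_t lock = { 1 };	/* 1 = unlocked */
static int shared_data;

void writer(void)
{
	demo_lock(&lock);
	shared_data = 42;	/* must be globally visible ...            */
	demo_unlock(&lock);	/* ... before this release can be observed */
}

int reader(void)
{
	int val;

	demo_lock(&lock);	/* ldcw,co also orders the reads that follow */
	val = shared_data;
	demo_unlock(&lock);
	return val;
}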