[parisc-linux] patch to unaligned.c

LaMont Jones lamont@b180.mmjgroup.com
Thu, 19 Dec 2002 16:09:40 -0700


The patch below fixes base register modification in the unaligned
load/store code.  It still doesn't do floating point load/stores, and
I'll work on those later.  In the meantime, this will deal with lots
of issues around base reg modification not happening.

Part of the unrolling is a precursor to adding floating point stuff.

lamont

Index: arch/parisc/kernel/unaligned.c
===================================================================
RCS file: /var/cvs/linux/arch/parisc/kernel/unaligned.c,v
retrieving revision 1.11
diff -u -r1.11 unaligned.c
--- arch/parisc/kernel/unaligned.c	24 Sep 2002 05:52:46 -0000	1.11
+++ arch/parisc/kernel/unaligned.c	19 Dec 2002 23:00:36 -0000
@@ -66,28 +66,28 @@
 #define OPCODE3_MASK	OPCODE3(0x3f,1)
 #define OPCODE4_MASK    OPCODE4(0x3f)
 
-/* skip LDB (index) */
+/* skip LDB - never unaligned (index) */
 #define OPCODE_LDH_I	OPCODE1(0x03,0,0x1)
 #define OPCODE_LDW_I	OPCODE1(0x03,0,0x2)
 #define OPCODE_LDD_I	OPCODE1(0x03,0,0x3)
 #define OPCODE_LDDA_I	OPCODE1(0x03,0,0x4)
-/* skip LDCD (index) */
+#define OPCODE_LDCD_I	OPCODE1(0x03,0,0x5)
 #define OPCODE_LDWA_I	OPCODE1(0x03,0,0x6)
-/* skip LDCW (index) */
-/* skip LDB (short) */
+#define OPCODE_LDCW_I	OPCODE1(0x03,0,0x7)
+/* skip LDB - never unaligned (short) */
 #define OPCODE_LDH_S	OPCODE1(0x03,1,0x1)
 #define OPCODE_LDW_S	OPCODE1(0x03,1,0x2)
 #define OPCODE_LDD_S	OPCODE1(0x03,1,0x3)
 #define OPCODE_LDDA_S	OPCODE1(0x03,1,0x4)
-/* skip LDCD (short) */
+#define OPCODE_LDCD_S	OPCODE1(0x03,1,0x5)
 #define OPCODE_LDWA_S	OPCODE1(0x03,1,0x6)
-/* skip LDCW (short) */
-/* skip STB */
+#define OPCODE_LDCW_S	OPCODE1(0x03,1,0x7)
+/* skip STB - never unaligned */
 #define OPCODE_STH	OPCODE1(0x03,1,0x9)
 #define OPCODE_STW	OPCODE1(0x03,1,0xa)
 #define OPCODE_STD	OPCODE1(0x03,1,0xb)
-/* skip STBY */
-/* skip STDBY */
+/* skip STBY - never unaligned */
+/* skip STDBY - never unaligned */
 #define OPCODE_STWA	OPCODE1(0x03,1,0xe)
 #define OPCODE_STDA	OPCODE1(0x03,1,0xf)
 
@@ -103,15 +103,107 @@
 
 #define OPCODE_LDH_L    OPCODE4(0x11)
 #define OPCODE_LDW_L    OPCODE4(0x12)
-#define OPCODE_LDW_L2   OPCODE4(0x13)
+#define OPCODE_LDWM     OPCODE4(0x13)
 #define OPCODE_STH_L    OPCODE4(0x19)
 #define OPCODE_STW_L    OPCODE4(0x1A)
-#define OPCODE_STW_L2   OPCODE4(0x1B)
+#define OPCODE_STWM     OPCODE4(0x1B)
+
+#define MAJOR_OP(i) (((i)>>26)&0x3f)
+#define R1(i) (((i)>>21)&0x1f)
+#define R2(i) (((i)>>16)&0x1f)
+#define R3(i) ((i)&0x1f)
+#define IM(i,n) (((i)>>1&((1<<(n-1))-1))|((i)&1?((0-1L)<<(n-1)):0))
+#define IM5_2(i) IM((i)>>16,5)
+#define IM5_3(i) IM((i),5)
+#define IM14(i) IM((i),14)
 
 int unaligned_enabled = 1;
 
 void die_if_kernel (char *str, struct pt_regs *regs, long err);
 
+static int emulate_ldh(struct pt_regs *regs, int toreg)
+{
+	unsigned long saddr = regs->ior;
+	unsigned long val = 0;
+
+	DPRINTF("load " RFMT ":" RFMT " to r%d for 2 bytes\n", 
+		regs->isr, regs->ior, toreg);
+
+	__asm__ __volatile__  (
+"       mtsp	%3, %%sr1\n"
+"	ldbs	0(%%sr1,%2), %%r20\n"
+"	ldbs	1(%%sr1,%2), %0\n"
+	"depw	%%r20, 23, 24, %0\n"
+	: "=r" (val)
+	: "0" (val), "r" (saddr), "r" (regs->isr)
+	: "r20" );
+
+	DPRINTF("val = 0x" RFMT "\n", val);
+
+	if (toreg)
+		regs->gr[toreg] = val;
+
+	return 0;
+}
+static int emulate_ldw(struct pt_regs *regs, int toreg)
+{
+	unsigned long saddr = regs->ior;
+	unsigned long val = 0;
+
+	DPRINTF("load " RFMT ":" RFMT " to r%d for 4 bytes\n", 
+		regs->isr, regs->ior, toreg);
+
+	__asm__ __volatile__  (
+"	zdep	%2,28,2,%%r19\n"		/* r19=(ofs&3)*8 */
+"	mtsp	%3, %%sr1\n"
+"	depw	%%r0,31,2,%2\n"
+"	ldw	0(%%sr1,%2),%0\n"
+"	ldw	4(%%sr1,%2),%%r20\n"
+"	subi	32,%%r19,%%r19\n"
+"	mtctl	%%r19,11\n"
+"	vshd	%0,%%r20,%0\n"
+	: "=r" (val)
+	: "0" (val), "r" (saddr), "r" (regs->isr)
+	: "r19", "r20" );
+
+	DPRINTF("val = 0x" RFMT "\n", val);
+
+	if (toreg)
+		regs->gr[toreg] = val;
+
+	return 0;
+}
+#ifdef __LP64__
+static int emulate_ldd(struct pt_regs *regs, int toreg)
+{
+	unsigned long saddr = regs->ior;
+	unsigned long val = 0;
+
+	DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n", 
+		regs->isr, regs->ior, toreg);
+
+	__asm__ __volatile__  (
+"	zdepd	%2,60,3,%%r19\n"		/* r19=(ofs&7)*8 */
+"       mtsp	%3, %%sr1\n"
+"	depd	%%r0,63,3,%2\n"
+"	ldd	0(%%sr1,%2),%0\n"
+"	ldd	8(%%sr1,%2),%%r20\n"
+"	subi	64,%%r19,%%r19\n"
+"	mtsar	%%r19\n"
+"	shrpd	%0,%%r20,%sar,%0\n"
+	: "=r" (val)
+	: "0" (val), "r" (saddr), "r" (regs->isr)
+	: "r19", "r20" );
+
+	DPRINTF("val = 0x" RFMT "\n", val);
+
+	if (toreg)
+		regs->gr[toreg] = val;
+
+	return 0;
+}
+#endif
+#if 0
 static int emulate_load(struct pt_regs *regs, int len, int toreg)
 {
 	unsigned long saddr = regs->ior;
@@ -159,19 +251,144 @@
 
 	DPRINTF("val = 0x" RFMT "\n", val);
 
-	regs->gr[toreg] = val;
+	if (toreg)
+		regs->gr[toreg] = val;
 
 	return ret;
 }
+#endif
+
+static int emulate_sth(struct pt_regs *regs, int frreg)
+{
+	unsigned long val = regs->gr[frreg];
+	if (!frreg)
+		val = 0;
+
+	DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for 2 bytes\n", frreg, 
+		regs->gr[frreg], regs->isr, regs->ior);
+
+	__asm__ __volatile__ (
+"       mtsp %2, %%sr1\n"
+"	extrw,u %0, 23, 8, %%r19\n"
+"	stb %0, 1(%%sr1, %1)\n"
+"	stb %%r19, 0(%%sr1, %1)\n"
+	:
+	: "r" (val), "r" (regs->ior), "r" (regs->isr)
+	: "r19" );
+
+	return 0;
+}
+static int emulate_stw(struct pt_regs *regs, int frreg)
+{
+	unsigned long val = regs->gr[frreg];
+	if (!frreg)
+		val = 0;
+
+	DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for 4 bytes\n", frreg, 
+		regs->gr[frreg], regs->isr, regs->ior);
+
+
+	__asm__ __volatile__ (
+"       mtsp %2, %%sr1\n"
+#if 0
+"	extru	%0,  7, 8, %%r19\n"
+"	extru	%0, 15, 8, %%r20\n"
+"	extru	%0, 23, 8, %%r21\n"
+"	stb	%%r19, 0(%%sr1, %1)\n"
+"	stb	%%r20, 1(%%sr1, %1)\n"
+"	stb	%%r21, 2(%%sr1, %1)\n"
+"	stb	%0, 3(%%sr1, %1)\n"
+	:
+	: "r" (val), "r" (regs->ior), "r" (regs->isr)
+	: "r19", "r20", "r21" );
+#else
+"	zdep	%1, 28, 2, %%r19\n"
+"	dep	%%r0, 31, 2, %1\n"
+"	mtsar	%%r19\n"
+"	zvdepi	-2, 32, %%r19\n"
+"	ldw	0(%%sr1,%1),%%r20\n"
+"	ldw	4(%%sr1,%1),%%r21\n"
+"	vshd	%%r0, %0, %%r22\n"
+"	vshd	%0, %%r0, %%r1\n"
+"	and	%%r20, %%r19, %%r20\n"
+"	andcm	%%r21, %%r19, %%r21\n"
+"	or	%%r22, %%r20, %%r20\n"
+"	or	%%r1, %%r21, %%r21\n"
+"	stw	%%r20,0(%%sr1,%1)\n"
+"	stw	%%r21,4(%%sr1,%1)\n"
+	:
+	: "r" (val), "r" (regs->ior), "r" (regs->isr)
+	: "r19", "r20", "r21", "r22", "r1" );
+#endif
 
+	return 0;
+}
+#ifdef __LP64__
+static int emulate_std(struct pt_regs *regs, int frreg)
+{
+	unsigned long val = regs->gr[frreg];
+	if (!frreg)
+		val = 0;
+
+	DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for 8 bytes\n", frreg, 
+		regs->gr[frreg], regs->isr, regs->ior);
+
+
+	__asm__ __volatile__ (
+"       mtsp %2, %%sr1\n"
+#if 0
+"	extrd	%0,  7, 8, %%r19\n"
+"	extrd	%0, 15, 8, %%r20\n"
+"	stb	%%r19, 0(%%sr1, %1)\n"
+"	stb	%%r20, 1(%%sr1, %1)\n"
+"	extrd	%0, 23, 8, %%r19\n"
+"	extrd	%0, 31, 8, %%r20\n"
+"	stb	%%r19, 2(%%sr1, %1)\n"
+"	stb	%%r20, 3(%%sr1, %1)\n"
+"	extrd	%0, 39, 8, %%r19\n"
+"	extrd	%0, 47, 8, %%r20\n"
+"	extrd	%0, 55, 8, %%r21\n"
+"	stb	%%r19, 4(%%sr1, %1)\n"
+"	stb	%%r20, 5(%%sr1, %1)\n"
+"	stb	%%r21, 6(%%sr1, %1)\n"
+"	stb	%0, 7(%%sr1, %1)\n"
+	: 
+	: "r" (val), "r" (regs->ior), "r" (regs->isr)
+	: "r19", "r20", "r21" );
+#else
+"	depd,z	%1, 60, 3, %%r19\n"
+"	depd	%%r0, 63, 3, %1\n"
+"	mtsar	%%r19\n"
+"	depdi,z	-2, 64, %%r19\n"
+"	ldd	0(%%sr1,%1),%%r20\n"
+"	ldd	8(%%sr1,%1),%%r21\n"
+"	shrpd	%%r0, %0, %sar, %%r22\n"
+"	shrpd	%0, %%r0, %sar, %%r1\n"
+"	and	%%r20, %%r19, %%r20\n"
+"	andcm	%%r21, %%r19, %%r21\n"
+"	or	%%r22, %%r20, %%r20\n"
+"	or	%%r1, %%r21, %%r21\n"
+"	std	%%r20,0(%%sr1,%1)\n"
+"	std	%%r21,8(%%sr1,%1)\n"
+	:
+	: "r" (val), "r" (regs->ior), "r" (regs->isr)
+	: "r19", "r20", "r21", "r22", "r1" );
+#endif
+
+	return 0;
+}
+#endif
+#if 0
 static int emulate_store(struct pt_regs *regs, int len, int frreg)
 {
 	int ret = 0;
 #ifdef __LP64__
-	unsigned long val = regs->gr[frreg] << (64 - (len << 3));
+        unsigned long val = regs->gr[frreg] << (64 - (len << 3));
 #else
-	unsigned long val = regs->gr[frreg] << (32 - (len << 3));
+        unsigned long val = regs->gr[frreg] << (32 - (len << 3));
 #endif
+	if (!frreg)
+		val = 0;
 
 	if (regs->isr != regs->sr[7])
 	{
@@ -220,12 +437,15 @@
 
 	return ret;
 }
+#endif
 
 
 void handle_unaligned(struct pt_regs *regs)
 {
 	unsigned long unaligned_count = 0;
 	unsigned long last_time = 0;
+	unsigned long newbase = regs->gr[R1(regs->iir)];
+	int modify = 0;
 	int ret = -1;
 	struct siginfo si;
 
@@ -284,83 +504,169 @@
 	if (!unaligned_enabled)
 		goto force_sigbus;
 
+	/* handle modification - OK, it's ugly, see the instruction manual */
+	switch (MAJOR_OP(regs->iir))
+	{
+	case 0x03:
+	case 0x09:
+	case 0x0b:
+		if (regs->iir&0x20)
+		{
+			modify = 1;
+			if (regs->iir&0x1000)		/* short loads */
+				if (regs->iir&0x200)
+					newbase += IM5_3(regs->iir);
+				else
+					newbase += IM5_2(regs->iir);
+			else if (regs->iir&0x2000)	/* scaled indexed */
+			{
+				int shift=0;
+				switch (regs->iir & OPCODE1_MASK)
+				{
+				case OPCODE_LDH_I:
+					shift= 1; break;
+				case OPCODE_LDW_I:
+					shift= 2; break;
+				case OPCODE_LDD_I:
+				case OPCODE_LDDA_I:
+					shift= 3; break;
+				}
+				newbase += regs->gr[R2(regs->iir)]<<shift;
+			} else				/* simple indexed */
+				newbase += regs->gr[R2(regs->iir)];
+		}
+		break;
+	case 0x13:
+	case 0x1b:
+		modify = 1;
+		newbase += IM14(regs->iir);
+		break;
+	case 0x14:
+	case 0x1c:
+		if (regs->iir&8)
+		{
+			modify = 1;
+			newbase += IM14(regs->iir&~0xe);
+		}
+		break;
+	case 0x16:
+	case 0x1e:
+		modify = 1;
+		newbase += IM14(regs->iir&6);
+		break;
+	case 0x17:
+	case 0x1f:
+		if (regs->iir&4)
+		{
+			modify = 1;
+			newbase += IM14(regs->iir&~4);
+		}
+		break;
+	}
+
+	if (regs->isr != regs->sr[7])
+	{
+		printk(KERN_CRIT "isr verification failed (isr: " RFMT ", sr7: " RFMT "\n",
+			regs->isr, regs->sr[7]);
+
+		/* don't kill him though, since he has appropriate access to the page, or we
+		 * would never have gotten here.
+		 */
+	}
+
 	/* TODO: make this cleaner... */
 	switch (regs->iir & OPCODE1_MASK)
 	{
 	case OPCODE_LDH_I:
 	case OPCODE_LDH_S:
-		ret = emulate_load(regs, 2, regs->iir & 0x1f);
+		ret = emulate_ldh(regs, R3(regs->iir));
 		break;
 
 	case OPCODE_LDW_I:
 	case OPCODE_LDWA_I:
 	case OPCODE_LDW_S:
 	case OPCODE_LDWA_S:
-		ret = emulate_load(regs, 4, regs->iir&0x1f);
-		break;
-
-	case OPCODE_LDD_I:
-	case OPCODE_LDDA_I:
-	case OPCODE_LDD_S:
-	case OPCODE_LDDA_S:
-		ret = emulate_load(regs, 8, regs->iir&0x1f);
+		ret = emulate_ldw(regs, R3(regs->iir));
 		break;
 
 	case OPCODE_STH:
-		ret = emulate_store(regs, 2, (regs->iir>>16)&0x1f);
+		ret = emulate_sth(regs, R2(regs->iir));
 		break;
 
 	case OPCODE_STW:
 	case OPCODE_STWA:
-		ret = emulate_store(regs, 4, (regs->iir>>16)&0x1f);
+		ret = emulate_stw(regs, R2(regs->iir));
+		break;
+
+#ifdef __LP64__
+	case OPCODE_LDD_I:
+	case OPCODE_LDDA_I:
+	case OPCODE_LDD_S:
+	case OPCODE_LDDA_S:
+		ret = emulate_ldd(regs, R3(regs->iir));
 		break;
 
 	case OPCODE_STD:
 	case OPCODE_STDA:
-		ret = emulate_store(regs, 8, (regs->iir>>16)&0x1f);
+		ret = emulate_std(regs, R2(regs->iir));
+		break;
+#endif
+
+	case OPCODE_LDCD_I:
+	case OPCODE_LDCW_I:
+	case OPCODE_LDCD_S:
+	case OPCODE_LDCW_S:
+		ret = -1;	/* "undefined", but lets kill them. */
 		break;
 	}
+#ifdef __LP64__
 	switch (regs->iir & OPCODE2_MASK)
 	{
 	case OPCODE_LDD_L:
 	case OPCODE_FLDD_L:
-		ret = emulate_load(regs, 8, (regs->iir>>16)&0x1f);
+		ret = emulate_ldd(regs, R2(regs->iir));
 		break;
 
 	case OPCODE_STD_L:
 	case OPCODE_FSTD_L:
-		ret = emulate_store(regs, 8, (regs->iir>>16)&0x1f);
+		ret = emulate_std(regs, R2(regs->iir));
 		break;
 	}
+#endif
 	switch (regs->iir & OPCODE3_MASK)
 	{
 	case OPCODE_LDW_M:
 	case OPCODE_FLDW_L:
-		ret = emulate_load(regs, 4, (regs->iir>>16)&0x1f);
+		ret = emulate_ldw(regs, R2(regs->iir));
 		break;
 
 	case OPCODE_FSTW_L:
 	case OPCODE_STW_M:
-		ret = emulate_store(regs, 4, (regs->iir>>16)&0x1f);
+		ret = emulate_stw(regs, R2(regs->iir));
 		break;
 	}
 	switch (regs->iir & OPCODE4_MASK)
 	{
 	case OPCODE_LDH_L:
-		ret = emulate_load(regs, 2, (regs->iir>>16)&0x1f);
+		ret = emulate_ldh(regs, R2(regs->iir));
 		break;
 	case OPCODE_LDW_L:
-	case OPCODE_LDW_L2:
-		ret = emulate_load(regs, 4, (regs->iir>>16)&0x1f);
+	case OPCODE_LDWM:
+		ret = emulate_ldw(regs, R2(regs->iir));
 		break;
 	case OPCODE_STH_L:
-		ret = emulate_store(regs, 2, (regs->iir>>16)&0x1f);
+		ret = emulate_sth(regs, R2(regs->iir));
 		break;
 	case OPCODE_STW_L:
-	case OPCODE_STW_L2:
-		ret = emulate_store(regs, 4, (regs->iir>>16)&0x1f);
+	case OPCODE_STWM:
+		ret = emulate_stw(regs, R2(regs->iir));
 		break;
 	}
+	/* XXX LJ - need to handle float load/store */
+
+	if (modify)
+		regs->gr[R1(regs->iir)] = newbase;
+
 
 	if (ret < 0)
 		printk(KERN_CRIT "Not-handled unaligned insn 0x%08lx\n", regs->iir);
@@ -424,9 +730,9 @@
 			align_mask = 1UL;
 			break;
 		case OPCODE_LDW_L:
-		case OPCODE_LDW_L2:
+		case OPCODE_LDWM:
 		case OPCODE_STW_L:
-		case OPCODE_STW_L2:
+		case OPCODE_STWM:
 			align_mask = 3UL;
 			break;
 		}