[parisc-linux] patch to unaligned.c
LaMont Jones
lamont@b180.mmjgroup.com
Thu, 19 Dec 2002 16:09:40 -0700
The patch below fixes base register modification in the unaligned
load/store code. It still doesn't do floating point load/stores, and
I'll work on those later. In the meantime, this will deal with lots
of issues around base reg modification not happening.
Part of the unrolling is a precursor to adding floating point stuff.
lamont
Index: arch/parisc/kernel/unaligned.c
===================================================================
RCS file: /var/cvs/linux/arch/parisc/kernel/unaligned.c,v
retrieving revision 1.11
diff -u -r1.11 unaligned.c
--- arch/parisc/kernel/unaligned.c 24 Sep 2002 05:52:46 -0000 1.11
+++ arch/parisc/kernel/unaligned.c 19 Dec 2002 23:00:36 -0000
@@ -66,28 +66,28 @@
#define OPCODE3_MASK OPCODE3(0x3f,1)
#define OPCODE4_MASK OPCODE4(0x3f)
-/* skip LDB (index) */
+/* skip LDB - never unaligned (index) */
#define OPCODE_LDH_I OPCODE1(0x03,0,0x1)
#define OPCODE_LDW_I OPCODE1(0x03,0,0x2)
#define OPCODE_LDD_I OPCODE1(0x03,0,0x3)
#define OPCODE_LDDA_I OPCODE1(0x03,0,0x4)
-/* skip LDCD (index) */
+#define OPCODE_LDCD_I OPCODE1(0x03,0,0x5)
#define OPCODE_LDWA_I OPCODE1(0x03,0,0x6)
-/* skip LDCW (index) */
-/* skip LDB (short) */
+#define OPCODE_LDCW_I OPCODE1(0x03,0,0x7)
+/* skip LDB - never unaligned (short) */
#define OPCODE_LDH_S OPCODE1(0x03,1,0x1)
#define OPCODE_LDW_S OPCODE1(0x03,1,0x2)
#define OPCODE_LDD_S OPCODE1(0x03,1,0x3)
#define OPCODE_LDDA_S OPCODE1(0x03,1,0x4)
-/* skip LDCD (short) */
+#define OPCODE_LDCD_S OPCODE1(0x03,1,0x5)
#define OPCODE_LDWA_S OPCODE1(0x03,1,0x6)
-/* skip LDCW (short) */
-/* skip STB */
+#define OPCODE_LDCW_S OPCODE1(0x03,1,0x7)
+/* skip STB - never unaligned */
#define OPCODE_STH OPCODE1(0x03,1,0x9)
#define OPCODE_STW OPCODE1(0x03,1,0xa)
#define OPCODE_STD OPCODE1(0x03,1,0xb)
-/* skip STBY */
-/* skip STDBY */
+/* skip STBY - never unaligned */
+/* skip STDBY - never unaligned */
#define OPCODE_STWA OPCODE1(0x03,1,0xe)
#define OPCODE_STDA OPCODE1(0x03,1,0xf)
@@ -103,15 +103,107 @@
#define OPCODE_LDH_L OPCODE4(0x11)
#define OPCODE_LDW_L OPCODE4(0x12)
-#define OPCODE_LDW_L2 OPCODE4(0x13)
+#define OPCODE_LDWM OPCODE4(0x13)
#define OPCODE_STH_L OPCODE4(0x19)
#define OPCODE_STW_L OPCODE4(0x1A)
-#define OPCODE_STW_L2 OPCODE4(0x1B)
+#define OPCODE_STWM OPCODE4(0x1B)
+
+#define MAJOR_OP(i) (((i)>>26)&0x3f)
+#define R1(i) (((i)>>21)&0x1f)
+#define R2(i) (((i)>>16)&0x1f)
+#define R3(i) ((i)&0x1f)
+#define IM(i,n) (((i)>>1&((1<<(n-1))-1))|((i)&1?((0-1L)<<(n-1)):0))
+#define IM5_2(i) IM((i)>>16,5)
+#define IM5_3(i) IM((i),5)
+#define IM14(i) IM((i),14)
int unaligned_enabled = 1;
void die_if_kernel (char *str, struct pt_regs *regs, long err);
+static int emulate_ldh(struct pt_regs *regs, int toreg)
+{
+ unsigned long saddr = regs->ior;
+ unsigned long val = 0;
+
+ DPRINTF("load " RFMT ":" RFMT " to r%d for 2 bytes\n",
+ regs->isr, regs->ior, toreg);
+
+ __asm__ __volatile__ (
+" mtsp %3, %%sr1\n"
+" ldbs 0(%%sr1,%2), %%r20\n"
+" ldbs 1(%%sr1,%2), %0\n"
+ "depw %%r20, 23, 24, %0\n"
+ : "=r" (val)
+ : "0" (val), "r" (saddr), "r" (regs->isr)
+ : "r20" );
+
+ DPRINTF("val = 0x" RFMT "\n", val);
+
+ if (toreg)
+ regs->gr[toreg] = val;
+
+ return 0;
+}
+static int emulate_ldw(struct pt_regs *regs, int toreg)
+{
+ unsigned long saddr = regs->ior;
+ unsigned long val = 0;
+
+ DPRINTF("load " RFMT ":" RFMT " to r%d for 4 bytes\n",
+ regs->isr, regs->ior, toreg);
+
+ __asm__ __volatile__ (
+" zdep %2,28,2,%%r19\n" /* r19=(ofs&3)*8 */
+" mtsp %3, %%sr1\n"
+" depw %%r0,31,2,%2\n"
+" ldw 0(%%sr1,%2),%0\n"
+" ldw 4(%%sr1,%2),%%r20\n"
+" subi 32,%%r19,%%r19\n"
+" mtctl %%r19,11\n"
+" vshd %0,%%r20,%0\n"
+ : "=r" (val)
+ : "0" (val), "r" (saddr), "r" (regs->isr)
+ : "r19", "r20" );
+
+ DPRINTF("val = 0x" RFMT "\n", val);
+
+ if (toreg)
+ regs->gr[toreg] = val;
+
+ return 0;
+}
+#ifdef __LP64__
+static int emulate_ldd(struct pt_regs *regs, int toreg)
+{
+ unsigned long saddr = regs->ior;
+ unsigned long val = 0;
+
+ DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n",
+ regs->isr, regs->ior, toreg);
+
+ __asm__ __volatile__ (
+" zdepd %2,60,3,%%r19\n" /* r19=(ofs&7)*8 */
+" mtsp %3, %%sr1\n"
+" depd %%r0,63,3,%2\n"
+" ldd 0(%%sr1,%2),%0\n"
+" ldd 8(%%sr1,%2),%%r20\n"
+" subi 64,%%r19,%%r19\n"
+" mtsar %%r19\n"
+" shrpd %0,%%r20,%sar,%0\n"
+ : "=r" (val)
+ : "0" (val), "r" (saddr), "r" (regs->isr)
+ : "r19", "r20" );
+
+ DPRINTF("val = 0x" RFMT "\n", val);
+
+ if (toreg)
+ regs->gr[toreg] = val;
+
+ return 0;
+}
+#endif
+#if 0
static int emulate_load(struct pt_regs *regs, int len, int toreg)
{
unsigned long saddr = regs->ior;
@@ -159,19 +251,144 @@
DPRINTF("val = 0x" RFMT "\n", val);
- regs->gr[toreg] = val;
+ if (toreg)
+ regs->gr[toreg] = val;
return ret;
}
+#endif
+
+static int emulate_sth(struct pt_regs *regs, int frreg)
+{
+ unsigned long val = regs->gr[frreg];
+ if (!frreg)
+ val = 0;
+
+ DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for 2 bytes\n", frreg,
+ regs->gr[frreg], regs->isr, regs->ior);
+
+ __asm__ __volatile__ (
+" mtsp %2, %%sr1\n"
+" extrw,u %0, 23, 8, %%r19\n"
+" stb %0, 1(%%sr1, %1)\n"
+" stb %%r19, 0(%%sr1, %1)\n"
+ :
+ : "r" (val), "r" (regs->ior), "r" (regs->isr)
+ : "r19" );
+
+ return 0;
+}
+static int emulate_stw(struct pt_regs *regs, int frreg)
+{
+ unsigned long val = regs->gr[frreg];
+ if (!frreg)
+ val = 0;
+
+ DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for 4 bytes\n", frreg,
+ regs->gr[frreg], regs->isr, regs->ior);
+
+
+ __asm__ __volatile__ (
+" mtsp %2, %%sr1\n"
+#if 0
+" extru %0, 7, 8, %%r19\n"
+" extru %0, 15, 8, %%r20\n"
+" extru %0, 23, 8, %%r21\n"
+" stb %%r19, 0(%%sr1, %1)\n"
+" stb %%r20, 1(%%sr1, %1)\n"
+" stb %%r21, 2(%%sr1, %1)\n"
+" stb %0, 3(%%sr1, %1)\n"
+ :
+ : "r" (val), "r" (regs->ior), "r" (regs->isr)
+ : "r19", "r20", "r21" );
+#else
+" zdep %1, 28, 2, %%r19\n"
+" dep %%r0, 31, 2, %1\n"
+" mtsar %%r19\n"
+" zvdepi -2, 32, %%r19\n"
+" ldw 0(%%sr1,%1),%%r20\n"
+" ldw 4(%%sr1,%1),%%r21\n"
+" vshd %%r0, %0, %%r22\n"
+" vshd %0, %%r0, %%r1\n"
+" and %%r20, %%r19, %%r20\n"
+" andcm %%r21, %%r19, %%r21\n"
+" or %%r22, %%r20, %%r20\n"
+" or %%r1, %%r21, %%r21\n"
+" stw %%r20,0(%%sr1,%1)\n"
+" stw %%r21,4(%%sr1,%1)\n"
+ :
+ : "r" (val), "r" (regs->ior), "r" (regs->isr)
+ : "r19", "r20", "r21", "r22", "r1" );
+#endif
+ return 0;
+}
+#ifdef __LP64__
+static int emulate_std(struct pt_regs *regs, int frreg)
+{
+ unsigned long val = regs->gr[frreg];
+ if (!frreg)
+ val = 0;
+
+ DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for 8 bytes\n", frreg,
+ regs->gr[frreg], regs->isr, regs->ior);
+
+
+ __asm__ __volatile__ (
+" mtsp %2, %%sr1\n"
+#if 0
+" extrd %0, 7, 8, %%r19\n"
+" extrd %0, 15, 8, %%r20\n"
+" stb %%r19, 0(%%sr1, %1)\n"
+" stb %%r20, 1(%%sr1, %1)\n"
+" extrd %0, 23, 8, %%r19\n"
+" extrd %0, 31, 8, %%r20\n"
+" stb %%r19, 2(%%sr1, %1)\n"
+" stb %%r20, 3(%%sr1, %1)\n"
+" extrd %0, 39, 8, %%r19\n"
+" extrd %0, 47, 8, %%r20\n"
+" extrd %0, 55, 8, %%r21\n"
+" stb %%r19, 4(%%sr1, %1)\n"
+" stb %%r20, 5(%%sr1, %1)\n"
+" stb %%r21, 6(%%sr1, %1)\n"
+" stb %0, 7(%%sr1, %1)\n"
+ :
+ : "r" (val), "r" (regs->ior), "r" (regs->isr)
+ : "r19", "r20", "r21" );
+#else
+" depd,z %1, 60, 3, %%r19\n"
+" depd %%r0, 63, 3, %1\n"
+" mtsar %%r19\n"
+" depdi,z -2, 64, %%r19\n"
+" ldd 0(%%sr1,%1),%%r20\n"
+" ldd 8(%%sr1,%1),%%r21\n"
+" shrpd %%r0, %0, %sar, %%r22\n"
+" shrpd %0, %%r0, %sar, %%r1\n"
+" and %%r20, %%r19, %%r20\n"
+" andcm %%r21, %%r19, %%r21\n"
+" or %%r22, %%r20, %%r20\n"
+" or %%r1, %%r21, %%r21\n"
+" std %%r20,0(%%sr1,%1)\n"
+" std %%r21,8(%%sr1,%1)\n"
+ :
+ : "r" (val), "r" (regs->ior), "r" (regs->isr)
+ : "r19", "r20", "r21", "r22", "r1" );
+#endif
+
+ return 0;
+}
+#endif
+#if 0
static int emulate_store(struct pt_regs *regs, int len, int frreg)
{
int ret = 0;
#ifdef __LP64__
- unsigned long val = regs->gr[frreg] << (64 - (len << 3));
+ unsigned long val = regs->gr[frreg] << (64 - (len << 3));
#else
- unsigned long val = regs->gr[frreg] << (32 - (len << 3));
+ unsigned long val = regs->gr[frreg] << (32 - (len << 3));
#endif
+ if (!frreg)
+ val = 0;
if (regs->isr != regs->sr[7])
{
@@ -220,12 +437,15 @@
return ret;
}
+#endif
void handle_unaligned(struct pt_regs *regs)
{
unsigned long unaligned_count = 0;
unsigned long last_time = 0;
+ unsigned long newbase = regs->gr[R1(regs->iir)];
+ int modify = 0;
int ret = -1;
struct siginfo si;
@@ -284,83 +504,169 @@
if (!unaligned_enabled)
goto force_sigbus;
+ /* handle modification - OK, it's ugly, see the instruction manual */
+ switch (MAJOR_OP(regs->iir))
+ {
+ case 0x03:
+ case 0x09:
+ case 0x0b:
+ if (regs->iir&0x20)
+ {
+ modify = 1;
+ if (regs->iir&0x1000) /* short loads */
+ if (regs->iir&0x200)
+ newbase += IM5_3(regs->iir);
+ else
+ newbase += IM5_2(regs->iir);
+ else if (regs->iir&0x2000) /* scaled indexed */
+ {
+ int shift=0;
+ switch (regs->iir & OPCODE1_MASK)
+ {
+ case OPCODE_LDH_I:
+ shift= 1; break;
+ case OPCODE_LDW_I:
+ shift= 2; break;
+ case OPCODE_LDD_I:
+ case OPCODE_LDDA_I:
+ shift= 3; break;
+ }
+ newbase += regs->gr[R2(regs->iir)]<<shift;
+ } else /* simple indexed */
+ newbase += regs->gr[R2(regs->iir)];
+ }
+ break;
+ case 0x13:
+ case 0x1b:
+ modify = 1;
+ newbase += IM14(regs->iir);
+ break;
+ case 0x14:
+ case 0x1c:
+ if (regs->iir&8)
+ {
+ modify = 1;
+ newbase += IM14(regs->iir&~0xe);
+ }
+ break;
+ case 0x16:
+ case 0x1e:
+ modify = 1;
+ newbase += IM14(regs->iir&6);
+ break;
+ case 0x17:
+ case 0x1f:
+ if (regs->iir&4)
+ {
+ modify = 1;
+ newbase += IM14(regs->iir&~4);
+ }
+ break;
+ }
+
+ if (regs->isr != regs->sr[7])
+ {
+ printk(KERN_CRIT "isr verification failed (isr: " RFMT ", sr7: " RFMT "\n",
+ regs->isr, regs->sr[7]);
+
+ /* don't kill him though, since he has appropriate access to the page, or we
+ * would never have gotten here.
+ */
+ }
+
/* TODO: make this cleaner... */
switch (regs->iir & OPCODE1_MASK)
{
case OPCODE_LDH_I:
case OPCODE_LDH_S:
- ret = emulate_load(regs, 2, regs->iir & 0x1f);
+ ret = emulate_ldh(regs, R3(regs->iir));
break;
case OPCODE_LDW_I:
case OPCODE_LDWA_I:
case OPCODE_LDW_S:
case OPCODE_LDWA_S:
- ret = emulate_load(regs, 4, regs->iir&0x1f);
- break;
-
- case OPCODE_LDD_I:
- case OPCODE_LDDA_I:
- case OPCODE_LDD_S:
- case OPCODE_LDDA_S:
- ret = emulate_load(regs, 8, regs->iir&0x1f);
+ ret = emulate_ldw(regs, R3(regs->iir));
break;
case OPCODE_STH:
- ret = emulate_store(regs, 2, (regs->iir>>16)&0x1f);
+ ret = emulate_sth(regs, R2(regs->iir));
break;
case OPCODE_STW:
case OPCODE_STWA:
- ret = emulate_store(regs, 4, (regs->iir>>16)&0x1f);
+ ret = emulate_stw(regs, R2(regs->iir));
+ break;
+
+#ifdef __LP64__
+ case OPCODE_LDD_I:
+ case OPCODE_LDDA_I:
+ case OPCODE_LDD_S:
+ case OPCODE_LDDA_S:
+ ret = emulate_ldd(regs, R3(regs->iir));
break;
case OPCODE_STD:
case OPCODE_STDA:
- ret = emulate_store(regs, 8, (regs->iir>>16)&0x1f);
+ ret = emulate_std(regs, R2(regs->iir));
+ break;
+#endif
+
+ case OPCODE_LDCD_I:
+ case OPCODE_LDCW_I:
+ case OPCODE_LDCD_S:
+ case OPCODE_LDCW_S:
+ ret = -1; /* "undefined", but lets kill them. */
break;
}
+#ifdef __LP64__
switch (regs->iir & OPCODE2_MASK)
{
case OPCODE_LDD_L:
case OPCODE_FLDD_L:
- ret = emulate_load(regs, 8, (regs->iir>>16)&0x1f);
+ ret = emulate_ldd(regs, R2(regs->iir));
break;
case OPCODE_STD_L:
case OPCODE_FSTD_L:
- ret = emulate_store(regs, 8, (regs->iir>>16)&0x1f);
+ ret = emulate_std(regs, R2(regs->iir));
break;
}
+#endif
switch (regs->iir & OPCODE3_MASK)
{
case OPCODE_LDW_M:
case OPCODE_FLDW_L:
- ret = emulate_load(regs, 4, (regs->iir>>16)&0x1f);
+ ret = emulate_ldw(regs, R2(regs->iir));
break;
case OPCODE_FSTW_L:
case OPCODE_STW_M:
- ret = emulate_store(regs, 4, (regs->iir>>16)&0x1f);
+ ret = emulate_stw(regs, R2(regs->iir));
break;
}
switch (regs->iir & OPCODE4_MASK)
{
case OPCODE_LDH_L:
- ret = emulate_load(regs, 2, (regs->iir>>16)&0x1f);
+ ret = emulate_ldh(regs, R2(regs->iir));
break;
case OPCODE_LDW_L:
- case OPCODE_LDW_L2:
- ret = emulate_load(regs, 4, (regs->iir>>16)&0x1f);
+ case OPCODE_LDWM:
+ ret = emulate_ldw(regs, R2(regs->iir));
break;
case OPCODE_STH_L:
- ret = emulate_store(regs, 2, (regs->iir>>16)&0x1f);
+ ret = emulate_sth(regs, R2(regs->iir));
break;
case OPCODE_STW_L:
- case OPCODE_STW_L2:
- ret = emulate_store(regs, 4, (regs->iir>>16)&0x1f);
+ case OPCODE_STWM:
+ ret = emulate_stw(regs, R2(regs->iir));
break;
}
+ /* XXX LJ - need to handle float load/store */
+
+ if (modify)
+ regs->gr[R1(regs->iir)] = newbase;
+
if (ret < 0)
printk(KERN_CRIT "Not-handled unaligned insn 0x%08lx\n", regs->iir);
@@ -424,9 +730,9 @@
align_mask = 1UL;
break;
case OPCODE_LDW_L:
- case OPCODE_LDW_L2:
+ case OPCODE_LDWM:
case OPCODE_STW_L:
- case OPCODE_STW_L2:
+ case OPCODE_STWM:
align_mask = 3UL;
break;
}