[parisc-linux] [patch] linux/include/asm-parisc/checksum.h
Randolph Chung
randolph@tausq.org
Sun, 22 Oct 2000 14:21:39 -0700
After all the assembly questions yesterday, I offer you my first patch
:-)
Here's a patch to implement ip_fast_csum in
include/asm-parisc/checksum.h. In my simple tests it's about 4x faster
than the original implementation.
I've also been working on the other checksum routines (csum_partial and
friends) but they are not ready yet.
Any suggestions on how to further optimize this will be much
appreciated.
randolph
Index: checksum.h
===================================================================
RCS file: /home/cvs/parisc/linux/include/asm-parisc/checksum.h,v
retrieving revision 1.8
diff -u -r1.8 checksum.h
--- checksum.h 2000/08/04 17:29:12 1.8
+++ checksum.h 2000/10/22 21:16:30
@@ -52,12 +52,46 @@
}
/*
- * FIXME: Needs to be optimized for IP headers,
- * which always checksum on 4 octet boundaries.
+ * Optimized for IP headers, which always checksum on 4 octet boundaries.
+ * iph: start of the header (read with word loads); ihl: its length in 32-bit words.
+ * Written by Randolph Chung <tausq@debian.org>
*/
static inline unsigned short ip_fast_csum(unsigned char * iph,
unsigned int ihl) {
- return ~csum_partial (iph, ihl << 2, 0);
+	unsigned int sum;	/* 32-bit accumulator; truncated to 16 bits on return */
+
+	/* Add ihl words with carry, fold the sum to 16 bits, then complement it. */
+	__asm__ __volatile__ (
+	"	ldws,ma 4(%1), %0\n"		/* sum = first word; iph += 4 */
+	"	addi -4, %2, %2\n"		/* ihl -= 4 (the four words handled outside the loop) */
+	"	comib,>= 0, %2, 2f\n"		/* ihl now <= 0: go to 2; the next load sits in the delay slot */
+
+	"	ldws,ma 4(%1), %%r19\n"		/* second word */
+	"	add %0, %%r19, %0\n"		/* plain add starts the carry chain */
+	"	ldws,ma 4(%1), %%r19\n"		/* third word */
+	"	addc %0, %%r19, %0\n"		/* sum += word + carry */
+	"	ldws,ma 4(%1), %%r19\n"		/* fourth word */
+	"	addc %0, %%r19, %0\n"
+	"1:	ldws,ma 4(%1), %%r19\n"		/* loop: load the next remaining word */
+	"	addib,<> -1, %2, 1b\n"		/* ihl--; branch back while ihl != 0 */
+	"	addc %0, %%r19, %0\n"		/* delay slot: sum += word + carry */
+	"	addc %0, %%r0, %0\n"		/* add in the carry left by the last addc */
+
+	"	zdepi -1, 31, 16, %%r19\n"	/* r19 = 0x0000ffff */
+	"	and %0, %%r19, %%r20\n"		/* r20 = low 16 bits of sum */
+	"	extru %0, 15, 16, %%r21\n"	/* r21 = high 16 bits of sum */
+	"	add %%r20, %%r21, %0\n"		/* sum = low + high (may exceed 16 bits) */
+	"	and %0, %%r19, %%r20\n"		/* fold a second time to absorb that overflow */
+	"	extru %0, 15, 16, %%r21\n"
+	"	add %%r20, %%r21, %0\n"
+	"	subi -1, %0, %0\n"		/* sum = -1 - sum, i.e. ~sum */
+	"2:\n"
+	/* Label 2 is reached directly when ihl < 5; sum is then the first word, unfolded. */
+	: "=r" (sum), "=r" (iph), "=r" (ihl)	/* iph and ihl are modified by the asm */
+	: "1" (iph), "2" (ihl)			/* inputs share registers with outputs 1 and 2 */
+	: "r19", "r20", "r21", "memory" );	/* "memory": the asm reads *iph, which is not an operand */
+
+	return(sum);
}
/*
--
@..@ http://www.TauSq.org/
(----)
( >__< )
^^ ~~ ^^