[parisc-linux] [patch 2/2] backport of sba sg list management to ccio-dma
Joel Soete
soete.joel at scarlet.be
Sun Oct 28 08:45:24 MDT 2007
Grant Grundler wrote:
> On Fri, Oct 26, 2007 at 03:20:55PM +0200, Joel Soete wrote:
> ...
>>>> (why not push this in upstream? [I mean ./include/asm-generic/page.h])
>>> Erm, push what?
>> Sorry for shortcut, here is more detailed idea:
>>
>> get_iovp_order came to me when reading the ia64/hp sba implementation where
>> you can read:
>> arch/ia64/hp/common/sba_iommu.c:
>> /**
>> * For most cases the normal get_order is sufficient, however it limits us
>> * to PAGE_SIZE being the minimum mapping alignment and TC flush granularity.
>> * It only incurs about 1 clock cycle to use this one with the static variable
>> * and makes the code more intuitive.
>> */
>> static SBA_INLINE int
>> get_iovp_order (unsigned long size)
>> [snip]
>
> Understood - ia64 has an instruction to implement FFS/FLS. PA-RISC does not.
>
>> but it's ia64 and I look for something I could test on parisc and this generic
>> code looked to me ideal:
>>
>> include/asm-generic/page.h
>>
>> /* Pure 2^n version of get_order */
>> static __inline__ __attribute_const__ int get_order(unsigned long size)
>> {
>> int order;
>>
>> size = (size - 1) >> (PAGE_SHIFT - 1);
>> order = -1;
>> do {
>> size >>= 1;
>> order++;
>> } while (size);
>> return order;
>> }
>
> Prove this code is faster than the __ffs asm in include/asm-parisc/bitops.h
> and we can talk about it. Unless gcc has gotten phenomenally better, I'm
> not inclined to believe the generic code will be faster.
>
Same idea but I again failed to pay attention to function declaration and size of elements. (see below)
>> just want to change PAGE_SHIFT with IOVP_SHIFT to make the drill ;-)
>> (this was this first idea)
>> static CCIO_INLINE int
>> get_iovp_order(unsigned long size)
>> {
>> int order;
>>
>> size = (size - 1) >> (IOVP_SHIFT - 1);
>> order = -1;
>> do {
>> size >>= 1;
>> order++;
>> } while (size);
>> return order;
>> }
>>
>> which make you though to ffs() which in turn make me proposed following:
>> static CCIO_INLINE int
>> get_iovp_order(unsigned long size)
>> {
>> return fls((size - 1) >> (IOVP_SHIFT));
>> }
>>
>> (test case comparison loop shows well it's the same even for size=0)
>
> Sure - submit a patch for that. That sounds fine to me.
>
>
>> I did some test with different values of IOVP_SHIFT and even with this case
>> IOVP_SHIFT==PAGE_SHIFT which is to me get_order().
>>
>> static __inline__ __attribute_const__ int get_order(unsigned long size)
>> {
>> return fls((size - 1) >> (PAGE_SHIFT));
>> }
>
> This looks fine to me.
> I've not checked if all possible values map correctly.
> Can you try (for n in 0-31), and verify (2^n) and (2^n)+1 values are the
> same for the above and the original get_order() function?
>
e.g.
r# more GetOrder-poc.c
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <limits.h>
#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
/* Pure 2^n version of get_order */
static __inline__
int get_order(unsigned long size)
{
int order;
size = (size-1) >> (PAGE_SHIFT-1);
order = -1;
do {
size >>= 1;
order++;
} while (size);
return order;
}
/**
* fls - find last (most-significant) bit set
* @x: the word to search
*
* This is defined the same way as ffs.
* Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
*/
static inline int fls(int x)
{
int r = 32;
if (!x)
return 0;
if (!(x & 0xffff0000u)) {
x <<= 16;
r -= 16;
}
if (!(x & 0xff000000u)) {
x <<= 8;
r -= 8;
}
if (!(x & 0xf0000000u)) {
x <<= 4;
r -= 4;
}
if (!(x & 0xc0000000u)) {
x <<= 2;
r -= 2;
}
if (!(x & 0x80000000u)) {
x <<= 1;
r -= 1;
}
return r;
}
/* I used generic stuff for a proof of concept */
static __inline__
int get_order_poc(unsigned long size)
{
return fls((size - 1) >> (PAGE_SHIFT));
}
int main(int argc, char * * argv, char * * env) {
unsigned int a, n;
unsigned long ul;
/* for proof of concept
*/
for (n=0; n<32; n++) {
ul = 1 << n;
printf("get_order(%lu) = %lu (0x%x);\n", ul, a = get_order(ul), a);
printf("get_order_poc(%lu) = %lu (0x%x).\n\n", ul, a = get_order_poc(ul), a);
printf("get_order(%lu) = %lu (0x%x);\n", ul+1, a = get_order(ul+1), a);
printf("get_order_poc(%lu) = %lu (0x%x).\n\n", ul+1, a = get_order_poc(ul+1), a);
}
return 0;
}
give me well same results:
# ./GetOrder-poc
get_order(1) = 0 (0x0);
get_order_poc(1) = 0 (0x0).
get_order(2) = 0 (0x0);
get_order_poc(2) = 0 (0x0).
get_order(2) = 0 (0x0);
get_order_poc(2) = 0 (0x0).
get_order(3) = 0 (0x0);
get_order_poc(3) = 0 (0x0).
get_order(4) = 0 (0x0);
get_order_poc(4) = 0 (0x0).
get_order(5) = 0 (0x0);
get_order_poc(5) = 0 (0x0).
get_order(8) = 0 (0x0);
get_order_poc(8) = 0 (0x0).
get_order(9) = 0 (0x0);
get_order_poc(9) = 0 (0x0).
get_order(16) = 0 (0x0);
get_order_poc(16) = 0 (0x0).
get_order(17) = 0 (0x0);
get_order_poc(17) = 0 (0x0).
get_order(32) = 0 (0x0);
get_order_poc(32) = 0 (0x0).
get_order(33) = 0 (0x0);
get_order_poc(33) = 0 (0x0).
get_order(64) = 0 (0x0);
get_order_poc(64) = 0 (0x0).
get_order(65) = 0 (0x0);
get_order_poc(65) = 0 (0x0).
get_order(128) = 0 (0x0);
get_order_poc(128) = 0 (0x0).
get_order(129) = 0 (0x0);
get_order_poc(129) = 0 (0x0).
get_order(256) = 0 (0x0);
get_order_poc(256) = 0 (0x0).
get_order(257) = 0 (0x0);
get_order_poc(257) = 0 (0x0).
get_order(512) = 0 (0x0);
get_order_poc(512) = 0 (0x0).
get_order(513) = 0 (0x0);
get_order_poc(513) = 0 (0x0).
get_order(1024) = 0 (0x0);
get_order_poc(1024) = 0 (0x0).
get_order(1025) = 0 (0x0);
get_order_poc(1025) = 0 (0x0).
get_order(2048) = 0 (0x0);
get_order_poc(2048) = 0 (0x0).
get_order(2049) = 0 (0x0);
get_order_poc(2049) = 0 (0x0).
get_order(4096) = 0 (0x0);
get_order_poc(4096) = 0 (0x0).
get_order(4097) = 1 (0x1);
get_order_poc(4097) = 1 (0x1).
get_order(8192) = 1 (0x1);
get_order_poc(8192) = 1 (0x1).
get_order(8193) = 2 (0x2);
get_order_poc(8193) = 2 (0x2).
get_order(16384) = 2 (0x2);
get_order_poc(16384) = 2 (0x2).
get_order(16385) = 3 (0x3);
get_order_poc(16385) = 3 (0x3).
get_order(32768) = 3 (0x3);
get_order_poc(32768) = 3 (0x3).
get_order(32769) = 4 (0x4);
get_order_poc(32769) = 4 (0x4).
get_order(65536) = 4 (0x4);
get_order_poc(65536) = 4 (0x4).
get_order(65537) = 5 (0x5);
get_order_poc(65537) = 5 (0x5).
get_order(131072) = 5 (0x5);
get_order_poc(131072) = 5 (0x5).
get_order(131073) = 6 (0x6);
get_order_poc(131073) = 6 (0x6).
get_order(262144) = 6 (0x6);
get_order_poc(262144) = 6 (0x6).
get_order(262145) = 7 (0x7);
get_order_poc(262145) = 7 (0x7).
get_order(524288) = 7 (0x7);
get_order_poc(524288) = 7 (0x7).
get_order(524289) = 8 (0x8);
get_order_poc(524289) = 8 (0x8).
get_order(1048576) = 8 (0x8);
get_order_poc(1048576) = 8 (0x8).
get_order(1048577) = 9 (0x9);
get_order_poc(1048577) = 9 (0x9).
get_order(2097152) = 9 (0x9);
get_order_poc(2097152) = 9 (0x9).
get_order(2097153) = 10 (0xa);
get_order_poc(2097153) = 10 (0xa).
get_order(4194304) = 10 (0xa);
get_order_poc(4194304) = 10 (0xa).
get_order(4194305) = 11 (0xb);
get_order_poc(4194305) = 11 (0xb).
get_order(8388608) = 11 (0xb);
get_order_poc(8388608) = 11 (0xb).
get_order(8388609) = 12 (0xc);
get_order_poc(8388609) = 12 (0xc).
get_order(16777216) = 12 (0xc);
get_order_poc(16777216) = 12 (0xc).
get_order(16777217) = 13 (0xd);
get_order_poc(16777217) = 13 (0xd).
get_order(33554432) = 13 (0xd);
get_order_poc(33554432) = 13 (0xd).
get_order(33554433) = 14 (0xe);
get_order_poc(33554433) = 14 (0xe).
get_order(67108864) = 14 (0xe);
get_order_poc(67108864) = 14 (0xe).
get_order(67108865) = 15 (0xf);
get_order_poc(67108865) = 15 (0xf).
get_order(134217728) = 15 (0xf);
get_order_poc(134217728) = 15 (0xf).
get_order(134217729) = 16 (0x10);
get_order_poc(134217729) = 16 (0x10).
get_order(268435456) = 16 (0x10);
get_order_poc(268435456) = 16 (0x10).
get_order(268435457) = 17 (0x11);
get_order_poc(268435457) = 17 (0x11).
get_order(536870912) = 17 (0x11);
get_order_poc(536870912) = 17 (0x11).
get_order(536870913) = 18 (0x12);
get_order_poc(536870913) = 18 (0x12).
get_order(1073741824) = 18 (0x12);
get_order_poc(1073741824) = 18 (0x12).
get_order(1073741825) = 19 (0x13);
get_order_poc(1073741825) = 19 (0x13).
get_order(2147483648) = 19 (0x13);
get_order_poc(2147483648) = 19 (0x13).
get_order(2147483649) = 20 (0x14);
get_order_poc(2147483649) = 20 (0x14).
But I need to check more on a detail:
here is the prototype of fls():
static inline int fls(int x)
so I need to be sure that the value of ((size - 1) >> (PAGE_SHIFT)) would always stand in a int (not unsigned but sign) for
32bit (iirc size_of(unsigned long)==32) and 64bit (size_of(unsigned long)==64) implementations?
>> or imho even better
>>
>> return (size ? fls((size - 1) >> (PAGE_SHIFT)): 0);
>
> fls is inlined already and does the same test for size.
Yes but here the error (well imo it's an error) came from the unsign long of (-1 >> PAGE_SHIFT) <> 0
> Adding another test here doesn't help.
> The first one you posted is correct.
>
>> Is it a bit more clear?
>> (I just hope you will not ask me to make the mathematical demonstration ;-))
>
> I don't need a mathematical demonstration...just a short test to demonstrate
> (2^n) and (2^n+1) still produce the same results.
>
>> What's your opinion?
>
> In general, sounds good to me! :)
>
> hth,
> grant
>
>
Tx,
J.
More information about the parisc-linux
mailing list