[parisc-linux] Re: teaching the kernel to do division
Joel Soete
soete.joel@tiscali.be
Sat, 25 Oct 2003 15:42:45 +0000
Hi all,
this test case would explaining better (i hope at least) than long
sentences what i mean:
#include <linux/types.h>
#include <stdio.h>
#include <limits.h>
/*
/* Algo 1: the goal */
uint32_t div64(uint64_t *n, uint32_t base)
{
uint32_t rem;
rem = *n % base;
*n = *n / base;
return rem;
}
/* Algo 2: original 2.6 */
uint32_t div64(uint64_t *n, uint32_t base)
{
uint32_t low, low2, high, rem;
low = *n & 0xffffffff;
high = *n >> 32;
rem = high % (uint32_t)base;
high = high / (uint32_t)base;
low2 = low >> 16;
low2 += rem << 16;
rem = low2 % (uint32_t)base;
low2 = low2 / (uint32_t)base;
low = low & 0xffff;
low += rem << 16;
rem = low % (uint32_t)base;
low = low / (uint32_t)base;
*n = low +
((uint64_t)low2 << 16) +
((uint64_t)high << 32);
return rem;
}
/* Algo 3: 2.4 method */
#define div64(n,base) \
({ \
unsigned long __low, __low2, __high, __rem; \
__low = (n) & 0xffffffff; \
__high = (n) >> 32; \
if (__high) { \
__rem = __high % (unsigned long)base; \
__high = __high / (unsigned long)base; \
__low2 = __low >> 16; \
__low2 += __rem << 16; \
__rem = __low2 % (unsigned long)base; \
__low2 = __low2 / (unsigned long)base; \
__low = __low & 0xffff; \
__low += __rem << 16; \
__rem = __low % (unsigned long)base; \
__low = __low / (unsigned long)base; \
n = __low + ((long long)__low2 << 16) + \
((long long) __high << 32); \
} else { \
__rem = __low % (unsigned long)base; \
n = (__low / (unsigned long)base); \
} \
__rem; \
})
*/
/* Algo 4: the Randolph patch OK*/
uint32_t div64(uint64_t *n, uint32_t base)
{
uint64_t rem = *n;
uint64_t b = base;
uint64_t res = 0, d = 1;
if (b > 0) {
while (b < rem) {
b <<= 1;
d <<= 1;
}
}
do {
if (rem >= b) {
rem -= b;
res += d;
}
b >>= 1;
d >>= 1;
} while (d);
*n = res;
return rem;
}
main()
{
uint32_t Rem;
uint64_t Num;
Num=(uint64_t) 28999591392ULL;
/*
printf("Length of Rem: %d\n", sizeof(Rem));
printf("Length of Num: %d\n", sizeof(Num));
*/
printf ("Num = %#018Lx (%#lld)\n", Num, Num);
Rem = div64(&Num, 1000000000UL);
printf ("Rem = %#012x (%#ld)\n", Rem, Rem);
}
With corresponding results:
Algo 1 (Ok)
Num = 0x00000006c082a5e0 (28999591392)
Rem = 0x003b948de0 (999591392)
Algo 2 (Ko)
Num = 0x00000006c082a5e0 (28999591392)
Rem = 0x000db247e0 (229787616)
Algo 3 (Ko)
Num = 0x00000006c082a5e0 (28999591392)
Rem = 0x000db247e0 (229787616)
Algo 4 (Ok)
Num = 0x00000006c082a5e0 (28999591392)
Rem = 0x003b948de0 (999591392)
hth,
Joel
PS: All seems well presented in my mozilla window and hope that will stay ;)
Joel Soete wrote:
> Hi Rnadolph and all,
>
> I reach to figure out what your pb is.
> This new algo is Ok and the previous formula was definitely wrong (even
> in 2.4 :( ). So it could be interesting to fix it also.
>
> For this I start from basic math:
> Dividend = divisor * quotient + reminder
> ie D = d * q + r (D, d, q, r: being integers)
>
> I can also split D in two 32bits words and write
> D = D1 * 2^32 + D0
>
> But I always have only one equation with two unknown var (q and r).
>
> Is somebody knows where can I find the right solution?
> (or should I find it back in objdump from :
> uint32_t div64(uint64_t *n, uint32_t base)
> {
> uint32_t rem;
>
> rem = *n % base;
> *n = *n / base;
>
> return rem;
> })
>
> Thanks in advance,
> Joel
>
>
> Randolph Chung wrote:
>
>>> does anyone want to look into fixing it, and/or writing an optimized
>>> version of that function for pa? :-) it needs to do basically this (but
>>> be standalone)
>>
>>
>>
>> Here's one way to do it, but maybe it can be optimized a bit. Any
>> suggestions before i send it upstream?
>>
>> randolph
>>
>> Index: lib/div64.c
>> ===================================================================
>> RCS file: /var/cvs/linux-2.6/lib/div64.c,v
>> retrieving revision 1.1
>> diff -u -p -r1.1 div64.c
>> --- lib/div64.c 29 Jul 2003 17:02:19 -0000 1.1
>> +++ lib/div64.c 23 Oct 2003 23:36:08 -0000
>> @@ -25,26 +25,28 @@
>>
>> uint32_t __div64_32(uint64_t *n, uint32_t base)
>> {
>> - uint32_t low, low2, high, rem;
>> + uint64_t rem = *n;
>> + uint64_t b = base;
>> + uint64_t res = 0, d = 1;
>>
>> - low = *n & 0xffffffff;
>> - high = *n >> 32;
>> - rem = high % (uint32_t)base;
>> - high = high / (uint32_t)base;
>> - low2 = low >> 16;
>> - low2 += rem << 16;
>> - rem = low2 % (uint32_t)base;
>> - low2 = low2 / (uint32_t)base;
>> - low = low & 0xffff;
>> - low += rem << 16;
>> - rem = low % (uint32_t)base;
>> - low = low / (uint32_t)base;
>> + if (b > 0) {
>> + while (b < rem) {
>> + b <<= 1;
>> + d <<= 1;
>> + }
>> + }
>> + + do {
>> + if (rem >= b) {
>> + rem -= b;
>> + res += d;
>> + }
>> + b >>= 1;
>> + d >>= 1;
>> + } while (d);
>>
>> - *n = low +
>> - ((uint64_t)low2 << 16) +
>> - ((uint64_t)high << 32);
>> -
>> - return rem;
>> + *n = res;
>> + return rem;
>> }
>>
>> EXPORT_SYMBOL(__div64_32);
>>
>
>