Full 64-bit multitplication and division added to u64 library

This commit is contained in:
Erik van der Kouwe 2010-05-17 16:44:26 +00:00
parent 5fa734b708
commit 7570df267f
10 changed files with 477 additions and 4 deletions

View file

@ -15,13 +15,18 @@ u64_t add64ul(u64_t i, unsigned long j);
u64_t sub64(u64_t i, u64_t j); u64_t sub64(u64_t i, u64_t j);
u64_t sub64u(u64_t i, unsigned j); u64_t sub64u(u64_t i, unsigned j);
u64_t sub64ul(u64_t i, unsigned long j); u64_t sub64ul(u64_t i, unsigned long j);
int bsr64(u64_t i);
unsigned diff64(u64_t i, u64_t j); unsigned diff64(u64_t i, u64_t j);
u64_t cvu64(unsigned i); u64_t cvu64(unsigned i);
u64_t cvul64(unsigned long i); u64_t cvul64(unsigned long i);
unsigned cv64u(u64_t i); unsigned cv64u(u64_t i);
unsigned long cv64ul(u64_t i); unsigned long cv64ul(u64_t i);
u64_t div64(u64_t i, u64_t j);
unsigned long div64u(u64_t i, unsigned j); unsigned long div64u(u64_t i, unsigned j);
u64_t div64u64(u64_t i, unsigned j);
u64_t rem64(u64_t i, u64_t j);
unsigned rem64u(u64_t i, unsigned j); unsigned rem64u(u64_t i, unsigned j);
u64_t mul64(u64_t i, u64_t j);
u64_t mul64u(unsigned long i, unsigned j); u64_t mul64u(unsigned long i, unsigned j);
int cmp64(u64_t i, u64_t j); int cmp64(u64_t i, u64_t j);
int cmp64u(u64_t i, unsigned j); int cmp64u(u64_t i, unsigned j);

View file

@ -4,13 +4,16 @@
SRCS+= \ SRCS+= \
add64.S \ add64.S \
add64u.S \ add64u.S \
bsr64.S \
cmp64.S \ cmp64.S \
cv64u.S \ cv64u.S \
cvu64.S \ cvu64.S \
diff64.S \ diff64.S \
div64.c \
div64u.S \ div64u.S \
ex64.S \ ex64.S \
make64.S \ make64.S \
mul64.c \
mul64u.S \ mul64u.S \
sub64.S \ sub64.S \
sub64u.S sub64u.S

View file

@ -0,0 +1,17 @@
/* bsr64() - 64 bit bit scan reverse Author: Erik van der Kouwe */
/* 15 May 2010 */
#include <minix/compiler.h>
.text
.globl _bsr64
_bsr64:
/* int bsr64(u64_t i); */
bsr 8(%esp), %eax /* check high-order DWORD */
jnz 0f /* non-zero: return index+32 */
bsr 4(%esp), %eax /* check low-order DWORD */
jnz 1f /* non-zero: return index */
movl $-1, %eax /* both were zero, return -1 */
jmp 1f
0: addl $32, %eax /* add 32 to high-order index */
1: ret

View file

@ -0,0 +1,87 @@
/* div64() - full 64-bit division */
/* rem64() - full 64-bit modulo */
/* Author: Erik van der Kouwe */
/* 14 May 2010 */
#include <assert.h>
#include <minix/u64.h>
static u32_t shl64hi(u64_t i, unsigned shift)
{
/* compute the high-order 32-bit value in (i << shift) */
if (shift == 0)
return i.hi;
else if (shift < 32)
return (i.hi << shift) | (i.lo >> (32 - shift));
else if (shift == 32)
return i.lo;
else if (shift < 64)
return i.lo << (shift - 32);
else
return 0;
}
static u64_t divrem64(u64_t *i, u64_t j)
{
u32_t i32, j32, q;
u64_t result = { 0, 0 };
unsigned shift;
assert(i);
/* this function is not suitable for small divisors */
assert(ex64hi(j) != 0);
/* as long as i >= j we work on reducing i */
while (cmp64(*i, j) >= 0) {
/* shift to obtain the 32 most significant bits */
shift = 63 - bsr64(*i);
i32 = shl64hi(*i, shift);
j32 = shl64hi(j, shift);
/* find a lower bound for *i/j */
if (j32 + 1 < j32) {
/* avoid overflow, since *i >= j we know q >= 1 */
q = 1;
} else {
/* use 32-bit division, round j32 up to ensure that
* we obtain a lower bound
*/
q = i32 / (j32 + 1);
/* since *i >= j we know q >= 1 */
if (q < 1) q = 1;
}
/* perform the division using the lower bound we found */
*i = sub64(*i, mul64(j, cvu64(q)));
result = add64u(result, q);
}
/* if we get here then *i < j; because we round down we are finished */
return result;
}
u64_t div64(u64_t i, u64_t j)
{
/* divrem64 is unsuitable for small divisors, especially zero which would
* trigger a infinite loop; use assembly function in this case
*/
if (!ex64hi(j)) {
return div64u64(i, ex64lo(j));
}
return divrem64(&i, j);
}
u64_t rem64(u64_t i, u64_t j)
{
/* divrem64 is unsuitable for small divisors, especially zero which would
* trigger a infinite loop; use assembly function in this case
*/
if (!ex64hi(j)) {
return cvu64(rem64u(i, ex64lo(j)));
}
divrem64(&i, j);
return i;
}

View file

@ -1,8 +1,10 @@
/* div64u() - 64 bit divided by unsigned giving unsigned long */ /* div64u() - 64 bit divided by unsigned giving unsigned long */
/* Author: Kees J. Bot */ /* Author: Kees J. Bot */
/* 7 Dec 1995 */ /* 7 Dec 1995 */
#include <minix/compiler.h>
.text .text
.globl _div64u, _rem64u .globl _div64u, _div64u64, _rem64u
_div64u: _div64u:
/* unsigned long div64u(u64_t i, unsigned j); */ /* unsigned long div64u(u64_t i, unsigned j); */
@ -13,6 +15,19 @@ _div64u:
divl 12(%esp) /* i / j = (q<<32) + ((r<<32) + il) / j */ divl 12(%esp) /* i / j = (q<<32) + ((r<<32) + il) / j */
ret ret
_div64u64:
/* u64_t div64u64(u64_t i, unsigned j); */
xorl %edx, %edx
movl 12(%esp), %eax /* i = (ih<<32) + il */
divl 16(%esp) /* ih = q * j + r */
movl 4(%esp), %ecx /* get pointer to result */
movl %eax, 4(%ecx) /* store high-order result */
movl 8(%esp), %eax
divl 16(%esp) /* i / j = (q<<32) + ((r<<32) + il) / j */
movl %eax, 0(%ecx) /* store low result */
movl %ecx, %eax /* return pointer to result struct */
ret BYTES_TO_POP_ON_STRUCT_RETURN
_rem64u: _rem64u:
/* unsigned rem64u(u64_t i, unsigned j); */ /* unsigned rem64u(u64_t i, unsigned j); */
pop %ecx pop %ecx

View file

@ -0,0 +1,20 @@
#include <minix/u64.h>
u64_t mul64(u64_t i, u64_t j)
{
u64_t result;
/* Compute as follows:
* i * j =
* (i.hi << 32 + i.lo) * (j.hi << 32 + j.lo) =
* (i.hi << 32) * (j.hi << 32 + j.lo) + i.lo * (j.hi << 32 + j.lo) =
* (i.hi * j.hi) << 64 + (i.hi * j.lo) << 32 + (i.lo * j.hi << 32) + i.lo * j.lo
*
* 64-bit-result multiply only needed for (i.lo * j.lo)
* upper 32 bits overflow for (i.lo * j.hi) and (i.hi * j.lo)
* all overflows for (i.hi * j.hi)
*/
result = mul64u(i.lo, j.lo);
result.hi += i.hi * j.lo + i.lo * j.hi;
return result;
}

View file

@ -1,6 +1,6 @@
.TH INT64 3 .TH INT64 3
.SH NAME .SH NAME
int64, add64, add64u, add64ul, sub64, sub64u, sub64ul, diff64, cvu64, cvul64, cv64u, cv64ul, div64u, rem64u, mul64u, cmp64, cmp64u, cmp64ul, ex64lo, ex64hi, make64 \- 64 bit disk offset computations int64, add64, add64u, add64ul, sub64, sub64u, sub64ul, diff64, bsr64, cvu64, cvul64, cv64u, cv64ul, div64, div64u, div64u64, rem64, rem64u, mul64, mul64u, cmp64, cmp64u, cmp64ul, ex64lo, ex64hi, make64 \- 64 bit disk offset computations
.SH SYNOPSIS .SH SYNOPSIS
.ft B .ft B
.nf .nf
@ -13,12 +13,17 @@ u64_t sub64(u64_t \fIi\fP, u64_t \fIj\fP)
u64_t sub64u(u64_t \fIi\fP, unsigned \fIj\fP) u64_t sub64u(u64_t \fIi\fP, unsigned \fIj\fP)
u64_t sub64ul(u64_t \fIi\fP, unsigned long \fIj\fP) u64_t sub64ul(u64_t \fIi\fP, unsigned long \fIj\fP)
unsigned diff64(u64_t \fIi\fP, u64_t \fIj\fP) unsigned diff64(u64_t \fIi\fP, u64_t \fIj\fP)
int bsr64(u64_t \fIi\fP)
u64_t cvu64(unsigned \fIi\fP) u64_t cvu64(unsigned \fIi\fP)
u64_t cvul64(unsigned long \fIi\fP) u64_t cvul64(unsigned long \fIi\fP)
unsigned cv64u(u64_t \fIi\fP) unsigned cv64u(u64_t \fIi\fP)
unsigned long cv64ul(u64_t \fIi\fP) unsigned long cv64ul(u64_t \fIi\fP)
u64_t div64(u64_t \fIi\fP, u64_t \fIj\fP)
unsigned long div64u(u64_t \fIi\fP, unsigned \fIj\fP) unsigned long div64u(u64_t \fIi\fP, unsigned \fIj\fP)
u64_t div64u64(u64_t \fIi\fP, unsigned \fIj\fP)
u64_t rem64(u64_t \fIi\fP, u64_t \fIj\fP)
unsigned rem64u(u64_t \fIi\fP, unsigned \fIj\fP) unsigned rem64u(u64_t \fIi\fP, unsigned \fIj\fP)
u64_t mul64(u64_t \fIi\fP, u64_t \fIj\fP)
u64_t mul64u(unsigned long \fIi\fP, unsigned \fIj\fP) u64_t mul64u(unsigned long \fIi\fP, unsigned \fIj\fP)
int cmp64(u64_t \fIi\fP, u64_t \fIj\fP) int cmp64(u64_t \fIi\fP, u64_t \fIj\fP)
int cmp64u(u64_t \fIi\fP, unsigned \fIj\fP) int cmp64u(u64_t \fIi\fP, unsigned \fIj\fP)
@ -95,6 +100,9 @@ from the 64 bit number
.I i .I i
forming an unsigned. Overflow is not checked. forming an unsigned. Overflow is not checked.
.TP .TP
.B "int bsr64(u64_t \fIi\fP)"
Return the index of the highest-order bit set. If the value is zero, -1 is returned.
.TP
.B "u64_t cvu64(unsigned \fIi\fP)" .B "u64_t cvu64(unsigned \fIi\fP)"
Convert an unsigned to a 64 bit number. Convert an unsigned to a 64 bit number.
.TP .TP
@ -109,6 +117,12 @@ Convert a 64 bit number to an unsigned if it fits, otherwise return
Convert a 64 bit number to an unsigned long if it fits, otherwise return Convert a 64 bit number to an unsigned long if it fits, otherwise return
.BR ULONG_MAX . .BR ULONG_MAX .
.TP .TP
.B "u64_t div64(u64_t \fIi\fP, u64_t \fIj\fP)"
Divide the 64 bit number
.I i
by the 64 bit number
.I j
giving a 64 bit number.
.B "unsigned long div64u(u64_t \fIi\fP, unsigned \fIj\fP)" .B "unsigned long div64u(u64_t \fIi\fP, unsigned \fIj\fP)"
Divide the 64 bit number Divide the 64 bit number
.I i .I i
@ -116,6 +130,19 @@ by the unsigned
.I j .I j
giving an unsigned long. Overflow is not checked. (Typical "byte offset to giving an unsigned long. Overflow is not checked. (Typical "byte offset to
block number" conversion.) block number" conversion.)
.B "u64_t div64u64(u64_t \fIi\fP, unsigned \fIj\fP)"
Divide the 64 bit number
.I i
by the unsigned
.I j
giving a 64 bit number.
.TP
.B "u64_t rem64(u64_t \fIi\fP, u64_t \fIj\fP)"
Compute the remainder of the division of the 64 bit number
.I i
by the 64 bit number
.I j
as a 64 bit number.
.TP .TP
.B "unsigned rem64u(u64_t \fIi\fP, unsigned \fIj\fP)" .B "unsigned rem64u(u64_t \fIi\fP, unsigned \fIj\fP)"
Compute the remainder of the division of the 64 bit number Compute the remainder of the division of the 64 bit number
@ -124,6 +151,13 @@ by the unsigned
.I j .I j
as an unsigned. (Typical "byte offset within a block" computation.) as an unsigned. (Typical "byte offset within a block" computation.)
.TP .TP
.B "u64_t mul64(u64_t \fIi\fP, u64_t \fIj\fP)"
Multiply the 64 bit number
.I i
by the 64 bit number
.I j
giving a 64 bit number.
.TP
.B "u64_t mul64u(unsigned long \fIi\fP, unsigned \fIj\fP)" .B "u64_t mul64u(unsigned long \fIi\fP, unsigned \fIj\fP)"
Multiply the unsigned long Multiply the unsigned long
.I i .I i

View file

@ -12,7 +12,7 @@ OBJ= test1 test2 test3 test4 test5 test6 test7 test8 test9 \
test21 test22 test23 test25 test26 test27 test28 test29 \ test21 test22 test23 test25 test26 test27 test28 test29 \
test30 test31 test32 test34 test35 test36 test37 test38 \ test30 test31 test32 test34 test35 test36 test37 test38 \
test39 t10a t11a t11b test40 t40a t40b t40c t40d t40e t40f test41 \ test39 t10a t11a t11b test40 t40a t40b t40c t40d t40e t40f test41 \
test42 test44 test45 test47 test48 test49 test50 test51 test52 test42 test44 test45 test47 test48 test49 test50 test51 test52 test53
BIGOBJ= test20 test24 BIGOBJ= test20 test24
ROOTOBJ= test11 test33 test43 test46 ROOTOBJ= test11 test33 test43 test46

View file

@ -14,7 +14,7 @@ badones= # list of tests that failed
tests=" 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 \ tests=" 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 \
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 \ 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 \
41 42 43 44 45 45-gcc 46 47 48 49 49-gcc 50 \ 41 42 43 44 45 45-gcc 46 47 48 49 49-gcc 50 \
51 51-gcc 52 52-gcc \ 51 51-gcc 52 52-gcc 53 \
sh1.sh sh2.sh" sh1.sh sh2.sh"
tests_no=`expr 0` tests_no=`expr 0`

292
test/test53.c Normal file
View file

@ -0,0 +1,292 @@
#include <assert.h>
#include <minix/u64.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <unistd.h>
#define ERR err(__LINE__)
#define MAX_ERROR 4
#define TIMED 0
static volatile int errct;
static volatile expect_SIGFPE;
static u64_t i, j, k;
static jmp_buf jmpbuf_SIGFPE, jmpbuf_main;
static void quit(void)
{
if (errct == 0) {
printf("ok\n");
exit(0);
} else {
printf("%d errors\n", errct);
exit(1);
}
assert(0); /* not reachable */
}
static void err(int line)
{
/* print error information */
printf("error line %d; i=0x%.8x%.8x; j=0x%.8x%.8x; k=0x%.8x%.8x\n",
line,
ex64hi(i), ex64lo(i),
ex64hi(j), ex64lo(j),
ex64hi(k), ex64lo(k));
/* quit after too many errors */
if (errct++ > MAX_ERROR) {
printf("Too many errors; test aborted\n");
quit();
}
}
#define LENGTHOF(arr) (sizeof(arr) / sizeof(arr[0]))
static u64_t getargval(int index, int *done)
{
u32_t values[] = {
/* corner cases */
0,
1,
0x7fffffff,
0x80000000,
0x80000001,
0xffffffff,
/* random values */
0xa9,
0x0d88,
0x242811,
0xeb44d1bc,
0x5b,
0xfb50,
0x569c02,
0xb23c8f7d,
0xc3,
0x2366,
0xfabb73,
0xcb4e8aef,
0xe9,
0xffdc,
0x05842d,
0x3fff902d};
assert(done);
/* values with corner case and random 32-bit components */
if (index < LENGTHOF(values) * LENGTHOF(values))
return make64(values[index / LENGTHOF(values)], values[index % LENGTHOF(values)]);
index -= LENGTHOF(values) * LENGTHOF(values);
/* small numbers */
if (index < 16) return make64(index + 2, 0);
index -= 16;
/* big numbers */
if (index < 16) return make64(-index - 2, -1);
index -= 16;
/* powers of two */
if (index < 14) return make64(1 << (index * 2 + 5), 0);
index -= 14;
if (index < 16) return make64(0, 1 << (index * 2 + 1));
index -= 16;
/* done */
*done = 1;
return make64(0, 0);
}
static void handler_SIGFPE(int signum)
{
assert(signum == SIGFPE);
/* restore the signal handler */
if (signal(SIGFPE, handler_SIGFPE) == SIG_ERR) ERR;
/* division by zero occurred, was this expected? */
if (expect_SIGFPE) {
/* expected: jump back to test */
expect_SIGFPE = 0;
longjmp(jmpbuf_SIGFPE, -1);
} else {
/* not expected: error and jump back to main */
longjmp(jmpbuf_main, -1);
}
/* not reachable */
assert(0);
exit(-1);
}
static void testmul(void)
{
int kdone, kidx;
u32_t ilo = ex64lo(i), jlo = ex64lo(j);
u64_t prod = mul64(i, j);
int prodbits;
/* compute maximum index of highest-order bit */
prodbits = bsr64(i) + bsr64(j) + 1;
if (cmp64u(i, 0) == 0 || cmp64u(j, 0) == 0) prodbits = -1;
if (bsr64(prod) > prodbits) ERR;
/* compare to 32-bit multiplication if possible */
if (ex64hi(i) == 0 && ex64hi(j) == 0) {
if (cmp64(prod, mul64u(ilo, jlo)) != 0) ERR;
/* if there is no overflow we can check against pure 32-bit */
if (prodbits < 32 && cmp64u(prod, ilo * jlo) != 0) ERR;
}
/* in 32-bit arith low-order DWORD matches regardless of overflow */
if (ex64lo(prod) != ilo * jlo) ERR;
/* multiplication by zero yields zero */
if (prodbits < 0 && cmp64u(prod, 0) != 0) ERR;
/* if there is no overflow, check absence of zero divisors */
if (prodbits >= 0 && prodbits < 64 && cmp64u(prod, 0) == 0) ERR;
/* commutativity */
if (cmp64(prod, mul64(j, i)) != 0) ERR;
/* loop though all argument value combinations for third argument */
for (kdone = 0, kidx = 0; k = getargval(kidx, &kdone), !kdone; kidx++) {
/* associativity */
if (cmp64(mul64(mul64(i, j), k), mul64(i, mul64(j, k))) != 0) ERR;
/* left and right distributivity */
if (cmp64(mul64(add64(i, j), k), add64(mul64(i, k), mul64(j, k))) != 0) ERR;
if (cmp64(mul64(i, add64(j, k)), add64(mul64(i, j), mul64(i, k))) != 0) ERR;
}
}
static void testdiv0(void)
{
int funcidx;
assert(cmp64u(j, 0) == 0);
/* loop through the 5 different division functions */
for (funcidx = 0; funcidx < 5; funcidx++) {
expect_SIGFPE = 1;
if (setjmp(jmpbuf_SIGFPE) == 0) {
/* divide by zero using various functions */
switch (funcidx) {
case 0: div64(i, j); ERR; break;
case 1: div64u64(i, ex64lo(j)); ERR; break;
case 2: div64u(i, ex64lo(j)); ERR; break;
case 3: rem64(i, j); ERR; break;
case 4: rem64u(i, ex64lo(j)); ERR; break;
default: assert(0); ERR; break;
}
/* if we reach this point there was no signal and an
* error has been recorded
*/
expect_SIGFPE = 0;
} else {
/* a signal has been received and expect_SIGFPE has
* been reset; all is ok now
*/
assert(!expect_SIGFPE);
}
}
}
static void testdiv(void)
{
u64_t q, r;
#if TIMED
struct timeval tvstart, tvend;
printf("i=0x%.8x%.8x; j=0x%.8x%.8x\n",
ex64hi(i), ex64lo(i),
ex64hi(j), ex64lo(j));
fflush(stdout);
if (gettimeofday(&tvstart, NULL) < 0) ERR;
#endif
/* division by zero has a separate test */
if (cmp64u(j, 0) == 0) {
testdiv0();
return;
}
/* perform division, store q in k to make ERR more informative */
q = div64(i, j);
r = rem64(i, j);
k = q;
#if TIMED
if (gettimeofday(&tvend, NULL) < 0) ERR;
tvend.tv_sec -= tvstart.tv_sec;
tvend.tv_usec -= tvstart.tv_usec;
if (tvend.tv_usec < 0) {
tvend.tv_sec -= 1;
tvend.tv_usec += 1000000;
}
printf("q=0x%.8x%.8x; r=0x%.8x%.8x; time=%d.%.6d\n",
ex64hi(q), ex64lo(q),
ex64hi(r), ex64lo(r),
tvend.tv_sec, tvend.tv_usec);
fflush(stdout);
#endif
/* compare to 64/32-bit division if possible */
if (!ex64hi(j)) {
if (cmp64(q, div64u64(i, ex64lo(j))) != 0) ERR;
if (!ex64hi(q)) {
if (cmp64u(q, div64u(i, ex64lo(j))) != 0) ERR;
}
if (cmp64u(r, rem64u(i, ex64lo(j))) != 0) ERR;
/* compare to 32-bit division if possible */
if (!ex64hi(i)) {
if (cmp64u(q, ex64lo(i) / ex64lo(j)) != 0) ERR;
if (cmp64u(r, ex64lo(i) % ex64lo(j)) != 0) ERR;
}
}
/* check results using i = q j + r and r < j */
if (cmp64(i, add64(mul64(q, j), r)) != 0) ERR;
if (cmp64(r, j) >= 0) ERR;
}
static void test(void)
{
int idone, jdone, iidx, jidx;
/* loop though all argument value combinations */
for (idone = 0, iidx = 0; i = getargval(iidx, &idone), !idone; iidx++)
for (jdone = 0, jidx = 0; j = getargval(jidx, &jdone), !jdone; jidx++) {
testmul();
testdiv();
}
}
int main(void)
{
printf("Test 53 ");
/* set up signal handler to deal with div by zero */
if (setjmp(jmpbuf_main) == 0) {
if (signal(SIGFPE, handler_SIGFPE) == SIG_ERR) ERR;
/* perform tests */
test();
} else {
/* an unexpected SIGFPE has occurred */
ERR;
}
/* this was all */
quit();
assert(0); /* not reachable */
return -1;
}