Full 64-bit multitplication and division added to u64 library

This commit is contained in:
Erik van der Kouwe 2010-05-17 16:44:26 +00:00
parent 5fa734b708
commit 7570df267f
10 changed files with 477 additions and 4 deletions

View file

@ -15,13 +15,18 @@ u64_t add64ul(u64_t i, unsigned long j);
u64_t sub64(u64_t i, u64_t j);
u64_t sub64u(u64_t i, unsigned j);
u64_t sub64ul(u64_t i, unsigned long j);
int bsr64(u64_t i);
unsigned diff64(u64_t i, u64_t j);
u64_t cvu64(unsigned i);
u64_t cvul64(unsigned long i);
unsigned cv64u(u64_t i);
unsigned long cv64ul(u64_t i);
u64_t div64(u64_t i, u64_t j);
unsigned long div64u(u64_t i, unsigned j);
u64_t div64u64(u64_t i, unsigned j);
u64_t rem64(u64_t i, u64_t j);
unsigned rem64u(u64_t i, unsigned j);
u64_t mul64(u64_t i, u64_t j);
u64_t mul64u(unsigned long i, unsigned j);
int cmp64(u64_t i, u64_t j);
int cmp64u(u64_t i, unsigned j);

View file

@ -4,13 +4,16 @@
SRCS+= \
add64.S \
add64u.S \
bsr64.S \
cmp64.S \
cv64u.S \
cvu64.S \
diff64.S \
div64.c \
div64u.S \
ex64.S \
make64.S \
mul64.c \
mul64u.S \
sub64.S \
sub64u.S

View file

@ -0,0 +1,17 @@
/* bsr64() - 64 bit bit scan reverse Author: Erik van der Kouwe */
/* 15 May 2010 */
#include <minix/compiler.h>
.text
.globl _bsr64
_bsr64:
/* int bsr64(u64_t i); */
bsr 8(%esp), %eax /* check high-order DWORD */
jnz 0f /* non-zero: return index+32 */
bsr 4(%esp), %eax /* check low-order DWORD */
jnz 1f /* non-zero: return index */
movl $-1, %eax /* both were zero, return -1 */
jmp 1f
0: addl $32, %eax /* add 32 to high-order index */
1: ret

View file

@ -0,0 +1,87 @@
/* div64() - full 64-bit division */
/* rem64() - full 64-bit modulo */
/* Author: Erik van der Kouwe */
/* 14 May 2010 */
#include <assert.h>
#include <minix/u64.h>
static u32_t shl64hi(u64_t i, unsigned shift)
{
/* compute the high-order 32-bit value in (i << shift) */
if (shift == 0)
return i.hi;
else if (shift < 32)
return (i.hi << shift) | (i.lo >> (32 - shift));
else if (shift == 32)
return i.lo;
else if (shift < 64)
return i.lo << (shift - 32);
else
return 0;
}
static u64_t divrem64(u64_t *i, u64_t j)
{
u32_t i32, j32, q;
u64_t result = { 0, 0 };
unsigned shift;
assert(i);
/* this function is not suitable for small divisors */
assert(ex64hi(j) != 0);
/* as long as i >= j we work on reducing i */
while (cmp64(*i, j) >= 0) {
/* shift to obtain the 32 most significant bits */
shift = 63 - bsr64(*i);
i32 = shl64hi(*i, shift);
j32 = shl64hi(j, shift);
/* find a lower bound for *i/j */
if (j32 + 1 < j32) {
/* avoid overflow, since *i >= j we know q >= 1 */
q = 1;
} else {
/* use 32-bit division, round j32 up to ensure that
* we obtain a lower bound
*/
q = i32 / (j32 + 1);
/* since *i >= j we know q >= 1 */
if (q < 1) q = 1;
}
/* perform the division using the lower bound we found */
*i = sub64(*i, mul64(j, cvu64(q)));
result = add64u(result, q);
}
/* if we get here then *i < j; because we round down we are finished */
return result;
}
u64_t div64(u64_t i, u64_t j)
{
/* divrem64 is unsuitable for small divisors, especially zero which would
* trigger a infinite loop; use assembly function in this case
*/
if (!ex64hi(j)) {
return div64u64(i, ex64lo(j));
}
return divrem64(&i, j);
}
u64_t rem64(u64_t i, u64_t j)
{
/* divrem64 is unsuitable for small divisors, especially zero which would
* trigger a infinite loop; use assembly function in this case
*/
if (!ex64hi(j)) {
return cvu64(rem64u(i, ex64lo(j)));
}
divrem64(&i, j);
return i;
}

View file

@ -1,8 +1,10 @@
/* div64u() - 64 bit divided by unsigned giving unsigned long */
/* Author: Kees J. Bot */
/* 7 Dec 1995 */
#include <minix/compiler.h>
.text
.globl _div64u, _rem64u
.globl _div64u, _div64u64, _rem64u
_div64u:
/* unsigned long div64u(u64_t i, unsigned j); */
@ -13,6 +15,19 @@ _div64u:
divl 12(%esp) /* i / j = (q<<32) + ((r<<32) + il) / j */
ret
_div64u64:
/* u64_t div64u64(u64_t i, unsigned j); */
xorl %edx, %edx
movl 12(%esp), %eax /* i = (ih<<32) + il */
divl 16(%esp) /* ih = q * j + r */
movl 4(%esp), %ecx /* get pointer to result */
movl %eax, 4(%ecx) /* store high-order result */
movl 8(%esp), %eax
divl 16(%esp) /* i / j = (q<<32) + ((r<<32) + il) / j */
movl %eax, 0(%ecx) /* store low result */
movl %ecx, %eax /* return pointer to result struct */
ret BYTES_TO_POP_ON_STRUCT_RETURN
_rem64u:
/* unsigned rem64u(u64_t i, unsigned j); */
pop %ecx

View file

@ -0,0 +1,20 @@
#include <minix/u64.h>
u64_t mul64(u64_t i, u64_t j)
{
u64_t result;
/* Compute as follows:
* i * j =
* (i.hi << 32 + i.lo) * (j.hi << 32 + j.lo) =
* (i.hi << 32) * (j.hi << 32 + j.lo) + i.lo * (j.hi << 32 + j.lo) =
* (i.hi * j.hi) << 64 + (i.hi * j.lo) << 32 + (i.lo * j.hi << 32) + i.lo * j.lo
*
* 64-bit-result multiply only needed for (i.lo * j.lo)
* upper 32 bits overflow for (i.lo * j.hi) and (i.hi * j.lo)
* all overflows for (i.hi * j.hi)
*/
result = mul64u(i.lo, j.lo);
result.hi += i.hi * j.lo + i.lo * j.hi;
return result;
}

View file

@ -1,6 +1,6 @@
.TH INT64 3
.SH NAME
int64, add64, add64u, add64ul, sub64, sub64u, sub64ul, diff64, cvu64, cvul64, cv64u, cv64ul, div64u, rem64u, mul64u, cmp64, cmp64u, cmp64ul, ex64lo, ex64hi, make64 \- 64 bit disk offset computations
int64, add64, add64u, add64ul, sub64, sub64u, sub64ul, diff64, bsr64, cvu64, cvul64, cv64u, cv64ul, div64, div64u, div64u64, rem64, rem64u, mul64, mul64u, cmp64, cmp64u, cmp64ul, ex64lo, ex64hi, make64 \- 64 bit disk offset computations
.SH SYNOPSIS
.ft B
.nf
@ -13,12 +13,17 @@ u64_t sub64(u64_t \fIi\fP, u64_t \fIj\fP)
u64_t sub64u(u64_t \fIi\fP, unsigned \fIj\fP)
u64_t sub64ul(u64_t \fIi\fP, unsigned long \fIj\fP)
unsigned diff64(u64_t \fIi\fP, u64_t \fIj\fP)
int bsr64(u64_t \fIi\fP)
u64_t cvu64(unsigned \fIi\fP)
u64_t cvul64(unsigned long \fIi\fP)
unsigned cv64u(u64_t \fIi\fP)
unsigned long cv64ul(u64_t \fIi\fP)
u64_t div64(u64_t \fIi\fP, u64_t \fIj\fP)
unsigned long div64u(u64_t \fIi\fP, unsigned \fIj\fP)
u64_t div64u64(u64_t \fIi\fP, unsigned \fIj\fP)
u64_t rem64(u64_t \fIi\fP, u64_t \fIj\fP)
unsigned rem64u(u64_t \fIi\fP, unsigned \fIj\fP)
u64_t mul64(u64_t \fIi\fP, u64_t \fIj\fP)
u64_t mul64u(unsigned long \fIi\fP, unsigned \fIj\fP)
int cmp64(u64_t \fIi\fP, u64_t \fIj\fP)
int cmp64u(u64_t \fIi\fP, unsigned \fIj\fP)
@ -95,6 +100,9 @@ from the 64 bit number
.I i
forming an unsigned. Overflow is not checked.
.TP
.B "int bsr64(u64_t \fIi\fP)"
Return the index of the highest-order bit set. If the value is zero, -1 is returned.
.TP
.B "u64_t cvu64(unsigned \fIi\fP)"
Convert an unsigned to a 64 bit number.
.TP
@ -109,6 +117,12 @@ Convert a 64 bit number to an unsigned if it fits, otherwise return
Convert a 64 bit number to an unsigned long if it fits, otherwise return
.BR ULONG_MAX .
.TP
.B "u64_t div64(u64_t \fIi\fP, u64_t \fIj\fP)"
Divide the 64 bit number
.I i
by the 64 bit number
.I j
giving a 64 bit number.
.B "unsigned long div64u(u64_t \fIi\fP, unsigned \fIj\fP)"
Divide the 64 bit number
.I i
@ -116,6 +130,19 @@ by the unsigned
.I j
giving an unsigned long. Overflow is not checked. (Typical "byte offset to
block number" conversion.)
.B "u64_t div64u64(u64_t \fIi\fP, unsigned \fIj\fP)"
Divide the 64 bit number
.I i
by the unsigned
.I j
giving a 64 bit number.
.TP
.B "u64_t rem64(u64_t \fIi\fP, u64_t \fIj\fP)"
Compute the remainder of the division of the 64 bit number
.I i
by the 64 bit number
.I j
as a 64 bit number.
.TP
.B "unsigned rem64u(u64_t \fIi\fP, unsigned \fIj\fP)"
Compute the remainder of the division of the 64 bit number
@ -124,6 +151,13 @@ by the unsigned
.I j
as an unsigned. (Typical "byte offset within a block" computation.)
.TP
.B "u64_t mul64(u64_t \fIi\fP, u64_t \fIj\fP)"
Multiply the 64 bit number
.I i
by the 64 bit number
.I j
giving a 64 bit number.
.TP
.B "u64_t mul64u(unsigned long \fIi\fP, unsigned \fIj\fP)"
Multiply the unsigned long
.I i

View file

@ -12,7 +12,7 @@ OBJ= test1 test2 test3 test4 test5 test6 test7 test8 test9 \
test21 test22 test23 test25 test26 test27 test28 test29 \
test30 test31 test32 test34 test35 test36 test37 test38 \
test39 t10a t11a t11b test40 t40a t40b t40c t40d t40e t40f test41 \
test42 test44 test45 test47 test48 test49 test50 test51 test52
test42 test44 test45 test47 test48 test49 test50 test51 test52 test53
BIGOBJ= test20 test24
ROOTOBJ= test11 test33 test43 test46

View file

@ -14,7 +14,7 @@ badones= # list of tests that failed
tests=" 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 \
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 \
41 42 43 44 45 45-gcc 46 47 48 49 49-gcc 50 \
51 51-gcc 52 52-gcc \
51 51-gcc 52 52-gcc 53 \
sh1.sh sh2.sh"
tests_no=`expr 0`

292
test/test53.c Normal file
View file

@ -0,0 +1,292 @@
#include <assert.h>
#include <minix/u64.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <unistd.h>
#define ERR err(__LINE__)
#define MAX_ERROR 4
#define TIMED 0
static volatile int errct;
static volatile expect_SIGFPE;
static u64_t i, j, k;
static jmp_buf jmpbuf_SIGFPE, jmpbuf_main;
static void quit(void)
{
if (errct == 0) {
printf("ok\n");
exit(0);
} else {
printf("%d errors\n", errct);
exit(1);
}
assert(0); /* not reachable */
}
static void err(int line)
{
/* print error information */
printf("error line %d; i=0x%.8x%.8x; j=0x%.8x%.8x; k=0x%.8x%.8x\n",
line,
ex64hi(i), ex64lo(i),
ex64hi(j), ex64lo(j),
ex64hi(k), ex64lo(k));
/* quit after too many errors */
if (errct++ > MAX_ERROR) {
printf("Too many errors; test aborted\n");
quit();
}
}
#define LENGTHOF(arr) (sizeof(arr) / sizeof(arr[0]))
static u64_t getargval(int index, int *done)
{
u32_t values[] = {
/* corner cases */
0,
1,
0x7fffffff,
0x80000000,
0x80000001,
0xffffffff,
/* random values */
0xa9,
0x0d88,
0x242811,
0xeb44d1bc,
0x5b,
0xfb50,
0x569c02,
0xb23c8f7d,
0xc3,
0x2366,
0xfabb73,
0xcb4e8aef,
0xe9,
0xffdc,
0x05842d,
0x3fff902d};
assert(done);
/* values with corner case and random 32-bit components */
if (index < LENGTHOF(values) * LENGTHOF(values))
return make64(values[index / LENGTHOF(values)], values[index % LENGTHOF(values)]);
index -= LENGTHOF(values) * LENGTHOF(values);
/* small numbers */
if (index < 16) return make64(index + 2, 0);
index -= 16;
/* big numbers */
if (index < 16) return make64(-index - 2, -1);
index -= 16;
/* powers of two */
if (index < 14) return make64(1 << (index * 2 + 5), 0);
index -= 14;
if (index < 16) return make64(0, 1 << (index * 2 + 1));
index -= 16;
/* done */
*done = 1;
return make64(0, 0);
}
static void handler_SIGFPE(int signum)
{
assert(signum == SIGFPE);
/* restore the signal handler */
if (signal(SIGFPE, handler_SIGFPE) == SIG_ERR) ERR;
/* division by zero occurred, was this expected? */
if (expect_SIGFPE) {
/* expected: jump back to test */
expect_SIGFPE = 0;
longjmp(jmpbuf_SIGFPE, -1);
} else {
/* not expected: error and jump back to main */
longjmp(jmpbuf_main, -1);
}
/* not reachable */
assert(0);
exit(-1);
}
static void testmul(void)
{
int kdone, kidx;
u32_t ilo = ex64lo(i), jlo = ex64lo(j);
u64_t prod = mul64(i, j);
int prodbits;
/* compute maximum index of highest-order bit */
prodbits = bsr64(i) + bsr64(j) + 1;
if (cmp64u(i, 0) == 0 || cmp64u(j, 0) == 0) prodbits = -1;
if (bsr64(prod) > prodbits) ERR;
/* compare to 32-bit multiplication if possible */
if (ex64hi(i) == 0 && ex64hi(j) == 0) {
if (cmp64(prod, mul64u(ilo, jlo)) != 0) ERR;
/* if there is no overflow we can check against pure 32-bit */
if (prodbits < 32 && cmp64u(prod, ilo * jlo) != 0) ERR;
}
/* in 32-bit arith low-order DWORD matches regardless of overflow */
if (ex64lo(prod) != ilo * jlo) ERR;
/* multiplication by zero yields zero */
if (prodbits < 0 && cmp64u(prod, 0) != 0) ERR;
/* if there is no overflow, check absence of zero divisors */
if (prodbits >= 0 && prodbits < 64 && cmp64u(prod, 0) == 0) ERR;
/* commutativity */
if (cmp64(prod, mul64(j, i)) != 0) ERR;
/* loop though all argument value combinations for third argument */
for (kdone = 0, kidx = 0; k = getargval(kidx, &kdone), !kdone; kidx++) {
/* associativity */
if (cmp64(mul64(mul64(i, j), k), mul64(i, mul64(j, k))) != 0) ERR;
/* left and right distributivity */
if (cmp64(mul64(add64(i, j), k), add64(mul64(i, k), mul64(j, k))) != 0) ERR;
if (cmp64(mul64(i, add64(j, k)), add64(mul64(i, j), mul64(i, k))) != 0) ERR;
}
}
static void testdiv0(void)
{
int funcidx;
assert(cmp64u(j, 0) == 0);
/* loop through the 5 different division functions */
for (funcidx = 0; funcidx < 5; funcidx++) {
expect_SIGFPE = 1;
if (setjmp(jmpbuf_SIGFPE) == 0) {
/* divide by zero using various functions */
switch (funcidx) {
case 0: div64(i, j); ERR; break;
case 1: div64u64(i, ex64lo(j)); ERR; break;
case 2: div64u(i, ex64lo(j)); ERR; break;
case 3: rem64(i, j); ERR; break;
case 4: rem64u(i, ex64lo(j)); ERR; break;
default: assert(0); ERR; break;
}
/* if we reach this point there was no signal and an
* error has been recorded
*/
expect_SIGFPE = 0;
} else {
/* a signal has been received and expect_SIGFPE has
* been reset; all is ok now
*/
assert(!expect_SIGFPE);
}
}
}
static void testdiv(void)
{
u64_t q, r;
#if TIMED
struct timeval tvstart, tvend;
printf("i=0x%.8x%.8x; j=0x%.8x%.8x\n",
ex64hi(i), ex64lo(i),
ex64hi(j), ex64lo(j));
fflush(stdout);
if (gettimeofday(&tvstart, NULL) < 0) ERR;
#endif
/* division by zero has a separate test */
if (cmp64u(j, 0) == 0) {
testdiv0();
return;
}
/* perform division, store q in k to make ERR more informative */
q = div64(i, j);
r = rem64(i, j);
k = q;
#if TIMED
if (gettimeofday(&tvend, NULL) < 0) ERR;
tvend.tv_sec -= tvstart.tv_sec;
tvend.tv_usec -= tvstart.tv_usec;
if (tvend.tv_usec < 0) {
tvend.tv_sec -= 1;
tvend.tv_usec += 1000000;
}
printf("q=0x%.8x%.8x; r=0x%.8x%.8x; time=%d.%.6d\n",
ex64hi(q), ex64lo(q),
ex64hi(r), ex64lo(r),
tvend.tv_sec, tvend.tv_usec);
fflush(stdout);
#endif
/* compare to 64/32-bit division if possible */
if (!ex64hi(j)) {
if (cmp64(q, div64u64(i, ex64lo(j))) != 0) ERR;
if (!ex64hi(q)) {
if (cmp64u(q, div64u(i, ex64lo(j))) != 0) ERR;
}
if (cmp64u(r, rem64u(i, ex64lo(j))) != 0) ERR;
/* compare to 32-bit division if possible */
if (!ex64hi(i)) {
if (cmp64u(q, ex64lo(i) / ex64lo(j)) != 0) ERR;
if (cmp64u(r, ex64lo(i) % ex64lo(j)) != 0) ERR;
}
}
/* check results using i = q j + r and r < j */
if (cmp64(i, add64(mul64(q, j), r)) != 0) ERR;
if (cmp64(r, j) >= 0) ERR;
}
static void test(void)
{
int idone, jdone, iidx, jidx;
/* loop though all argument value combinations */
for (idone = 0, iidx = 0; i = getargval(iidx, &idone), !idone; iidx++)
for (jdone = 0, jidx = 0; j = getargval(jidx, &jdone), !jdone; jidx++) {
testmul();
testdiv();
}
}
int main(void)
{
printf("Test 53 ");
/* set up signal handler to deal with div by zero */
if (setjmp(jmpbuf_main) == 0) {
if (signal(SIGFPE, handler_SIGFPE) == SIG_ERR) ERR;
/* perform tests */
test();
} else {
/* an unexpected SIGFPE has occurred */
ERR;
}
/* this was all */
quit();
assert(0); /* not reachable */
return -1;
}